CoCalc -- PPCISelLowering.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
104186 views
1
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the PPCISelLowering class.
10
//
11
//===----------------------------------------------------------------------===//
12

13
#include "PPCISelLowering.h"
14
#include "MCTargetDesc/PPCMCTargetDesc.h"
15
#include "MCTargetDesc/PPCPredicates.h"
16
#include "PPC.h"
17
#include "PPCCCState.h"
18
#include "PPCCallingConv.h"
19
#include "PPCFrameLowering.h"
20
#include "PPCInstrInfo.h"
21
#include "PPCMachineFunctionInfo.h"
22
#include "PPCPerfectShuffle.h"
23
#include "PPCRegisterInfo.h"
24
#include "PPCSubtarget.h"
25
#include "PPCTargetMachine.h"
26
#include "llvm/ADT/APFloat.h"
27
#include "llvm/ADT/APInt.h"
28
#include "llvm/ADT/APSInt.h"
29
#include "llvm/ADT/ArrayRef.h"
30
#include "llvm/ADT/DenseMap.h"
31
#include "llvm/ADT/STLExtras.h"
32
#include "llvm/ADT/SmallPtrSet.h"
33
#include "llvm/ADT/SmallSet.h"
34
#include "llvm/ADT/SmallVector.h"
35
#include "llvm/ADT/Statistic.h"
36
#include "llvm/ADT/StringRef.h"
37
#include "llvm/ADT/StringSwitch.h"
38
#include "llvm/CodeGen/CallingConvLower.h"
39
#include "llvm/CodeGen/ISDOpcodes.h"
40
#include "llvm/CodeGen/MachineBasicBlock.h"
41
#include "llvm/CodeGen/MachineFrameInfo.h"
42
#include "llvm/CodeGen/MachineFunction.h"
43
#include "llvm/CodeGen/MachineInstr.h"
44
#include "llvm/CodeGen/MachineInstrBuilder.h"
45
#include "llvm/CodeGen/MachineJumpTableInfo.h"
46
#include "llvm/CodeGen/MachineLoopInfo.h"
47
#include "llvm/CodeGen/MachineMemOperand.h"
48
#include "llvm/CodeGen/MachineModuleInfo.h"
49
#include "llvm/CodeGen/MachineOperand.h"
50
#include "llvm/CodeGen/MachineRegisterInfo.h"
51
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
52
#include "llvm/CodeGen/SelectionDAG.h"
53
#include "llvm/CodeGen/SelectionDAGNodes.h"
54
#include "llvm/CodeGen/TargetInstrInfo.h"
55
#include "llvm/CodeGen/TargetLowering.h"
56
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
57
#include "llvm/CodeGen/TargetRegisterInfo.h"
58
#include "llvm/CodeGen/ValueTypes.h"
59
#include "llvm/CodeGenTypes/MachineValueType.h"
60
#include "llvm/IR/CallingConv.h"
61
#include "llvm/IR/Constant.h"
62
#include "llvm/IR/Constants.h"
63
#include "llvm/IR/DataLayout.h"
64
#include "llvm/IR/DebugLoc.h"
65
#include "llvm/IR/DerivedTypes.h"
66
#include "llvm/IR/Function.h"
67
#include "llvm/IR/GlobalValue.h"
68
#include "llvm/IR/IRBuilder.h"
69
#include "llvm/IR/Instructions.h"
70
#include "llvm/IR/Intrinsics.h"
71
#include "llvm/IR/IntrinsicsPowerPC.h"
72
#include "llvm/IR/Module.h"
73
#include "llvm/IR/Type.h"
74
#include "llvm/IR/Use.h"
75
#include "llvm/IR/Value.h"
76
#include "llvm/MC/MCContext.h"
77
#include "llvm/MC/MCExpr.h"
78
#include "llvm/MC/MCRegisterInfo.h"
79
#include "llvm/MC/MCSectionXCOFF.h"
80
#include "llvm/MC/MCSymbolXCOFF.h"
81
#include "llvm/Support/AtomicOrdering.h"
82
#include "llvm/Support/BranchProbability.h"
83
#include "llvm/Support/Casting.h"
84
#include "llvm/Support/CodeGen.h"
85
#include "llvm/Support/CommandLine.h"
86
#include "llvm/Support/Compiler.h"
87
#include "llvm/Support/Debug.h"
88
#include "llvm/Support/ErrorHandling.h"
89
#include "llvm/Support/Format.h"
90
#include "llvm/Support/KnownBits.h"
91
#include "llvm/Support/MathExtras.h"
92
#include "llvm/Support/raw_ostream.h"
93
#include "llvm/Target/TargetMachine.h"
94
#include "llvm/Target/TargetOptions.h"
95
#include <algorithm>
96
#include <cassert>
97
#include <cstdint>
98
#include <iterator>
99
#include <list>
100
#include <optional>
101
#include <utility>
102
#include <vector>
103

104
using namespace llvm;
105

106
#define DEBUG_TYPE "ppc-lowering"
107

108
// Hidden boolean command-line option "disable-ppc-preinc". Per its
// description it turns off preincrement load/store generation on PPC. No
// cl::init is supplied here, and the code that reads the flag is outside this
// part of the file.
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110

111
// Hidden boolean command-line option "disable-ppc-ilp-pref". Per its
// description it stops the node scheduling preference from being set to ILP
// on PPC. The code that reads the flag is outside this part of the file.
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113

114
// Hidden boolean command-line option "disable-ppc-unaligned". Per its
// description it turns off unaligned load/store generation on PPC. The code
// that reads the flag is outside this part of the file.
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116

117
// Hidden boolean command-line option "disable-ppc-sco" ("SCO" = sibling call
// optimization, as spelled out in the description string). The code that
// reads the flag is outside this part of the file.
static cl::opt<bool> DisableSCO("disable-ppc-sco",
118
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119

120
// Hidden boolean command-line option "disable-ppc-innermost-loop-align32".
// Per its description, setting it means innermost loops are not always
// aligned to 32 bytes on PPC. The code that reads the flag is outside this
// part of the file.
static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122

123
// Hidden boolean command-line option "ppc-use-absolute-jumptables". Per its
// description it requests absolute jump tables on PPC. The code that reads
// the flag is outside this part of the file.
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125

126
// Hidden boolean command-line option "ppc-disable-perfect-shuffle", described
// as disabling vector permute decomposition. Note the explicit cl::init(true):
// the "disable" flag is on unless overridden on the command line.
static cl::opt<bool>
127
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128
                          cl::desc("disable vector permute decomposition"),
129
                          cl::init(true), cl::Hidden);
130

131
// Hidden boolean command-line option "disable-auto-paired-vec-st", described
// as disabling automatically generated 32-byte paired vector stores. It is
// initialised to true via cl::init. Unlike the neighbouring options this one
// is not declared static, so it has external linkage.
// NOTE(review): presumably referenced from another translation unit -- confirm.
cl::opt<bool> DisableAutoPairedVecSt(
132
    "disable-auto-paired-vec-st",
133
    cl::desc("disable automatically generated 32byte paired vector stores"),
134
    cl::init(true), cl::Hidden);
135

136
// Hidden unsigned command-line option "ppc-min-jump-table-entries": the
// minimum number of entries required before a jump table is used on PPC.
// Initialised to 64 via cl::init.
static cl::opt<unsigned> PPCMinimumJumpTableEntries(
137
    "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138
    cl::desc("Set minimum number of entries to use a jump table on PPC"));
139

140
// Hidden unsigned command-line option "ppc-gather-alias-max-depth": the
// maximum depth used when checking alias info in GatherAllAliases() (that
// function is not visible in this part of the file). Initialised to 18 via
// cl::init.
static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
141
    "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142
    cl::desc("max depth when checking alias info in GatherAllAliases()"));
143

144
// Hidden unsigned command-line option "ppc-aix-shared-lib-tls-model-opt-limit".
// Per its description it is an inclusive limit on the number of TLS
// local-dynamic accesses in a function for which initial-exec is used
// instead. Initialised to 1 via cl::init.
static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
145
    "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146
    cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147
             "function to use initial-exec"));
148

149
// Statistics counters for this file. Each STATISTIC pairs a counter name with
// the human-readable description given as its second argument. The code that
// increments them is outside this part of the file.
STATISTIC(NumTailCalls, "Number of tail calls");
150
STATISTIC(NumSiblingCalls, "Number of sibling calls");
151
STATISTIC(ShufflesHandledWithVPERM,
152
          "Number of shuffles lowered to a VPERM or XXPERM");
153
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
154

155
// Forward declarations of two file-local (static) helpers. Their definitions
// are not visible in this part of the file, and the parameters of
// isNByteElemShuffleMask are unnamed here.
// NOTE(review): document parameter meaning at the definitions.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
156

157
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
158

159
// File-local constant holding the string "__ssp_canary_word".
// NOTE(review): judging by the identifier this is the symbol name of the AIX
// stack-protector canary word -- confirm at the use sites, which are outside
// this part of the file.
static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
160

161
// A faster local-[exec|dynamic] TLS access sequence (enabled with the
162
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
163
// variables; consistent with the IBM XL compiler, we apply a max size of
164
// slightly under 32KB.
165
// The limit is 32751, i.e. 17 less than 32768 (32 * 1024).
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
166

167
// FIXME: Remove this once the bug has been fixed!
// Extern declaration only: the option object is defined in another
// translation unit. In this file the PPCTargetLowering constructor reads it,
// when CR bits are in use, to mark ISD::TRUNCATE to i1 as Custom.
168
extern cl::opt<bool> ANDIGlueBug;
169

170
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
171
                                     const PPCSubtarget &STI)
172
    : TargetLowering(TM), Subtarget(STI) {
173
  // Initialize map that relates the PPC addressing modes to the computed flags
174
  // of a load/store instruction. The map is used to determine the optimal
175
  // addressing mode when selecting load and stores.
176
  initializeAddrModeMap();
177
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
178
  // arguments are at least 4/8 bytes aligned.
179
  bool isPPC64 = Subtarget.isPPC64();
180
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
181

182
  // Set up the register classes.
183
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184
  if (!useSoftFloat()) {
185
    if (hasSPE()) {
186
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187
      // EFPU2 APU only supports f32
188
      if (!Subtarget.hasEFPU2())
189
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190
    } else {
191
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193
    }
194
  }
195

196
  // Match BITREVERSE to customized fast code sequence in the td file.
197
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
198
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
199

200
  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
201
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
202

203
  // Custom lower inline assembly to check for special registers.
204
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
205
  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
206

207
  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
208
  for (MVT VT : MVT::integer_valuetypes()) {
209
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
210
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
211
  }
212

213
  if (Subtarget.isISA3_0()) {
214
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
215
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
216
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
217
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
218
  } else {
219
    // No extending loads from f16 or HW conversions back and forth.
220
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
221
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
222
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
223
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
224
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
225
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
226
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
227
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
228
  }
229

230
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
231

232
  // PowerPC has pre-inc load and store's.
233
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
234
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
235
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
236
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
237
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
238
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
239
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
240
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
241
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
242
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
243
  if (!Subtarget.hasSPE()) {
244
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
245
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
246
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
247
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
248
  }
249

250
  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
251
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
252
  for (MVT VT : ScalarIntVTs) {
253
    setOperationAction(ISD::ADDC, VT, Legal);
254
    setOperationAction(ISD::ADDE, VT, Legal);
255
    setOperationAction(ISD::SUBC, VT, Legal);
256
    setOperationAction(ISD::SUBE, VT, Legal);
257
  }
258

259
  if (Subtarget.useCRBits()) {
260
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
261

262
    if (isPPC64 || Subtarget.hasFPCVT()) {
263
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
264
      AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
265
                        isPPC64 ? MVT::i64 : MVT::i32);
266
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
267
      AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
268
                        isPPC64 ? MVT::i64 : MVT::i32);
269

270
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
271
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
272
                         isPPC64 ? MVT::i64 : MVT::i32);
273
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
274
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
275
                        isPPC64 ? MVT::i64 : MVT::i32);
276

277
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
278
      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
279
                        isPPC64 ? MVT::i64 : MVT::i32);
280
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
281
      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
282
                        isPPC64 ? MVT::i64 : MVT::i32);
283

284
      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
285
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
286
                        isPPC64 ? MVT::i64 : MVT::i32);
287
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
288
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
289
                        isPPC64 ? MVT::i64 : MVT::i32);
290
    } else {
291
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
292
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
293
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
294
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
295
    }
296

297
    // PowerPC does not support direct load/store of condition registers.
298
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
299
    setOperationAction(ISD::STORE, MVT::i1, Custom);
300

301
    // FIXME: Remove this once the ANDI glue bug is fixed:
302
    if (ANDIGlueBug)
303
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
304

305
    for (MVT VT : MVT::integer_valuetypes()) {
306
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
307
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
308
      setTruncStoreAction(VT, MVT::i1, Expand);
309
    }
310

311
    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
312
  }
313

314
  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
315
  // PPC (the libcall is not available).
316
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
317
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
318
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
319
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
320

321
  // We do not currently implement these libm ops for PowerPC.
322
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
323
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
324
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
325
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
326
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
327
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
328

329
  // PowerPC has no SREM/UREM instructions unless we are on P9
330
  // On P9 we may use a hardware instruction to compute the remainder.
331
  // When the result of both the remainder and the division is required it is
332
  // more efficient to compute the remainder from the result of the division
333
  // rather than use the remainder instruction. The instructions are legalized
334
  // directly because the DivRemPairsPass performs the transformation at the IR
335
  // level.
336
  if (Subtarget.isISA3_0()) {
337
    setOperationAction(ISD::SREM, MVT::i32, Legal);
338
    setOperationAction(ISD::UREM, MVT::i32, Legal);
339
    setOperationAction(ISD::SREM, MVT::i64, Legal);
340
    setOperationAction(ISD::UREM, MVT::i64, Legal);
341
  } else {
342
    setOperationAction(ISD::SREM, MVT::i32, Expand);
343
    setOperationAction(ISD::UREM, MVT::i32, Expand);
344
    setOperationAction(ISD::SREM, MVT::i64, Expand);
345
    setOperationAction(ISD::UREM, MVT::i64, Expand);
346
  }
347

348
  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
349
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
350
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
351
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
352
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
353
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
354
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
355
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
356
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
357

358
  // Handle constrained floating-point operations of scalar.
359
  // TODO: Handle SPE specific operation.
360
  setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
361
  setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
362
  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
363
  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
364
  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
365

366
  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
367
  setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
368
  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
369
  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
370

371
  if (!Subtarget.hasSPE()) {
372
    setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
373
    setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
374
  }
375

376
  if (Subtarget.hasVSX()) {
377
    setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
378
    setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
379
  }
380

381
  if (Subtarget.hasFSQRT()) {
382
    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
383
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
384
  }
385

386
  if (Subtarget.hasFPRND()) {
387
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
388
    setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
389
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
390
    setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
391

392
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
393
    setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
394
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
395
    setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
396
  }
397

398
  // We don't support sin/cos/sqrt/fmod/pow
399
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
400
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
401
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
402
  setOperationAction(ISD::FREM , MVT::f64, Expand);
403
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
404
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
405
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
406
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
407
  setOperationAction(ISD::FREM , MVT::f32, Expand);
408
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
409

410
  // MASS transformation for LLVM intrinsics with replicating fast-math flag
411
  // to be consistent to PPCGenScalarMASSEntries pass
412
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
413
    setOperationAction(ISD::FSIN , MVT::f64, Custom);
414
    setOperationAction(ISD::FCOS , MVT::f64, Custom);
415
    setOperationAction(ISD::FPOW , MVT::f64, Custom);
416
    setOperationAction(ISD::FLOG, MVT::f64, Custom);
417
    setOperationAction(ISD::FLOG10, MVT::f64, Custom);
418
    setOperationAction(ISD::FEXP, MVT::f64, Custom);
419
    setOperationAction(ISD::FSIN , MVT::f32, Custom);
420
    setOperationAction(ISD::FCOS , MVT::f32, Custom);
421
    setOperationAction(ISD::FPOW , MVT::f32, Custom);
422
    setOperationAction(ISD::FLOG, MVT::f32, Custom);
423
    setOperationAction(ISD::FLOG10, MVT::f32, Custom);
424
    setOperationAction(ISD::FEXP, MVT::f32, Custom);
425
  }
426

427
  if (Subtarget.hasSPE()) {
428
    setOperationAction(ISD::FMA  , MVT::f64, Expand);
429
    setOperationAction(ISD::FMA  , MVT::f32, Expand);
430
  } else {
431
    setOperationAction(ISD::FMA  , MVT::f64, Legal);
432
    setOperationAction(ISD::FMA  , MVT::f32, Legal);
433
  }
434

435
  if (Subtarget.hasSPE())
436
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
437

438
  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
439

440
  // If we're enabling GP optimizations, use hardware square root
441
  if (!Subtarget.hasFSQRT() &&
442
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
443
        Subtarget.hasFRE()))
444
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
445

446
  if (!Subtarget.hasFSQRT() &&
447
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
448
        Subtarget.hasFRES()))
449
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
450

451
  if (Subtarget.hasFCPSGN()) {
452
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
453
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
454
  } else {
455
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
456
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
457
  }
458

459
  if (Subtarget.hasFPRND()) {
460
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
461
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
462
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
463
    setOperationAction(ISD::FROUND, MVT::f64, Legal);
464

465
    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
466
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
467
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
468
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
469
  }
470

471
  // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
472
  // instruction xxbrd to speed up scalar BSWAP64.
473
  if (Subtarget.isISA3_1()) {
474
    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
475
    setOperationAction(ISD::BSWAP, MVT::i64, Legal);
476
  } else {
477
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
478
    setOperationAction(
479
        ISD::BSWAP, MVT::i64,
480
        (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
481
  }
482

483
  // CTPOP or CTTZ were introduced in P8/P9 respectively
484
  if (Subtarget.isISA3_0()) {
485
    setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
486
    setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
487
  } else {
488
    setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
489
    setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
490
  }
491

492
  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
493
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
494
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
495
  } else {
496
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
497
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
498
  }
499

500
  // PowerPC does not have ROTR
501
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
502
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);
503

504
  if (!Subtarget.useCRBits()) {
505
    // PowerPC does not have Select
506
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
507
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
508
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
509
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
510
  }
511

512
  // PowerPC wants to turn select_cc of FP into fsel when possible.
513
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
514
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
515

516
  // PowerPC wants to optimize integer setcc a bit
517
  if (!Subtarget.useCRBits())
518
    setOperationAction(ISD::SETCC, MVT::i32, Custom);
519

520
  if (Subtarget.hasFPU()) {
521
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
522
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
523
    setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
524

525
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
526
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
527
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
528
  }
529

530
  // PowerPC does not have BRCOND which requires SetCC
531
  if (!Subtarget.useCRBits())
532
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);
533

534
  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
535

536
  if (Subtarget.hasSPE()) {
537
    // SPE has built-in conversions
538
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
539
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
540
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
541
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
542
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
543
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
544

545
    // SPE supports signaling compare of f32/f64.
546
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
547
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
548
  } else {
549
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
550
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
551
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
552

553
    // PowerPC does not have [U|S]INT_TO_FP
554
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
555
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
556
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
557
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
558
  }
559

560
  if (Subtarget.hasDirectMove() && isPPC64) {
561
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
562
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
563
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
564
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
565
    if (TM.Options.UnsafeFPMath) {
566
      setOperationAction(ISD::LRINT, MVT::f64, Legal);
567
      setOperationAction(ISD::LRINT, MVT::f32, Legal);
568
      setOperationAction(ISD::LLRINT, MVT::f64, Legal);
569
      setOperationAction(ISD::LLRINT, MVT::f32, Legal);
570
      setOperationAction(ISD::LROUND, MVT::f64, Legal);
571
      setOperationAction(ISD::LROUND, MVT::f32, Legal);
572
      setOperationAction(ISD::LLROUND, MVT::f64, Legal);
573
      setOperationAction(ISD::LLROUND, MVT::f32, Legal);
574
    }
575
  } else {
576
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
577
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
578
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
579
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
580
  }
581

582
  // We cannot sextinreg(i1).  Expand to shifts.
583
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
584

585
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
586
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
587
  // support continuation, user-level threading, and etc.. As a result, no
588
  // other SjLj exception interfaces are implemented and please don't build
589
  // your own exception handling based on them.
590
  // LLVM/Clang supports zero-cost DWARF exception handling.
591
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
592
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
593

594
  // We want to legalize GlobalAddress and ConstantPool nodes into the
595
  // appropriate instructions to materialize the address.
596
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
597
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
598
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
599
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
600
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
601
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
602
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
603
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
604
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
605
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
606

607
  // TRAP is legal.
608
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
609

610
  // TRAMPOLINE is custom lowered.
611
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
612
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
613

614
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
615
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
616

617
  if (Subtarget.is64BitELFABI()) {
618
    // VAARG always uses double-word chunks, so promote anything smaller.
619
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
620
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
621
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
622
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
623
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
624
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
625
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
626
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
627
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
628
  } else if (Subtarget.is32BitELFABI()) {
629
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
630
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
631
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
632
  } else
633
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
634

635
  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
636
  if (Subtarget.is32BitELFABI())
637
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
638
  else
639
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
640

641
  // Use the default implementation.
642
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
643
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
644
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
645
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
646
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
647
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
648
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
649
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
650
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
651

652
  // We want to custom lower some of our intrinsics.
653
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
654
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
655
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
656
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
657
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
658

659
  // To handle counter-based loop conditions.
660
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
661

662
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
663
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
664
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
665
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
666

667
  // Comparisons that require checking two conditions.
668
  if (Subtarget.hasSPE()) {
669
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
670
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
671
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
672
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
673
  }
674
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
675
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
676
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
677
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
678
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
679
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
680
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
681
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
682
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
683
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
684
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
685
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
686

687
  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
688
  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
689

690
  if (Subtarget.has64BitSupport()) {
691
    // They also have instructions for converting between i64 and fp.
692
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
693
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
694
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
695
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
696
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
697
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
698
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
699
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
700
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
701
    // We cannot do this with Promote because i64 is not a legal type.
702
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
703
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
704

705
    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
706
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
707
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
708
    }
709
  } else {
710
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
711
    if (Subtarget.hasSPE()) {
712
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
713
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
714
    } else {
715
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
716
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
717
    }
718
  }
719

720
  // With the instructions enabled under FPCVT, we can do everything.
721
  if (Subtarget.hasFPCVT()) {
722
    if (Subtarget.has64BitSupport()) {
723
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
724
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
725
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
726
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
727
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
728
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
729
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
730
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
731
    }
732

733
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
734
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
735
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
736
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
737
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
738
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
739
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
740
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
741
  }
742

743
  if (Subtarget.use64BitRegs()) {
744
    // 64-bit PowerPC implementations can support i64 types directly
745
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
746
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
747
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
748
    // 64-bit PowerPC wants to expand i128 shifts itself.
749
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
750
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
751
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
752
  } else {
753
    // 32-bit PowerPC wants to expand i64 shifts itself.
754
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
755
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
756
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
757
  }
758

759
  // PowerPC has better expansions for funnel shifts than the generic
760
  // TargetLowering::expandFunnelShift.
761
  if (Subtarget.has64BitSupport()) {
762
    setOperationAction(ISD::FSHL, MVT::i64, Custom);
763
    setOperationAction(ISD::FSHR, MVT::i64, Custom);
764
  }
765
  setOperationAction(ISD::FSHL, MVT::i32, Custom);
766
  setOperationAction(ISD::FSHR, MVT::i32, Custom);
767

768
  if (Subtarget.hasVSX()) {
769
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
770
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
771
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
772
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
773
  }
774

775
  if (Subtarget.hasAltivec()) {
776
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
777
      setOperationAction(ISD::SADDSAT, VT, Legal);
778
      setOperationAction(ISD::SSUBSAT, VT, Legal);
779
      setOperationAction(ISD::UADDSAT, VT, Legal);
780
      setOperationAction(ISD::USUBSAT, VT, Legal);
781
    }
782
    // First set operation action for all vector types to expand. Then we
783
    // will selectively turn on ones that can be effectively codegen'd.
784
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
785
      // add/sub are legal for all supported vector VT's.
786
      setOperationAction(ISD::ADD, VT, Legal);
787
      setOperationAction(ISD::SUB, VT, Legal);
788

789
      // For v2i64, these are only valid with P8Vector. This is corrected after
790
      // the loop.
791
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
792
        setOperationAction(ISD::SMAX, VT, Legal);
793
        setOperationAction(ISD::SMIN, VT, Legal);
794
        setOperationAction(ISD::UMAX, VT, Legal);
795
        setOperationAction(ISD::UMIN, VT, Legal);
796
      }
797
      else {
798
        setOperationAction(ISD::SMAX, VT, Expand);
799
        setOperationAction(ISD::SMIN, VT, Expand);
800
        setOperationAction(ISD::UMAX, VT, Expand);
801
        setOperationAction(ISD::UMIN, VT, Expand);
802
      }
803

804
      if (Subtarget.hasVSX()) {
805
        setOperationAction(ISD::FMAXNUM, VT, Legal);
806
        setOperationAction(ISD::FMINNUM, VT, Legal);
807
      }
808

809
      // Vector instructions introduced in P8
810
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
811
        setOperationAction(ISD::CTPOP, VT, Legal);
812
        setOperationAction(ISD::CTLZ, VT, Legal);
813
      }
814
      else {
815
        setOperationAction(ISD::CTPOP, VT, Expand);
816
        setOperationAction(ISD::CTLZ, VT, Expand);
817
      }
818

819
      // Vector instructions introduced in P9
820
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
821
        setOperationAction(ISD::CTTZ, VT, Legal);
822
      else
823
        setOperationAction(ISD::CTTZ, VT, Expand);
824

825
      // We promote all shuffles to v16i8.
826
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
827
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
828

829
      // We promote all non-typed operations to v4i32.
830
      setOperationAction(ISD::AND   , VT, Promote);
831
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
832
      setOperationAction(ISD::OR    , VT, Promote);
833
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
834
      setOperationAction(ISD::XOR   , VT, Promote);
835
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
836
      setOperationAction(ISD::LOAD  , VT, Promote);
837
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
838
      setOperationAction(ISD::SELECT, VT, Promote);
839
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
840
      setOperationAction(ISD::VSELECT, VT, Legal);
841
      setOperationAction(ISD::SELECT_CC, VT, Promote);
842
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
843
      setOperationAction(ISD::STORE, VT, Promote);
844
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
845

846
      // No other operations are legal.
847
      setOperationAction(ISD::MUL , VT, Expand);
848
      setOperationAction(ISD::SDIV, VT, Expand);
849
      setOperationAction(ISD::SREM, VT, Expand);
850
      setOperationAction(ISD::UDIV, VT, Expand);
851
      setOperationAction(ISD::UREM, VT, Expand);
852
      setOperationAction(ISD::FDIV, VT, Expand);
853
      setOperationAction(ISD::FREM, VT, Expand);
854
      setOperationAction(ISD::FNEG, VT, Expand);
855
      setOperationAction(ISD::FSQRT, VT, Expand);
856
      setOperationAction(ISD::FLOG, VT, Expand);
857
      setOperationAction(ISD::FLOG10, VT, Expand);
858
      setOperationAction(ISD::FLOG2, VT, Expand);
859
      setOperationAction(ISD::FEXP, VT, Expand);
860
      setOperationAction(ISD::FEXP2, VT, Expand);
861
      setOperationAction(ISD::FSIN, VT, Expand);
862
      setOperationAction(ISD::FCOS, VT, Expand);
863
      setOperationAction(ISD::FABS, VT, Expand);
864
      setOperationAction(ISD::FFLOOR, VT, Expand);
865
      setOperationAction(ISD::FCEIL,  VT, Expand);
866
      setOperationAction(ISD::FTRUNC, VT, Expand);
867
      setOperationAction(ISD::FRINT,  VT, Expand);
868
      setOperationAction(ISD::FLDEXP, VT, Expand);
869
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
870
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
871
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
872
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
873
      setOperationAction(ISD::MULHU, VT, Expand);
874
      setOperationAction(ISD::MULHS, VT, Expand);
875
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
876
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
877
      setOperationAction(ISD::UDIVREM, VT, Expand);
878
      setOperationAction(ISD::SDIVREM, VT, Expand);
879
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
880
      setOperationAction(ISD::FPOW, VT, Expand);
881
      setOperationAction(ISD::BSWAP, VT, Expand);
882
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
883
      setOperationAction(ISD::ROTL, VT, Expand);
884
      setOperationAction(ISD::ROTR, VT, Expand);
885

886
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
887
        setTruncStoreAction(VT, InnerVT, Expand);
888
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
889
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
890
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
891
      }
892
    }
893
    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
894
    if (!Subtarget.hasP8Vector()) {
895
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
896
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
897
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
898
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
899
    }
900

901
    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
902
    // with merges, splats, etc.
903
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
904

905
    // Vector truncates to sub-word integers that fit in an Altivec/VSX register
906
    // are cheap, so handle them before they get expanded to scalar.
907
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
908
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
909
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
910
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
911
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
912

913
    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
914
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
915
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
916
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
917
    setOperationAction(ISD::SELECT, MVT::v4i32,
918
                       Subtarget.useCRBits() ? Legal : Expand);
919
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
920
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
921
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
922
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
923
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
924
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
925
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
926
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
927
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
928
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
929
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
930
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
931
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
932

933
    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
934
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
935
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
936
    if (Subtarget.hasAltivec())
937
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
938
        setOperationAction(ISD::ROTL, VT, Legal);
939
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
940
    if (Subtarget.hasP8Altivec())
941
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
942

943
    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
944
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
945
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
946
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
947

948
    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
949
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
950

951
    if (Subtarget.hasVSX()) {
952
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
953
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
954
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
955
    }
956

957
    if (Subtarget.hasP8Altivec())
958
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
959
    else
960
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);
961

962
    if (Subtarget.isISA3_1()) {
963
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
964
      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
965
      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
966
      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
967
      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
968
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
969
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
970
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
971
      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
972
      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
973
      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
974
      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
975
      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
976
      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
977
      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
978
      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
979
      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
980
      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
981
    }
982

983
    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
984
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
985

986
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
987
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
988

989
    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
990
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
991
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
992
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
993

994
    // Altivec does not contain unordered floating-point compare instructions
995
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
996
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
997
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
998
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
999

1000
    if (Subtarget.hasVSX()) {
1001
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
1002
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
1003
      if (Subtarget.hasP8Vector()) {
1004
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
1005
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
1006
      }
1007
      if (Subtarget.hasDirectMove() && isPPC64) {
1008
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
1009
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
1010
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
1011
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
1012
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
1013
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
1014
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
1015
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
1016
      }
1017
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
1018

1019
      // The nearbyint variants are not allowed to raise the inexact exception
1020
      // so we can only code-gen them with unsafe math.
1021
      if (TM.Options.UnsafeFPMath) {
1022
        setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1023
        setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1024
      }
1025

1026
      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1027
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1028
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1029
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1030
      setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1031
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1032
      setOperationAction(ISD::FROUND, MVT::f64, Legal);
1033
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
1034

1035
      setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1036
      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1037
      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1038
      setOperationAction(ISD::FROUND, MVT::f32, Legal);
1039
      setOperationAction(ISD::FRINT, MVT::f32, Legal);
1040

1041
      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1042
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1043

1044
      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1045
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1046

1047
      // Share the Altivec comparison restrictions.
1048
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1049
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1050
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
1051
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1052

1053
      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1054
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1055

1056
      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
1057

1058
      if (Subtarget.hasP8Vector())
1059
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1060

1061
      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1062

1063
      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1064
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1065
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1066

1067
      if (Subtarget.hasP8Altivec()) {
1068
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1069
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1070
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1071

1072
        // 128 bit shifts can be accomplished via 3 instructions for SHL and
1073
        // SRL, but not for SRA because of the instructions available:
1074
        // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1075
        // doing
1076
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1077
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1078
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1079

1080
        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1081
      }
1082
      else {
1083
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1084
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1085
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1086

1087
        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1088

1089
        // VSX v2i64 only supports non-arithmetic operations.
1090
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1091
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1092
      }
1093

1094
      if (Subtarget.isISA3_1())
1095
        setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1096
      else
1097
        setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1098

1099
      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1101
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1102
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1103

1104
      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
1105

1106
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1107
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1108
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1109
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1110
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1111
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1112
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1113
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1114

1115
      // Custom handling for partial vectors of integers converted to
1116
      // floating point. We already have optimal handling for v2i32 through
1117
      // the DAG combine, so those aren't necessary.
1118
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1119
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1120
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1121
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1122
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1123
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1124
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1125
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1126
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1127
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1128
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1129
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1130
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1131
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1132
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1133
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1134

1135
      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1136
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1137
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1138
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1139
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1140
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1141

1142
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1143
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1144

1145
      // Handle constrained floating-point operations of vector.
1146
      // The predicate is `hasVSX` because Altivec instructions raise no
1147
      // exceptions, whereas VSX vector instructions do.
1148
      setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1149
      setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1150
      setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1151
      setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1152
      setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1153
      setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1154
      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1155
      setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1156
      setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1157
      setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1158
      setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1159
      setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1160
      setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1161

1162
      setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1163
      setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1164
      setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1165
      setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1166
      setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1167
      setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1168
      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1169
      setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1170
      setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1171
      setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1172
      setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1173
      setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1174
      setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1175

1176
      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1177
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1178

1179
      for (MVT FPT : MVT::fp_valuetypes())
1180
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1181

1182
      // Expand the SELECT to SELECT_CC
1183
      setOperationAction(ISD::SELECT, MVT::f128, Expand);
1184

1185
      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1186
      setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1187

1188
      // No implementation for these ops for PowerPC.
1189
      setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1190
      setOperationAction(ISD::FSIN, MVT::f128, Expand);
1191
      setOperationAction(ISD::FCOS, MVT::f128, Expand);
1192
      setOperationAction(ISD::FPOW, MVT::f128, Expand);
1193
      setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1194
      setOperationAction(ISD::FREM, MVT::f128, Expand);
1195
    }
1196

1197
    if (Subtarget.hasP8Altivec()) {
1198
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1199
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1200
    }
1201

1202
    if (Subtarget.hasP9Vector()) {
1203
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1204
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1205

1206
      // Test data class instructions store results in CR bits.
1207
      if (Subtarget.useCRBits()) {
1208
        setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
1209
        setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
1210
        setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
1211
      }
1212

1213
      // 128 bit shifts can be accomplished via 3 instructions for SHL and
1214
      // SRL, but not for SRA because of the instructions available:
1215
      // VS{RL} and VS{RL}O.
1216
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1217
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1218
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1219

1220
      setOperationAction(ISD::FADD, MVT::f128, Legal);
1221
      setOperationAction(ISD::FSUB, MVT::f128, Legal);
1222
      setOperationAction(ISD::FDIV, MVT::f128, Legal);
1223
      setOperationAction(ISD::FMUL, MVT::f128, Legal);
1224
      setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1225

1226
      setOperationAction(ISD::FMA, MVT::f128, Legal);
1227
      setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1228
      setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1229
      setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1230
      setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1231
      setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1232
      setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1233

1234
      setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1235
      setOperationAction(ISD::FRINT, MVT::f128, Legal);
1236
      setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1237
      setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1238
      setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1239
      setOperationAction(ISD::FROUND, MVT::f128, Legal);
1240

1241
      setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1242
      setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1243
      setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1244

1245
      // Handle constrained floating-point operations of fp128
1246
      setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1247
      setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1248
      setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1249
      setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1250
      setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1251
      setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1252
      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1253
      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1254
      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1255
      setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1256
      setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1257
      setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1258
      setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1259
      setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1260
      setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1261
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1262
      setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1263
      setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1264
      setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1265
      setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1266
    } else if (Subtarget.hasVSX()) {
1267
      setOperationAction(ISD::LOAD, MVT::f128, Promote);
1268
      setOperationAction(ISD::STORE, MVT::f128, Promote);
1269

1270
      AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1271
      AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1272

1273
      // Set FADD/FSUB as libcall to keep the legalizer from expanding the
1274
      // fp_to_uint and int_to_fp.
1275
      setOperationAction(ISD::FADD, MVT::f128, LibCall);
1276
      setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1277

1278
      setOperationAction(ISD::FMUL, MVT::f128, Expand);
1279
      setOperationAction(ISD::FDIV, MVT::f128, Expand);
1280
      setOperationAction(ISD::FNEG, MVT::f128, Expand);
1281
      setOperationAction(ISD::FABS, MVT::f128, Expand);
1282
      setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1283
      setOperationAction(ISD::FMA, MVT::f128, Expand);
1284
      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1285

1286
      // Expand the fp_extend if the target type is fp128.
1287
      setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1288
      setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1289

1290
      // Expand the fp_round if the source type is fp128.
1291
      for (MVT VT : {MVT::f32, MVT::f64}) {
1292
        setOperationAction(ISD::FP_ROUND, VT, Custom);
1293
        setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1294
      }
1295

1296
      setOperationAction(ISD::SETCC, MVT::f128, Custom);
1297
      setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
1298
      setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
1299
      setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1300

1301
      // Lower following f128 select_cc pattern:
1302
      // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1303
      setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1304

1305
      // We need to handle f128 SELECT_CC with integer result type.
1306
      setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1307
      setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1308
    }
1309

1310
    if (Subtarget.hasP9Altivec()) {
1311
      if (Subtarget.isISA3_1()) {
1312
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
1313
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
1314
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
1315
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
1316
      } else {
1317
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1318
        setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1319
      }
1320
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1321
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1322
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1323
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1324
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1325
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1326
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1327

1328
      setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1329
      setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1330
      setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1331
      setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1332
    }
1333

1334
    if (Subtarget.hasP10Vector()) {
1335
      setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1336
    }
1337
  }
1338

1339
  if (Subtarget.pairedVectorMemops()) {
1340
    addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1341
    setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1342
    setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1343
  }
1344
  if (Subtarget.hasMMA()) {
1345
    if (Subtarget.isISAFuture())
1346
      addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1347
    else
1348
      addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1349
    setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1350
    setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1351
    setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1352
  }
1353

1354
  if (Subtarget.has64BitSupport())
1355
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1356

1357
  if (Subtarget.isISA3_1())
1358
    setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1359

1360
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1361

1362
  if (!isPPC64) {
1363
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1364
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1365
  }
1366

1367
  if (shouldInlineQuadwordAtomics()) {
1368
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1369
    setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1370
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
1371
  }
1372

1373
  setBooleanContents(ZeroOrOneBooleanContent);
1374

1375
  if (Subtarget.hasAltivec()) {
1376
    // Altivec instructions set fields to all zeros or all ones.
1377
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1378
  }
1379

1380
  if (shouldInlineQuadwordAtomics())
1381
    setMaxAtomicSizeInBitsSupported(128);
1382
  else if (isPPC64)
1383
    setMaxAtomicSizeInBitsSupported(64);
1384
  else
1385
    setMaxAtomicSizeInBitsSupported(32);
1386

1387
  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1388

1389
  // We have target-specific dag combine patterns for the following nodes:
1390
  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
1391
                       ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1392
  if (Subtarget.hasFPCVT())
1393
    setTargetDAGCombine(ISD::UINT_TO_FP);
1394
  setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1395
  if (Subtarget.useCRBits())
1396
    setTargetDAGCombine(ISD::BRCOND);
1397
  setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1398
                       ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1399

1400
  setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1401

1402
  setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1403

1404
  if (Subtarget.useCRBits()) {
1405
    setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1406
  }
1407

1408
  setLibcallName(RTLIB::LOG_F128, "logf128");
1409
  setLibcallName(RTLIB::LOG2_F128, "log2f128");
1410
  setLibcallName(RTLIB::LOG10_F128, "log10f128");
1411
  setLibcallName(RTLIB::EXP_F128, "expf128");
1412
  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1413
  setLibcallName(RTLIB::SIN_F128, "sinf128");
1414
  setLibcallName(RTLIB::COS_F128, "cosf128");
1415
  setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1416
  setLibcallName(RTLIB::POW_F128, "powf128");
1417
  setLibcallName(RTLIB::FMIN_F128, "fminf128");
1418
  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1419
  setLibcallName(RTLIB::REM_F128, "fmodf128");
1420
  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1421
  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1422
  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1423
  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1424
  setLibcallName(RTLIB::ROUND_F128, "roundf128");
1425
  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1426
  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1427
  setLibcallName(RTLIB::RINT_F128, "rintf128");
1428
  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1429
  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1430
  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1431
  setLibcallName(RTLIB::FMA_F128, "fmaf128");
1432
  setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1433

1434
  if (Subtarget.isAIXABI()) {
1435
    setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1436
    setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1437
    setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1438
    setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1439
  }
1440

1441
  // With 32 condition bits, we don't need to sink (and duplicate) compares
1442
  // aggressively in CodeGenPrep.
1443
  if (Subtarget.useCRBits()) {
1444
    setHasMultipleConditionRegisters();
1445
    setJumpIsExpensive();
1446
  }
1447

1448
  // TODO: The default entry number is set to 64. This stops most jump table
1449
  // generation on PPC. But it is good for current PPC HWs because the indirect
1450
  // branch instruction mtctr to the jump table may be poorly predicted.
1451
  // Re-evaluate this value on future HWs that can do better with mtctr.
1452
  setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1453

1454
  setMinFunctionAlignment(Align(4));
1455

1456
  switch (Subtarget.getCPUDirective()) {
1457
  default: break;
1458
  case PPC::DIR_970:
1459
  case PPC::DIR_A2:
1460
  case PPC::DIR_E500:
1461
  case PPC::DIR_E500mc:
1462
  case PPC::DIR_E5500:
1463
  case PPC::DIR_PWR4:
1464
  case PPC::DIR_PWR5:
1465
  case PPC::DIR_PWR5X:
1466
  case PPC::DIR_PWR6:
1467
  case PPC::DIR_PWR6X:
1468
  case PPC::DIR_PWR7:
1469
  case PPC::DIR_PWR8:
1470
  case PPC::DIR_PWR9:
1471
  case PPC::DIR_PWR10:
1472
  case PPC::DIR_PWR11:
1473
  case PPC::DIR_PWR_FUTURE:
1474
    setPrefLoopAlignment(Align(16));
1475
    setPrefFunctionAlignment(Align(16));
1476
    break;
1477
  }
1478

1479
  if (Subtarget.enableMachineScheduler())
1480
    setSchedulingPreference(Sched::Source);
1481
  else
1482
    setSchedulingPreference(Sched::Hybrid);
1483

1484
  computeRegisterProperties(STI.getRegisterInfo());
1485

1486
  // The Freescale cores do better with aggressive inlining of memcpy and
1487
  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1488
  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1489
      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1490
    MaxStoresPerMemset = 32;
1491
    MaxStoresPerMemsetOptSize = 16;
1492
    MaxStoresPerMemcpy = 32;
1493
    MaxStoresPerMemcpyOptSize = 8;
1494
    MaxStoresPerMemmove = 32;
1495
    MaxStoresPerMemmoveOptSize = 8;
1496
  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1497
    // The A2 also benefits from (very) aggressive inlining of memcpy and
1498
    // friends. The overhead of the function call, even when warm, can be
1499
    // over one hundred cycles.
1500
    MaxStoresPerMemset = 128;
1501
    MaxStoresPerMemcpy = 128;
1502
    MaxStoresPerMemmove = 128;
1503
    MaxLoadsPerMemcmp = 128;
1504
  } else {
1505
    MaxLoadsPerMemcmp = 8;
1506
    MaxLoadsPerMemcmpOptSize = 4;
1507
  }
1508

1509
  IsStrictFPEnabled = true;
1510

1511
  // Let the subtarget (CPU) decide if a predictable select is more expensive
1512
  // than the corresponding branch. This information is used in CGP to decide
1513
  // when to convert selects into branches.
1514
  PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1515

1516
  GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1517
}
1518

1519
// *********************************** NOTE ************************************
1520
// For selecting load and store instructions, the addressing modes are defined
1521
// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1522
// patterns to match the load and store instructions.
1523
//
1524
// The TD definitions for the addressing modes correspond to their respective
1525
// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1526
// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1527
// address mode flags of a particular node. Afterwards, the computed address
1528
// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1529
// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1530
// accordingly, based on the preferred addressing mode.
1531
//
1532
// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1533
// MemOpFlags contains all the possible flags that can be used to compute the
1534
// optimal addressing mode for load and store instructions.
1535
// AddrMode contains all the possible load and store addressing modes available
1536
// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1537
//
1538
// When adding new load and store instructions, it is possible that new address
1539
// flags may need to be added into MemOpFlags, and a new addressing mode will
1540
// need to be added to AddrMode. An entry of the new addressing mode (consisting
1541
// of the minimal and main distinguishing address flags for the new load/store
1542
// instructions) will need to be added into initializeAddrModeMap() below.
1543
// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1544
// need to be updated to account for selecting the optimal addressing mode.
1545
// *****************************************************************************
1546
/// Initialize the map that relates the different addressing modes of the load
1547
/// and store instructions to a set of flags. This ensures the load/store
1548
/// instruction is correctly matched during instruction selection.
1549
void PPCTargetLowering::initializeAddrModeMap() {
1550
  AddrModesMap[PPC::AM_DForm] = {
1551
      // LWZ, STW
1552
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1553
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1554
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1555
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1556
      // LBZ, LHZ, STB, STH
1557
      PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1558
      PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1559
      PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1560
      PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1561
      // LHA
1562
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1563
      PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1564
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1565
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1566
      // LFS, LFD, STFS, STFD
1567
      PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1568
      PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1569
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1570
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1571
  };
1572
  AddrModesMap[PPC::AM_DSForm] = {
1573
      // LWA
1574
      PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1575
      PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1576
      PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1577
      // LD, STD
1578
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1579
      PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1580
      PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1581
      // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1582
      PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1583
      PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1584
      PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1585
  };
1586
  AddrModesMap[PPC::AM_DQForm] = {
1587
      // LXV, STXV
1588
      PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1589
      PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1590
      PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1591
  };
1592
  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1593
                                       PPC::MOF_SubtargetP10};
1594
  // TODO: Add mapping for quadword load/store.
1595
}
1596

1597
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1598
/// the desired ByVal argument alignment.
1599
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1600
  if (MaxAlign == MaxMaxAlign)
1601
    return;
1602
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1603
    if (MaxMaxAlign >= 32 &&
1604
        VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1605
      MaxAlign = Align(32);
1606
    else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1607
             MaxAlign < 16)
1608
      MaxAlign = Align(16);
1609
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1610
    Align EltAlign;
1611
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1612
    if (EltAlign > MaxAlign)
1613
      MaxAlign = EltAlign;
1614
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1615
    for (auto *EltTy : STy->elements()) {
1616
      Align EltAlign;
1617
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1618
      if (EltAlign > MaxAlign)
1619
        MaxAlign = EltAlign;
1620
      if (MaxAlign == MaxMaxAlign)
1621
        break;
1622
    }
1623
  }
1624
}
1625

1626
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1627
/// function arguments in the caller parameter area.
1628
uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1629
                                                  const DataLayout &DL) const {
1630
  // 16byte and wider vectors are passed on 16byte boundary.
1631
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1632
  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1633
  if (Subtarget.hasAltivec())
1634
    getMaxByValAlign(Ty, Alignment, Align(16));
1635
  return Alignment.value();
1636
}
1637

1638
bool PPCTargetLowering::useSoftFloat() const {
1639
  return Subtarget.useSoftFloat();
1640
}
1641

1642
bool PPCTargetLowering::hasSPE() const {
1643
  return Subtarget.hasSPE();
1644
}
1645

1646
/// Returns true exactly when \p VT is a scalar integer type.
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  const bool IsScalarInt = VT.isScalarInteger();
  return IsScalarInt;
}
1649

1650
bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1651
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1652
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1653
    return false;
1654

1655
  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1656
    if (VTy->getScalarType()->isIntegerTy()) {
1657
      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1658
      if (ElemSizeInBits == 32) {
1659
        Index = Subtarget.isLittleEndian() ? 2 : 1;
1660
        return true;
1661
      }
1662
      if (ElemSizeInBits == 64) {
1663
        Index = Subtarget.isLittleEndian() ? 1 : 0;
1664
        return true;
1665
      }
1666
    }
1667
  }
1668
  return false;
1669
}
1670

1671
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1672
  switch ((PPCISD::NodeType)Opcode) {
1673
  case PPCISD::FIRST_NUMBER:    break;
1674
  case PPCISD::FSEL:            return "PPCISD::FSEL";
1675
  case PPCISD::XSMAXC:          return "PPCISD::XSMAXC";
1676
  case PPCISD::XSMINC:          return "PPCISD::XSMINC";
1677
  case PPCISD::FCFID:           return "PPCISD::FCFID";
1678
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1679
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1680
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1681
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1682
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1683
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1684
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1685
  case PPCISD::FRE:             return "PPCISD::FRE";
1686
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1687
  case PPCISD::FTSQRT:
1688
    return "PPCISD::FTSQRT";
1689
  case PPCISD::FSQRT:
1690
    return "PPCISD::FSQRT";
1691
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1692
  case PPCISD::VPERM:           return "PPCISD::VPERM";
1693
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1694
  case PPCISD::XXSPLTI_SP_TO_DP:
1695
    return "PPCISD::XXSPLTI_SP_TO_DP";
1696
  case PPCISD::XXSPLTI32DX:
1697
    return "PPCISD::XXSPLTI32DX";
1698
  case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1699
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1700
  case PPCISD::XXPERM:
1701
    return "PPCISD::XXPERM";
1702
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1703
  case PPCISD::CMPB:            return "PPCISD::CMPB";
1704
  case PPCISD::Hi:              return "PPCISD::Hi";
1705
  case PPCISD::Lo:              return "PPCISD::Lo";
1706
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1707
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1708
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1709
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1710
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1711
  case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1712
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1713
  case PPCISD::SRL:             return "PPCISD::SRL";
1714
  case PPCISD::SRA:             return "PPCISD::SRA";
1715
  case PPCISD::SHL:             return "PPCISD::SHL";
1716
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1717
  case PPCISD::CALL:            return "PPCISD::CALL";
1718
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1719
  case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1720
  case PPCISD::CALL_RM:
1721
    return "PPCISD::CALL_RM";
1722
  case PPCISD::CALL_NOP_RM:
1723
    return "PPCISD::CALL_NOP_RM";
1724
  case PPCISD::CALL_NOTOC_RM:
1725
    return "PPCISD::CALL_NOTOC_RM";
1726
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1727
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1728
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1729
  case PPCISD::BCTRL_RM:
1730
    return "PPCISD::BCTRL_RM";
1731
  case PPCISD::BCTRL_LOAD_TOC_RM:
1732
    return "PPCISD::BCTRL_LOAD_TOC_RM";
1733
  case PPCISD::RET_GLUE:        return "PPCISD::RET_GLUE";
1734
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1735
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1736
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1737
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1738
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1739
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1740
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1741
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1742
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1743
  case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1744
    return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1745
  case PPCISD::ANDI_rec_1_EQ_BIT:
1746
    return "PPCISD::ANDI_rec_1_EQ_BIT";
1747
  case PPCISD::ANDI_rec_1_GT_BIT:
1748
    return "PPCISD::ANDI_rec_1_GT_BIT";
1749
  case PPCISD::VCMP:            return "PPCISD::VCMP";
1750
  case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1751
  case PPCISD::LBRX:            return "PPCISD::LBRX";
1752
  case PPCISD::STBRX:           return "PPCISD::STBRX";
1753
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1754
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1755
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1756
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1757
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1758
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1759
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1760
  case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1761
  case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1762
  case PPCISD::ST_VSR_SCAL_INT:
1763
                                return "PPCISD::ST_VSR_SCAL_INT";
1764
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1765
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1766
  case PPCISD::BDZ:             return "PPCISD::BDZ";
1767
  case PPCISD::MFFS:            return "PPCISD::MFFS";
1768
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1769
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1770
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1771
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1772
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1773
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1774
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1775
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1776
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1777
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1778
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1779
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1780
  case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1781
  case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
1782
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1783
  case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
1784
  case PPCISD::TLSLD_AIX:       return "PPCISD::TLSLD_AIX";
1785
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1786
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1787
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1788
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1789
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1790
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1791
  case PPCISD::PADDI_DTPREL:
1792
    return "PPCISD::PADDI_DTPREL";
1793
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1794
  case PPCISD::SC:              return "PPCISD::SC";
1795
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1796
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1797
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1798
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1799
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1800
  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1801
  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1802
  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1803
  case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1804
  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1805
  case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1806
  case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1807
  case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1808
    return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1809
  case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1810
    return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1811
  case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1812
  case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1813
  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1814
  case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1815
  case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1816
  case PPCISD::ZEXT_LD_SPLAT:   return "PPCISD::ZEXT_LD_SPLAT";
1817
  case PPCISD::SEXT_LD_SPLAT:   return "PPCISD::SEXT_LD_SPLAT";
1818
  case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1819
  case PPCISD::STRICT_FADDRTZ:
1820
    return "PPCISD::STRICT_FADDRTZ";
1821
  case PPCISD::STRICT_FCTIDZ:
1822
    return "PPCISD::STRICT_FCTIDZ";
1823
  case PPCISD::STRICT_FCTIWZ:
1824
    return "PPCISD::STRICT_FCTIWZ";
1825
  case PPCISD::STRICT_FCTIDUZ:
1826
    return "PPCISD::STRICT_FCTIDUZ";
1827
  case PPCISD::STRICT_FCTIWUZ:
1828
    return "PPCISD::STRICT_FCTIWUZ";
1829
  case PPCISD::STRICT_FCFID:
1830
    return "PPCISD::STRICT_FCFID";
1831
  case PPCISD::STRICT_FCFIDU:
1832
    return "PPCISD::STRICT_FCFIDU";
1833
  case PPCISD::STRICT_FCFIDS:
1834
    return "PPCISD::STRICT_FCFIDS";
1835
  case PPCISD::STRICT_FCFIDUS:
1836
    return "PPCISD::STRICT_FCFIDUS";
1837
  case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1838
  case PPCISD::STORE_COND:
1839
    return "PPCISD::STORE_COND";
1840
  }
1841
  return nullptr;
1842
}
1843

1844
/// Return the value type produced by a SETCC whose operands have type \p VT.
///
/// Vector compares yield a vector with the same shape as \p VT but integer
/// elements. Scalar compares yield i1 when the subtarget uses CR bits and i32
/// otherwise.
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  if (Subtarget.useCRBits())
    return MVT::i1;
  return MVT::i32;
}
1851

1852
/// Always answers true. \p VT is only used to assert that the query is about
/// a floating-point type.
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");

  return true;
}
1856

1857
//===----------------------------------------------------------------------===//
1858
// Node matching predicates, for use by the tblgen matching code.
1859
//===----------------------------------------------------------------------===//
1860

1861
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1862
static bool isFloatingPointZero(SDValue Op) {
1863
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1864
    return CFP->getValueAPF().isZero();
1865
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1866
    // Maybe this has already been legalized into the constant pool?
1867
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1868
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1869
        return CFP->getValueAPF().isZero();
1870
  }
1871
  return false;
1872
}
1873

1874
/// isConstantOrUndef - \p Op is a shuffle-mask element: a negative value
/// stands for undef, anything else is a concrete index. Return true if \p Op
/// is undef or if it equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef matches any expected value.
  if (Op < 0)
    return true;
  return Op == Val;
}
1879

1880
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1881
/// VPKUHUM instruction.
1882
/// The ShuffleKind distinguishes between big-endian operations with
1883
/// two different inputs (0), either-endian operations with two identical
1884
/// inputs (1), and little-endian operations with two different inputs (2).
1885
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1886
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1887
                               SelectionDAG &DAG) {
1888
  bool IsLE = DAG.getDataLayout().isLittleEndian();
1889
  if (ShuffleKind == 0) {
1890
    if (IsLE)
1891
      return false;
1892
    for (unsigned i = 0; i != 16; ++i)
1893
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1894
        return false;
1895
  } else if (ShuffleKind == 2) {
1896
    if (!IsLE)
1897
      return false;
1898
    for (unsigned i = 0; i != 16; ++i)
1899
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1900
        return false;
1901
  } else if (ShuffleKind == 1) {
1902
    unsigned j = IsLE ? 0 : 1;
1903
    for (unsigned i = 0; i != 8; ++i)
1904
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1905
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1906
        return false;
1907
  }
1908
  return true;
1909
}
1910

1911
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1912
/// VPKUWUM instruction.
1913
/// The ShuffleKind distinguishes between big-endian operations with
1914
/// two different inputs (0), either-endian operations with two identical
1915
/// inputs (1), and little-endian operations with two different inputs (2).
1916
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1917
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1918
                               SelectionDAG &DAG) {
1919
  bool IsLE = DAG.getDataLayout().isLittleEndian();
1920
  if (ShuffleKind == 0) {
1921
    if (IsLE)
1922
      return false;
1923
    for (unsigned i = 0; i != 16; i += 2)
1924
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1925
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1926
        return false;
1927
  } else if (ShuffleKind == 2) {
1928
    if (!IsLE)
1929
      return false;
1930
    for (unsigned i = 0; i != 16; i += 2)
1931
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1932
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1933
        return false;
1934
  } else if (ShuffleKind == 1) {
1935
    unsigned j = IsLE ? 0 : 2;
1936
    for (unsigned i = 0; i != 8; i += 2)
1937
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1938
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1939
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1940
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1941
        return false;
1942
  }
1943
  return true;
1944
}
1945

1946
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1947
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1948
/// current subtarget.
1949
///
1950
/// The ShuffleKind distinguishes between big-endian operations with
1951
/// two different inputs (0), either-endian operations with two identical
1952
/// inputs (1), and little-endian operations with two different inputs (2).
1953
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1954
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1955
                               SelectionDAG &DAG) {
1956
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1957
  if (!Subtarget.hasP8Vector())
1958
    return false;
1959

1960
  bool IsLE = DAG.getDataLayout().isLittleEndian();
1961
  if (ShuffleKind == 0) {
1962
    if (IsLE)
1963
      return false;
1964
    for (unsigned i = 0; i != 16; i += 4)
1965
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1966
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1967
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1968
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1969
        return false;
1970
  } else if (ShuffleKind == 2) {
1971
    if (!IsLE)
1972
      return false;
1973
    for (unsigned i = 0; i != 16; i += 4)
1974
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1975
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1976
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1977
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1978
        return false;
1979
  } else if (ShuffleKind == 1) {
1980
    unsigned j = IsLE ? 0 : 4;
1981
    for (unsigned i = 0; i != 8; i += 4)
1982
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1983
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1984
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1985
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1986
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1987
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1988
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1989
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1990
        return false;
1991
  }
1992
  return true;
1993
}
1994

1995
/// isVMerge - Common function, used to match vmrg* shuffles.
1996
///
1997
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1998
                     unsigned LHSStart, unsigned RHSStart) {
1999
  if (N->getValueType(0) != MVT::v16i8)
2000
    return false;
2001
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2002
         "Unsupported merge size!");
2003

2004
  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
2005
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
2006
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2007
                             LHSStart+j+i*UnitSize) ||
2008
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2009
                             RHSStart+j+i*UnitSize))
2010
        return false;
2011
    }
2012
  return true;
2013
}
2014

2015
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2016
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2017
/// The ShuffleKind distinguishes between big-endian merges with two
2018
/// different inputs (0), either-endian merges with two identical inputs (1),
2019
/// and little-endian merges with two different inputs (2).  For the latter,
2020
/// the input operands are swapped (see PPCInstrAltivec.td).
2021
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2022
                             unsigned ShuffleKind, SelectionDAG &DAG) {
2023
  if (DAG.getDataLayout().isLittleEndian()) {
2024
    if (ShuffleKind == 1) // unary
2025
      return isVMerge(N, UnitSize, 0, 0);
2026
    else if (ShuffleKind == 2) // swapped
2027
      return isVMerge(N, UnitSize, 0, 16);
2028
    else
2029
      return false;
2030
  } else {
2031
    if (ShuffleKind == 1) // unary
2032
      return isVMerge(N, UnitSize, 8, 8);
2033
    else if (ShuffleKind == 0) // normal
2034
      return isVMerge(N, UnitSize, 8, 24);
2035
    else
2036
      return false;
2037
  }
2038
}
2039

2040
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2041
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2042
/// The ShuffleKind distinguishes between big-endian merges with two
2043
/// different inputs (0), either-endian merges with two identical inputs (1),
2044
/// and little-endian merges with two different inputs (2).  For the latter,
2045
/// the input operands are swapped (see PPCInstrAltivec.td).
2046
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2047
                             unsigned ShuffleKind, SelectionDAG &DAG) {
2048
  if (DAG.getDataLayout().isLittleEndian()) {
2049
    if (ShuffleKind == 1) // unary
2050
      return isVMerge(N, UnitSize, 8, 8);
2051
    else if (ShuffleKind == 2) // swapped
2052
      return isVMerge(N, UnitSize, 8, 24);
2053
    else
2054
      return false;
2055
  } else {
2056
    if (ShuffleKind == 1) // unary
2057
      return isVMerge(N, UnitSize, 0, 0);
2058
    else if (ShuffleKind == 0) // normal
2059
      return isVMerge(N, UnitSize, 0, 16);
2060
    else
2061
      return false;
2062
  }
2063
}
2064

2065
/**
2066
 * Common function used to match vmrgew and vmrgow shuffles
2067
 *
2068
 * The indexOffset determines whether to look for even or odd words in
2069
 * the shuffle mask. This is based on the of the endianness of the target
2070
 * machine.
2071
 *   - Little Endian:
2072
 *     - Use offset of 0 to check for odd elements
2073
 *     - Use offset of 4 to check for even elements
2074
 *   - Big Endian:
2075
 *     - Use offset of 0 to check for even elements
2076
 *     - Use offset of 4 to check for odd elements
2077
 * A detailed description of the vector element ordering for little endian and
2078
 * big endian can be found at
2079
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2080
 * Targeting your applications - what little endian and big endian IBM XL C/C++
2081
 * compiler differences mean to you
2082
 *
2083
 * The mask to the shuffle vector instruction specifies the indices of the
2084
 * elements from the two input vectors to place in the result. The elements are
2085
 * numbered in array-access order, starting with the first vector. These vectors
2086
 * are always of type v16i8, thus each vector will contain 16 elements of size
2087
 * 8. More info on the shuffle vector can be found in the
2088
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2089
 * Language Reference.
2090
 *
2091
 * The RHSStartValue indicates whether the same input vectors are used (unary)
2092
 * or two different input vectors are used, based on the following:
2093
 *   - If the instruction uses the same vector for both inputs, the range of the
2094
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
2095
 *     be 0.
2096
 *   - If the instruction has two different vectors then the range of the
2097
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
2098
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
2099
 *     to 31 specify elements in the second vector).
2100
 *
2101
 * \param[in] N The shuffle vector SD Node to analyze
2102
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2103
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2104
 * vector to the shuffle_vector instruction
2105
 * \return true iff this shuffle vector represents an even or odd word merge
2106
 */
2107
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2108
                     unsigned RHSStartValue) {
2109
  if (N->getValueType(0) != MVT::v16i8)
2110
    return false;
2111

2112
  for (unsigned i = 0; i < 2; ++i)
2113
    for (unsigned j = 0; j < 4; ++j)
2114
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2115
                             i*RHSStartValue+j+IndexOffset) ||
2116
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2117
                             i*RHSStartValue+j+IndexOffset+8))
2118
        return false;
2119
  return true;
2120
}
2121

2122
/**
2123
 * Determine if the specified shuffle mask is suitable for the vmrgew or
2124
 * vmrgow instructions.
2125
 *
2126
 * \param[in] N The shuffle vector SD Node to analyze
2127
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2128
 * \param[in] ShuffleKind Identify the type of merge:
2129
 *   - 0 = big-endian merge with two different inputs;
2130
 *   - 1 = either-endian merge with two identical inputs;
2131
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
2132
 *     little-endian merges).
2133
 * \param[in] DAG The current SelectionDAG
2134
 * \return true iff this shuffle mask
2135
 */
2136
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2137
                              unsigned ShuffleKind, SelectionDAG &DAG) {
2138
  if (DAG.getDataLayout().isLittleEndian()) {
2139
    unsigned indexOffset = CheckEven ? 4 : 0;
2140
    if (ShuffleKind == 1) // Unary
2141
      return isVMerge(N, indexOffset, 0);
2142
    else if (ShuffleKind == 2) // swapped
2143
      return isVMerge(N, indexOffset, 16);
2144
    else
2145
      return false;
2146
  }
2147
  else {
2148
    unsigned indexOffset = CheckEven ? 0 : 4;
2149
    if (ShuffleKind == 1) // Unary
2150
      return isVMerge(N, indexOffset, 0);
2151
    else if (ShuffleKind == 0) // Normal
2152
      return isVMerge(N, indexOffset, 16);
2153
    else
2154
      return false;
2155
  }
2156
  return false;
2157
}
2158

2159
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2160
/// amount, otherwise return -1.
2161
/// The ShuffleKind distinguishes between big-endian operations with two
2162
/// different inputs (0), either-endian operations with two identical inputs
2163
/// (1), and little-endian operations with two different inputs (2).  For the
2164
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2165
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2166
                             SelectionDAG &DAG) {
2167
  if (N->getValueType(0) != MVT::v16i8)
2168
    return -1;
2169

2170
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2171

2172
  // Find the first non-undef value in the shuffle mask.
2173
  unsigned i;
2174
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2175
    /*search*/;
2176

2177
  if (i == 16) return -1;  // all undef.
2178

2179
  // Otherwise, check to see if the rest of the elements are consecutively
2180
  // numbered from this value.
2181
  unsigned ShiftAmt = SVOp->getMaskElt(i);
2182
  if (ShiftAmt < i) return -1;
2183

2184
  ShiftAmt -= i;
2185
  bool isLE = DAG.getDataLayout().isLittleEndian();
2186

2187
  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2188
    // Check the rest of the elements to see if they are consecutive.
2189
    for (++i; i != 16; ++i)
2190
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2191
        return -1;
2192
  } else if (ShuffleKind == 1) {
2193
    // Check the rest of the elements to see if they are consecutive.
2194
    for (++i; i != 16; ++i)
2195
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2196
        return -1;
2197
  } else
2198
    return -1;
2199

2200
  if (isLE)
2201
    ShiftAmt = 16 - ShiftAmt;
2202

2203
  return ShiftAmt;
2204
}
2205

2206
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2207
/// specifies a splat of a single element that is suitable for input to
2208
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2209
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2210
  EVT VT = N->getValueType(0);
2211
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
2212
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2213

2214
  assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2215
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2216

2217
  // The consecutive indices need to specify an element, not part of two
2218
  // different elements.  So abandon ship early if this isn't the case.
2219
  if (N->getMaskElt(0) % EltSize != 0)
2220
    return false;
2221

2222
  // This is a splat operation if each element of the permute is the same, and
2223
  // if the value doesn't reference the second vector.
2224
  unsigned ElementBase = N->getMaskElt(0);
2225

2226
  // FIXME: Handle UNDEF elements too!
2227
  if (ElementBase >= 16)
2228
    return false;
2229

2230
  // Check that the indices are consecutive, in the case of a multi-byte element
2231
  // splatted with a v16i8 mask.
2232
  for (unsigned i = 1; i != EltSize; ++i)
2233
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2234
      return false;
2235

2236
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2237
    if (N->getMaskElt(i) < 0) continue;
2238
    for (unsigned j = 0; j != EltSize; ++j)
2239
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
2240
        return false;
2241
  }
2242
  return true;
2243
}
2244

2245
/// Check that the mask is shuffling N byte elements. Within each N byte
2246
/// element of the mask, the indices could be either in increasing or
2247
/// decreasing order as long as they are consecutive.
2248
/// \param[in] N the shuffle vector SD Node to analyze
2249
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2250
/// Word/DoubleWord/QuadWord).
2251
/// \param[in] StepLen the delta indices number among the N byte element, if
2252
/// the mask is in increasing/decreasing order then it is 1/-1.
2253
/// \return true iff the mask is shuffling N byte elements.
2254
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2255
                                   int StepLen) {
2256
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2257
         "Unexpected element width.");
2258
  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2259

2260
  unsigned NumOfElem = 16 / Width;
2261
  unsigned MaskVal[16]; //  Width is never greater than 16
2262
  for (unsigned i = 0; i < NumOfElem; ++i) {
2263
    MaskVal[0] = N->getMaskElt(i * Width);
2264
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
2265
      return false;
2266
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2267
      return false;
2268
    }
2269

2270
    for (unsigned int j = 1; j < Width; ++j) {
2271
      MaskVal[j] = N->getMaskElt(i * Width + j);
2272
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2273
        return false;
2274
      }
2275
    }
2276
  }
2277

2278
  return true;
2279
}
2280

2281
/// Match a word-granular shuffle in which three of the four result words keep
/// their own position from one input and the remaining word is taken from
/// elsewhere. On success, \p InsertAtByte receives the byte position of the
/// replaced word, \p ShiftElts a value looked up from the source word number
/// in an endian-specific table, and \p Swap whether the replaced word came
/// from words 0-3.
/// \return true iff the mask matches one of these patterns.
/// Note: when the second shuffle operand is undef, \p ShiftElts and \p Swap
/// are written even if no pattern ends up matching.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Word numbers selected by mask elements 0, 4, 8 and 12.
  const unsigned Words[4] = {
      static_cast<unsigned>(N->getMaskElt(0)) / 4,
      static_cast<unsigned>(N->getMaskElt(4)) / 4,
      static_cast<unsigned>(N->getMaskElt(8)) / 4,
      static_cast<unsigned>(N->getMaskElt(12)) / 4};
  const unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  const unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
  const unsigned *Shifts = IsLE ? LittleEndianShifts : BigEndianShifts;

  // True when every word except the one at position Skip equals its own
  // position plus Bias (Bias 0: words 0-3, Bias 4: words 4-7).
  auto OthersInPlace = [&Words](unsigned Skip, unsigned Bias) {
    for (unsigned W = 0; W != 4; ++W)
      if (W != Skip && Words[W] != W + Bias)
        return false;
    return true;
  };

  // Byte position of result word Pos as reported through InsertAtByte.
  auto InsertByteFor = [IsLE](unsigned Pos) -> unsigned {
    return IsLE ? 12 - 4 * Pos : 4 * Pos;
  };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // Position 0: H, 1, 2, 3 or L, 5, 6, 7
  // Position 1: 0, H, 2, 3 or 4, L, 6, 7
  // Position 2: 0, 1, H, 3 or 4, 5, L, 7
  // Position 3: 0, 1, 2, H or 4, 5, 6, L
  for (unsigned Pos = 0; Pos != 4; ++Pos) {
    const unsigned Sel = Words[Pos];
    if ((Sel > 3 && OthersInPlace(Pos, 0)) ||
        (Sel < 4 && OthersInPlace(Pos, 4))) {
      ShiftElts = Shifts[Sel & 0x3];
      InsertAtByte = InsertByteFor(Pos);
      Swap = Sel < 4;
      return true;
    }
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    const unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    // Exactly one position holds the fixed source word; the rest are in place.
    for (unsigned Pos = 0; Pos != 4; ++Pos) {
      if (Words[Pos] == XXINSERTWSrcElem && OthersInPlace(Pos, 0)) {
        InsertAtByte = InsertByteFor(Pos);
        return true;
      }
    }
  }

  return false;
}
2355

2356
/// Match a v16i8 shuffle that selects four consecutive words, wrapping around
/// within the single input (second operand undef) or within the eight words
/// of both inputs. On success \p ShiftElts receives the word shift amount and
/// \p Swap whether the two inputs have to be exchanged.
/// \return true iff the mask has this form.
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  const unsigned M0 = N->getMaskElt(0) / 4;
  const unsigned M1 = N->getMaskElt(4) / 4;
  const unsigned M2 = N->getMaskElt(8) / 4;
  const unsigned M3 = N->getMaskElt(12) / 4;

  // True when each word number is its predecessor plus one, modulo NumWords.
  auto IsConsecutiveMod = [&](unsigned NumWords) {
    return M1 == (M0 + 1) % NumWords && M2 == (M1 + 1) % NumWords &&
           M3 == (M2 + 1) % NumWords;
  };

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (!IsConsecutiveMod(4))
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (!IsConsecutiveMod(8))
    return false;

  if (IsLE) {
    if (M0 == 0 || (M0 >= 5 && M0 <= 7)) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 >= 1 && M0 <= 4) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }
    return true;
  }

  // Big endian.
  if (M0 <= 3) {
    // Input vectors don't need to be swapped if the leading element
    // of the result is one of the 4 elements of the first vector.
    Swap = false;
    ShiftElts = M0;
  } else if (M0 <= 7) {
    // Input vectors need to be swapped if the leading element
    // of the result is one of the 4 elements of the right vector.
    Swap = true;
    ShiftElts = M0 - 4;
  }
  return true;
}
2417

2418
/// Shared matcher for the isXXBR*ShuffleMask predicates: returns true iff,
/// within every \p Width byte element of the v16i8 mask, the indices run
/// downwards from the element's own last byte to its own first byte.
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Indices inside each element must be consecutive and decreasing.
  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  // Each element must additionally start at the last byte of its own slot.
  for (int Start = 0; Start < 16; Start += Width) {
    const int LastByteOfSlot = Start + Width - 1;
    if (N->getMaskElt(Start) != LastByteOfSlot)
      return false;
  }

  return true;
}
2430

2431
/// Return true if \p N reverses the byte order inside every 2-byte element.
bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  const int HalfWordBytes = 2;
  return isXXBRShuffleMaskHelper(N, HalfWordBytes);
}
2434

2435
/// Return true if \p N reverses the byte order inside every 4-byte element.
bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  const int WordBytes = 4;
  return isXXBRShuffleMaskHelper(N, WordBytes);
}
2438

2439
/// Return true if \p N reverses the byte order inside every 8-byte element.
bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  const int DoubleWordBytes = 8;
  return isXXBRShuffleMaskHelper(N, DoubleWordBytes);
}
2442

2443
/// Return true if \p N reverses the byte order of the whole 16-byte vector.
bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  const int QuadWordBytes = 16;
  return isXXBRShuffleMaskHelper(N, QuadWordBytes);
}
2446

2447
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2448
/// if the inputs to the instruction should be swapped and set \p DM to the
2449
/// value for the immediate.
2450
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2451
/// AND element 0 of the result comes from the first input (LE) or second input
2452
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2453
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2454
/// mask.
2455
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2456
                               bool &Swap, bool IsLE) {
2457
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2458

2459
  // Ensure each byte index of the double word is consecutive.
2460
  if (!isNByteElemShuffleMask(N, 8, 1))
2461
    return false;
2462

2463
  unsigned M0 = N->getMaskElt(0) / 8;
2464
  unsigned M1 = N->getMaskElt(8) / 8;
2465
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2466

2467
  // If both vector operands for the shuffle are the same vector, the mask will
2468
  // contain only elements from the first one and the second one will be undef.
2469
  if (N->getOperand(1).isUndef()) {
2470
    if ((M0 | M1) < 2) {
2471
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2472
      Swap = false;
2473
      return true;
2474
    } else
2475
      return false;
2476
  }
2477

2478
  if (IsLE) {
2479
    if (M0 > 1 && M1 < 2) {
2480
      Swap = false;
2481
    } else if (M0 < 2 && M1 > 1) {
2482
      M0 = (M0 + 2) % 4;
2483
      M1 = (M1 + 2) % 4;
2484
      Swap = true;
2485
    } else
2486
      return false;
2487

2488
    // Note: if control flow comes here that means Swap is already set above
2489
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2490
    return true;
2491
  } else { // BE
2492
    if (M0 < 2 && M1 > 1) {
2493
      Swap = false;
2494
    } else if (M0 > 1 && M1 < 2) {
2495
      M0 = (M0 + 2) % 4;
2496
      M1 = (M1 + 2) % 4;
2497
      Swap = true;
2498
    } else
2499
      return false;
2500

2501
    // Note: if control flow comes here that means Swap is already set above
2502
    DM = (M0 << 1) + (M1 & 1);
2503
    return true;
2504
  }
2505
}
2506

2507

2508
/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2509
/// appropriate for PPC mnemonics (which have a big endian bias - namely
2510
/// elements are counted from the left of the vector register).
2511
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2512
                                         SelectionDAG &DAG) {
2513
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2514
  assert(isSplatShuffleMask(SVOp, EltSize));
2515
  EVT VT = SVOp->getValueType(0);
2516

2517
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
2518
    return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2519
                                                : SVOp->getMaskElt(0);
2520

2521
  if (DAG.getDataLayout().isLittleEndian())
2522
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2523
  else
2524
    return SVOp->getMaskElt(0) / EltSize;
2525
}
2526

2527
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2528
/// by using a vspltis[bhw] instruction of the specified element size, return
2529
/// the constant being splatted.  The ByteSize field indicates the number of
2530
/// bytes of each element [124] -> [bhw].
2531
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2532
  SDValue OpVal;
2533

2534
  // If ByteSize of the splat is bigger than the element size of the
2535
  // build_vector, then we have a case where we are checking for a splat where
2536
  // multiple elements of the buildvector are folded together into a single
2537
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2538
  unsigned EltSize = 16/N->getNumOperands();
2539
  if (EltSize < ByteSize) {
2540
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2541
    SDValue UniquedVals[4];
2542
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2543

2544
    // See if all of the elements in the buildvector agree across.
2545
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2546
      if (N->getOperand(i).isUndef()) continue;
2547
      // If the element isn't a constant, bail fully out.
2548
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2549

2550
      if (!UniquedVals[i&(Multiple-1)].getNode())
2551
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2552
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2553
        return SDValue();  // no match.
2554
    }
2555

2556
    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2557
    // either constant or undef values that are identical for each chunk.  See
2558
    // if these chunks can form into a larger vspltis*.
2559

2560
    // Check to see if all of the leading entries are either 0 or -1.  If
2561
    // neither, then this won't fit into the immediate field.
2562
    bool LeadingZero = true;
2563
    bool LeadingOnes = true;
2564
    for (unsigned i = 0; i != Multiple-1; ++i) {
2565
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2566

2567
      LeadingZero &= isNullConstant(UniquedVals[i]);
2568
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2569
    }
2570
    // Finally, check the least significant entry.
2571
    if (LeadingZero) {
2572
      if (!UniquedVals[Multiple-1].getNode())
2573
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2574
      int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2575
      if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2576
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2577
    }
2578
    if (LeadingOnes) {
2579
      if (!UniquedVals[Multiple-1].getNode())
2580
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2581
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2582
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2583
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2584
    }
2585

2586
    return SDValue();
2587
  }
2588

2589
  // Check to see if this buildvec has a single non-undef value in its elements.
2590
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2591
    if (N->getOperand(i).isUndef()) continue;
2592
    if (!OpVal.getNode())
2593
      OpVal = N->getOperand(i);
2594
    else if (OpVal != N->getOperand(i))
2595
      return SDValue();
2596
  }
2597

2598
  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2599

2600
  unsigned ValSizeInBytes = EltSize;
2601
  uint64_t Value = 0;
2602
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2603
    Value = CN->getZExtValue();
2604
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2605
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2606
    Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2607
  }
2608

2609
  // If the splat value is larger than the element value, then we can never do
2610
  // this splat.  The only case that we could fit the replicated bits into our
2611
  // immediate field for would be zero, and we prefer to use vxor for it.
2612
  if (ValSizeInBytes < ByteSize) return SDValue();
2613

2614
  // If the element value is larger than the splat value, check if it consists
2615
  // of a repeated bit pattern of size ByteSize.
2616
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2617
    return SDValue();
2618

2619
  // Properly sign extend the value.
2620
  int MaskVal = SignExtend32(Value, ByteSize * 8);
2621

2622
  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2623
  if (MaskVal == 0) return SDValue();
2624

2625
  // Finally, if this value fits in a 5 bit sext field, return it
2626
  if (SignExtend32<5>(MaskVal) == MaskVal)
2627
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2628
  return SDValue();
2629
}
2630

2631
//===----------------------------------------------------------------------===//
2632
//  Addressing Mode Selection
2633
//===----------------------------------------------------------------------===//
2634

2635
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2636
/// or 64-bit immediate, and if the value can be accurately represented as a
2637
/// sign extension from a 16-bit value.  If so, this returns true and the
2638
/// immediate.
2639
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2640
  if (!isa<ConstantSDNode>(N))
2641
    return false;
2642

2643
  Imm = (int16_t)N->getAsZExtVal();
2644
  if (N->getValueType(0) == MVT::i32)
2645
    return Imm == (int32_t)N->getAsZExtVal();
2646
  else
2647
    return Imm == (int64_t)N->getAsZExtVal();
2648
}
2649
/// SDValue convenience overload: forwards to the SDNode version using the
/// node that defines \p Op.
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  SDNode *const Node = Op.getNode();
  return isIntS16Immediate(Node, Imm);
}
2652

2653
/// Used when computing address flags for selecting loads and stores.
2654
/// If we have an OR, check if the LHS and RHS are provably disjoint.
2655
/// An OR of two provably disjoint values is equivalent to an ADD.
2656
/// Most PPC load/store instructions compute the effective address as a sum,
2657
/// so doing this conversion is useful.
2658
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2659
  if (N.getOpcode() != ISD::OR)
2660
    return false;
2661
  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2662
  if (!LHSKnown.Zero.getBoolValue())
2663
    return false;
2664
  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2665
  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2666
}
2667

2668
/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2669
/// be represented as an indexed [r+r] operation.
2670
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2671
                                               SDValue &Index,
2672
                                               SelectionDAG &DAG) const {
2673
  for (SDNode *U : N->uses()) {
2674
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2675
      if (Memop->getMemoryVT() == MVT::f64) {
2676
          Base = N.getOperand(0);
2677
          Index = N.getOperand(1);
2678
          return true;
2679
      }
2680
    }
2681
  }
2682
  return false;
2683
}
2684

2685
/// isIntS34Immediate - This method tests if value of node given can be
2686
/// accurately represented as a sign extension from a 34-bit value.  If so,
2687
/// this returns true and the immediate.
2688
bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2689
  if (!isa<ConstantSDNode>(N))
2690
    return false;
2691

2692
  Imm = (int64_t)N->getAsZExtVal();
2693
  return isInt<34>(Imm);
2694
}
2695
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
  // Convenience overload: run the SDNode-based check on the node behind Op.
  SDNode *Node = Op.getNode();
  return isIntS34Immediate(Node, Imm);
}
2698

2699
/// SelectAddressRegReg - Given the specified addressed, check to see if it
2700
/// can be represented as an indexed [r+r] operation.  Returns false if it
2701
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2702
/// non-zero and N can be represented by a base register plus a signed 16-bit
2703
/// displacement, make a more precise judgement by checking (displacement % \p
2704
/// EncodingAlignment).
2705
bool PPCTargetLowering::SelectAddressRegReg(
2706
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2707
    MaybeAlign EncodingAlignment) const {
2708
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2709
  // a [pc+imm].
2710
  if (SelectAddressPCRel(N, Base))
2711
    return false;
2712

2713
  int16_t Imm = 0;
2714
  if (N.getOpcode() == ISD::ADD) {
2715
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
2716
    // SPE load/store can only handle 8-bit offsets.
2717
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2718
        return true;
2719
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
2720
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2721
      return false; // r+i
2722
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2723
      return false;    // r+i
2724

2725
    Base = N.getOperand(0);
2726
    Index = N.getOperand(1);
2727
    return true;
2728
  } else if (N.getOpcode() == ISD::OR) {
2729
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
2730
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2731
      return false; // r+i can fold it if we can.
2732

2733
    // If this is an or of disjoint bitfields, we can codegen this as an add
2734
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
2735
    // disjoint.
2736
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2737

2738
    if (LHSKnown.Zero.getBoolValue()) {
2739
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2740
      // If all of the bits are known zero on the LHS or RHS, the add won't
2741
      // carry.
2742
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2743
        Base = N.getOperand(0);
2744
        Index = N.getOperand(1);
2745
        return true;
2746
      }
2747
    }
2748
  }
2749

2750
  return false;
2751
}
2752

2753
// If we happen to be doing an i64 load or store into a stack slot that has
2754
// less than a 4-byte alignment, then the frame-index elimination may need to
2755
// use an indexed load or store instruction (because the offset may not be a
2756
// multiple of 4). The extra register needed to hold the offset comes from the
2757
// register scavenger, and it is possible that the scavenger will need to use
2758
// an emergency spill slot. As a result, we need to make sure that a spill slot
2759
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2760
// stack slot.
2761
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2762
  // FIXME: This does not handle the LWA case.
2763
  if (VT != MVT::i64)
2764
    return;
2765

2766
  // NOTE: We'll exclude negative FIs here, which come from argument
2767
  // lowering, because there are no known test cases triggering this problem
2768
  // using packed structures (or similar). We can remove this exclusion if
2769
  // we find such a test case. The reason why this is so test-case driven is
2770
  // because this entire 'fixup' is only to prevent crashes (from the
2771
  // register scavenger) on not-really-valid inputs. For example, if we have:
2772
  //   %a = alloca i1
2773
  //   %b = bitcast i1* %a to i64*
2774
  //   store i64* a, i64 b
2775
  // then the store should really be marked as 'align 1', but is not. If it
2776
  // were marked as 'align 1' then the indexed form would have been
2777
  // instruction-selected initially, and the problem this 'fixup' is preventing
2778
  // won't happen regardless.
2779
  if (FrameIdx < 0)
2780
    return;
2781

2782
  MachineFunction &MF = DAG.getMachineFunction();
2783
  MachineFrameInfo &MFI = MF.getFrameInfo();
2784

2785
  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2786
    return;
2787

2788
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2789
  FuncInfo->setHasNonRISpills();
2790
}
2791

2792
/// Returns true if the address N can be represented by a base register plus
2793
/// a signed 16-bit displacement [r+imm], and if it is not better
2794
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2795
/// displacements that are multiples of that value.
2796
bool PPCTargetLowering::SelectAddressRegImm(
2797
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2798
    MaybeAlign EncodingAlignment) const {
2799
  // FIXME dl should come from parent load or store, not from address
2800
  SDLoc dl(N);
2801

2802
  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2803
  // a [pc+imm].
2804
  if (SelectAddressPCRel(N, Base))
2805
    return false;
2806

2807
  // If this can be more profitably realized as r+r, fail.
2808
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2809
    return false;
2810

2811
  if (N.getOpcode() == ISD::ADD) {
2812
    int16_t imm = 0;
2813
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2814
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2815
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2816
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2817
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2818
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2819
      } else {
2820
        Base = N.getOperand(0);
2821
      }
2822
      return true; // [r+i]
2823
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2824
      // Match LOAD (ADD (X, Lo(G))).
2825
      assert(!N.getOperand(1).getConstantOperandVal(1) &&
2826
             "Cannot handle constant offsets yet!");
2827
      Disp = N.getOperand(1).getOperand(0);  // The global address.
2828
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2829
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2830
             Disp.getOpcode() == ISD::TargetConstantPool ||
2831
             Disp.getOpcode() == ISD::TargetJumpTable);
2832
      Base = N.getOperand(0);
2833
      return true;  // [&g+r]
2834
    }
2835
  } else if (N.getOpcode() == ISD::OR) {
2836
    int16_t imm = 0;
2837
    if (isIntS16Immediate(N.getOperand(1), imm) &&
2838
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2839
      // If this is an or of disjoint bitfields, we can codegen this as an add
2840
      // (for better address arithmetic) if the LHS and RHS of the OR are
2841
      // provably disjoint.
2842
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2843

2844
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2845
        // If all of the bits are known zero on the LHS or RHS, the add won't
2846
        // carry.
2847
        if (FrameIndexSDNode *FI =
2848
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2849
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2850
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2851
        } else {
2852
          Base = N.getOperand(0);
2853
        }
2854
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2855
        return true;
2856
      }
2857
    }
2858
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2859
    // Loading from a constant address.
2860

2861
    // If this address fits entirely in a 16-bit sext immediate field, codegen
2862
    // this as "d, 0"
2863
    int16_t Imm;
2864
    if (isIntS16Immediate(CN, Imm) &&
2865
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2866
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2867
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2868
                             CN->getValueType(0));
2869
      return true;
2870
    }
2871

2872
    // Handle 32-bit sext immediates with LIS + addr mode.
2873
    if ((CN->getValueType(0) == MVT::i32 ||
2874
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2875
        (!EncodingAlignment ||
2876
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2877
      int Addr = (int)CN->getZExtValue();
2878

2879
      // Otherwise, break this down into an LIS + disp.
2880
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2881

2882
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2883
                                   MVT::i32);
2884
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2885
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2886
      return true;
2887
    }
2888
  }
2889

2890
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2891
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2892
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2893
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2894
  } else
2895
    Base = N;
2896
  return true;      // [r+0]
2897
}
2898

2899
/// Similar to the 16-bit case but for instructions that take a 34-bit
2900
/// displacement field (prefixed loads/stores).
2901
bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2902
                                              SDValue &Base,
2903
                                              SelectionDAG &DAG) const {
2904
  // Only on 64-bit targets.
2905
  if (N.getValueType() != MVT::i64)
2906
    return false;
2907

2908
  SDLoc dl(N);
2909
  int64_t Imm = 0;
2910

2911
  if (N.getOpcode() == ISD::ADD) {
2912
    if (!isIntS34Immediate(N.getOperand(1), Imm))
2913
      return false;
2914
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2915
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2916
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2917
    else
2918
      Base = N.getOperand(0);
2919
    return true;
2920
  }
2921

2922
  if (N.getOpcode() == ISD::OR) {
2923
    if (!isIntS34Immediate(N.getOperand(1), Imm))
2924
      return false;
2925
    // If this is an or of disjoint bitfields, we can codegen this as an add
2926
    // (for better address arithmetic) if the LHS and RHS of the OR are
2927
    // provably disjoint.
2928
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2929
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2930
      return false;
2931
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2932
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2933
    else
2934
      Base = N.getOperand(0);
2935
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2936
    return true;
2937
  }
2938

2939
  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2940
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2941
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2942
    return true;
2943
  }
2944

2945
  return false;
2946
}
2947

2948
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2949
/// represented as an indexed [r+r] operation.
2950
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2951
                                                SDValue &Index,
2952
                                                SelectionDAG &DAG) const {
2953
  // Check to see if we can easily represent this as an [r+r] address.  This
2954
  // will fail if it thinks that the address is more profitably represented as
2955
  // reg+imm, e.g. where imm = 0.
2956
  if (SelectAddressRegReg(N, Base, Index, DAG))
2957
    return true;
2958

2959
  // If the address is the result of an add, we will utilize the fact that the
2960
  // address calculation includes an implicit add.  However, we can reduce
2961
  // register pressure if we do not materialize a constant just for use as the
2962
  // index register.  We only get rid of the add if it is not an add of a
2963
  // value and a 16-bit signed constant and both have a single use.
2964
  int16_t imm = 0;
2965
  if (N.getOpcode() == ISD::ADD &&
2966
      (!isIntS16Immediate(N.getOperand(1), imm) ||
2967
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2968
    Base = N.getOperand(0);
2969
    Index = N.getOperand(1);
2970
    return true;
2971
  }
2972

2973
  // Otherwise, do it the hard way, using R0 as the base register.
2974
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2975
                         N.getValueType());
2976
  Index = N;
2977
  return true;
2978
}
2979

2980
/// Returns true when \p N is a node of type \p Ty whose target flags carry
/// the PC-relative flag, as reported by PPCInstrInfo::hasPCRelFlag.
template <typename Ty> static bool isValidPCRelNode(SDValue N) {
  if (Ty *Candidate = dyn_cast<Ty>(N))
    return PPCInstrInfo::hasPCRelFlag(Candidate->getTargetFlags());
  return false;
}
2984

2985
/// Returns true if this address is a PC Relative address.
2986
/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2987
/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2988
bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2989
  // This is a materialize PC Relative node. Always select this as PC Relative.
2990
  Base = N;
2991
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2992
    return true;
2993
  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2994
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
2995
      isValidPCRelNode<JumpTableSDNode>(N) ||
2996
      isValidPCRelNode<BlockAddressSDNode>(N))
2997
    return true;
2998
  return false;
2999
}
3000

3001
/// Returns true if we should use a direct load into vector instruction
3002
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3003
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3004

3005
  // If there are any other uses other than scalar to vector, then we should
3006
  // keep it as a scalar load -> direct move pattern to prevent multiple
3007
  // loads.
3008
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3009
  if (!LD)
3010
    return false;
3011

3012
  EVT MemVT = LD->getMemoryVT();
3013
  if (!MemVT.isSimple())
3014
    return false;
3015
  switch(MemVT.getSimpleVT().SimpleTy) {
3016
  case MVT::i64:
3017
    break;
3018
  case MVT::i32:
3019
    if (!ST.hasP8Vector())
3020
      return false;
3021
    break;
3022
  case MVT::i16:
3023
  case MVT::i8:
3024
    if (!ST.hasP9Vector())
3025
      return false;
3026
    break;
3027
  default:
3028
    return false;
3029
  }
3030

3031
  SDValue LoadedVal(N, 0);
3032
  if (!LoadedVal.hasOneUse())
3033
    return false;
3034

3035
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3036
       UI != UE; ++UI)
3037
    if (UI.getUse().get().getResNo() == 0 &&
3038
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3039
        UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3040
      return false;
3041

3042
  return true;
3043
}
3044

3045
/// getPreIndexedAddressParts - returns true by value, base pointer and
3046
/// offset pointer and addressing mode by reference if the node's address
3047
/// can be legally represented as pre-indexed load / store address.
3048
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3049
                                                  SDValue &Offset,
3050
                                                  ISD::MemIndexedMode &AM,
3051
                                                  SelectionDAG &DAG) const {
3052
  if (DisablePPCPreinc) return false;
3053

3054
  bool isLoad = true;
3055
  SDValue Ptr;
3056
  EVT VT;
3057
  Align Alignment;
3058
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3059
    Ptr = LD->getBasePtr();
3060
    VT = LD->getMemoryVT();
3061
    Alignment = LD->getAlign();
3062
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3063
    Ptr = ST->getBasePtr();
3064
    VT  = ST->getMemoryVT();
3065
    Alignment = ST->getAlign();
3066
    isLoad = false;
3067
  } else
3068
    return false;
3069

3070
  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3071
  // instructions because we can fold these into a more efficient instruction
3072
  // instead, (such as LXSD).
3073
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3074
    return false;
3075
  }
3076

3077
  // PowerPC doesn't have preinc load/store instructions for vectors
3078
  if (VT.isVector())
3079
    return false;
3080

3081
  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3082
    // Common code will reject creating a pre-inc form if the base pointer
3083
    // is a frame index, or if N is a store and the base pointer is either
3084
    // the same as or a predecessor of the value being stored.  Check for
3085
    // those situations here, and try with swapped Base/Offset instead.
3086
    bool Swap = false;
3087

3088
    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3089
      Swap = true;
3090
    else if (!isLoad) {
3091
      SDValue Val = cast<StoreSDNode>(N)->getValue();
3092
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3093
        Swap = true;
3094
    }
3095

3096
    if (Swap)
3097
      std::swap(Base, Offset);
3098

3099
    AM = ISD::PRE_INC;
3100
    return true;
3101
  }
3102

3103
  // LDU/STU can only handle immediates that are a multiple of 4.
3104
  if (VT != MVT::i64) {
3105
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3106
      return false;
3107
  } else {
3108
    // LDU/STU need an address with at least 4-byte alignment.
3109
    if (Alignment < Align(4))
3110
      return false;
3111

3112
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3113
      return false;
3114
  }
3115

3116
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3117
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
3118
    // sext i32 to i64 when addr mode is r+i.
3119
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3120
        LD->getExtensionType() == ISD::SEXTLOAD &&
3121
        isa<ConstantSDNode>(Offset))
3122
      return false;
3123
  }
3124

3125
  AM = ISD::PRE_INC;
3126
  return true;
3127
}
3128

3129
//===----------------------------------------------------------------------===//
3130
//  LowerOperation implementation
3131
//===----------------------------------------------------------------------===//
3132

3133
/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3134
/// and LoOpFlags to the target MO flags.
3135
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3136
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
3137
                               const GlobalValue *GV = nullptr) {
3138
  HiOpFlags = PPCII::MO_HA;
3139
  LoOpFlags = PPCII::MO_LO;
3140

3141
  // Don't use the pic base if not in PIC relocation model.
3142
  if (IsPIC) {
3143
    HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3144
    LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3145
  }
3146
}
3147

3148
/// Build the address of a label as the sum of its PPCISD::Hi and PPCISD::Lo
/// parts. The result type is taken from \p HiPart. When \p isPIC is set, the
/// global base register is added to the high part first.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue ZeroOffset = DAG.getConstant(0, DL, PtrVT);

  SDValue HighHalf = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, ZeroOffset);
  SDValue LowHalf = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, ZeroOffset);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC) {
    SDValue GlobalBase = DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT);
    HighHalf = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HighHalf);
  }

  // Without PIC this is a direct access:
  // the address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, HighHalf, LowHalf);
}
3166

3167
/// Record on the function's PPCFunctionInfo that it uses the TOC base pointer.
static void setUsesTOCBasePtr(MachineFunction &MF) {
  MF.getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
}
3171

3172
/// Convenience overload: mark the machine function being built by \p DAG as
/// using the TOC base pointer.
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  setUsesTOCBasePtr(MF);
}
3175

3176
/// Build a PPCISD::TOC_ENTRY memory-intrinsic node that loads the entry for
/// \p GA. The second operand is the base register: X2 on 64-bit targets, R2
/// on 32-bit AIX, and a PPCISD::GlobalBaseReg node otherwise. The node is
/// marked as a load using GOT pointer info.
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;

  // Pick the register (or node) that provides the table base.
  SDValue TableBase;
  if (Is64Bit)
    TableBase = DAG.getRegister(PPC::X2, VT);
  else if (Subtarget.isAIXABI())
    TableBase = DAG.getRegister(PPC::R2, VT);
  else
    TableBase = DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = {GA, TableBase};
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
      MachineMemOperand::MOLoad);
}
3190

3191
/// Lower a ConstantPool node to the address computation appropriate for the
/// current ABI and relocation model:
///  - 64-bit ELF and AIX: a PC-relative materialization when PC-relative
///    calls are in use, otherwise a TOC entry load;
///  - 32-bit SVR4 PIC: a TOC-entry (GOT) load of a node tagged MO_PIC_FLAG;
///  - everything else: a hi/lo pair combined by LowerLabelRef.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDLoc DL(CP);
      EVT Ty = getPointerTy(DAG.getDataLayout());
      SDValue ConstPool = DAG.getTargetConstantPool(
          C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
      return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    // The fourth argument of getTargetConstantPool is the offset and the
    // fifth is the target flags. Pass an explicit zero offset so that
    // MO_PIC_FLAG is recorded as a target flag rather than being
    // misinterpreted as an offset into the constant.
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0,
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
3228

3229
// For 64-bit PowerPC, prefer the more compact relative encodings.
3230
// This trades 32 bits per jump table entry for one or two instructions
3231
// on the jump site.
3232
unsigned PPCTargetLowering::getJumpTableEncoding() const {
3233
  if (isJumpTableRelative())
3234
    return MachineJumpTableInfo::EK_LabelDifference32;
3235

3236
  return TargetLowering::getJumpTableEncoding();
3237
}
3238

3239
bool PPCTargetLowering::isJumpTableRelative() const {
3240
  if (UseAbsoluteJumpTables)
3241
    return false;
3242
  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3243
    return true;
3244
  return TargetLowering::isJumpTableRelative();
3245
}
3246

3247
/// Return the node that PIC jump-table entries are relative to. On 64-bit
/// non-AIX targets with a code model other than small or medium this is a
/// PPCISD::GlobalBaseReg node; every other configuration uses the generic
/// TargetLowering implementation.
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (Subtarget.isPPC64() && !Subtarget.isAIXABI()) {
    const auto CM = getTargetMachine().getCodeModel();
    if (CM != CodeModel::Small && CM != CodeModel::Medium)
      return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                         getPointerTy(DAG.getDataLayout()));
  }

  return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
}
3261

3262
/// MCExpr counterpart of getPICJumpTableRelocBase. On 64-bit non-AIX targets
/// with a code model other than small or medium, the base is a reference to
/// the function's PIC base symbol; every other configuration uses the
/// generic TargetLowering implementation.
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  if (Subtarget.isPPC64() && !Subtarget.isAIXABI()) {
    const auto CM = getTargetMachine().getCodeModel();
    if (CM != CodeModel::Small && CM != CodeModel::Medium)
      return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }

  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
}
3277

3278
/// Lower a JumpTable node to the address computation appropriate for the
/// current ABI and relocation model: a PC-relative materialization, a TOC
/// entry load, a PIC TOC-entry load on 32-bit SVR4, or a hi/lo pair combined
/// by LowerLabelRef.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  const int TableIdx = JT->getIndex();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue FlaggedTable =
        DAG.getTargetJumpTable(TableIdx, Ty, PPCII::MO_PCREL_FLAG);
    return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, FlaggedTable);
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue TableAddr = DAG.getTargetJumpTable(TableIdx, PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), TableAddr);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // PIC code on SVR4 loads the address through a TOC entry; the debug
  // location is taken from the freshly created target node.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue TableAddr =
        DAG.getTargetJumpTable(TableIdx, PtrVT, PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(TableAddr), TableAddr);
  }

  // Otherwise build the address from its high and low halves.
  SDValue HiHalf = DAG.getTargetJumpTable(TableIdx, PtrVT, MOHiFlag);
  SDValue LoHalf = DAG.getTargetJumpTable(TableIdx, PtrVT, MOLoFlag);
  return LowerLabelRef(HiHalf, LoHalf, IsPIC, DAG);
}
3314

3315
/// Lower a BlockAddress node to the address computation appropriate for the
/// current ABI and relocation model: a PC-relative materialization, a TOC
/// entry load (64-bit ELF / AIX), a TOC-entry (.got) load for 32-bit
/// position-independent ELF, or a hi/lo pair combined by LowerLabelRef.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue FlaggedBA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
                                                  PPCII::MO_PCREL_FLAG);
    return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, FlaggedBA);
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue TgtBA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), TgtBA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent()) {
    SDValue TgtBA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), TgtBA);
  }

  // Otherwise build the address from its high and low halves. Note that
  // these target nodes are created with a zero offset.
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue HiHalf = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue LoHalf = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(HiHalf, LoHalf, IsPIC, DAG);
}
3352

3353
/// Dispatch TLS address lowering to the AIX or the Linux implementation,
/// depending on whether the subtarget uses the AIX ABI.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  return Subtarget.isAIXABI() ? LowerGlobalTLSAddressAIX(Op, DAG)
                              : LowerGlobalTLSAddressLinux(Op, DAG);
}
3360

3361
/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3362
/// and then apply the update.
3363
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3364
                                         SelectionDAG &DAG,
3365
                                         const TargetMachine &TM) {
3366
  // Initialize TLS model opt setting lazily:
3367
  // (1) Use initial-exec for single TLS var references within current function.
3368
  // (2) Use local-dynamic for multiple TLS var references within current
3369
  // function.
3370
  PPCFunctionInfo *FuncInfo =
3371
      DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3372
  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3373
    SmallPtrSet<const GlobalValue *, 8> TLSGV;
3374
    // Iterate over all instructions within current function, collect all TLS
3375
    // global variables (global variables taken as the first parameter to
3376
    // Intrinsic::threadlocal_address).
3377
    const Function &Func = DAG.getMachineFunction().getFunction();
3378
    for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3379
         ++BI)
3380
      for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3381
           II != IE; ++II)
3382
        if (II->getOpcode() == Instruction::Call)
3383
          if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3384
            if (Function *CF = CI->getCalledFunction())
3385
              if (CF->isDeclaration() &&
3386
                  CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3387
                if (const GlobalValue *GV =
3388
                        dyn_cast<GlobalValue>(II->getOperand(0))) {
3389
                  TLSModel::Model GVModel = TM.getTLSModel(GV);
3390
                  if (GVModel == TLSModel::LocalDynamic)
3391
                    TLSGV.insert(GV);
3392
                }
3393

3394
    unsigned TLSGVCnt = TLSGV.size();
3395
    LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3396
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3397
      FuncInfo->setAIXFuncUseTLSIEForLD();
3398
    FuncInfo->setAIXFuncTLSModelOptInitDone();
3399
  }
3400

3401
  if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3402
    LLVM_DEBUG(
3403
        dbgs() << DAG.getMachineFunction().getName()
3404
               << " function is using the TLS-IE model for TLS-LD access.\n");
3405
    Model = TLSModel::InitialExec;
3406
  }
3407
}
3408

3409
/// Lower a GlobalTLSAddress node for the AIX ABI.
///
/// The TLS model reported by the target machine (optionally adjusted by
/// updateForAIXShLibTLSModelOpt) selects the sequence:
///  - local-exec / initial-exec: thread pointer plus a variable offset loaded
///    from the TOC, or the immediate-offset form for small local-exec
///    variables in 64-bit mode;
///  - local-dynamic: module handle (obtained through the `_$TLSML` TOC entry)
///    plus a per-variable offset;
///  - anything else: the general-dynamic sequence using two TOC entries.
///
/// Emulated TLS and the 32-bit "small" access sequences are rejected with a
/// fatal error.
SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  if (DAG.getTarget().useEmulatedTLS())
    report_fatal_error("Emulated TLS is not yet supported on AIX");

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool Is64Bit = Subtarget.isPPC64();
  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  // Apply update to the TLS model.
  if (Subtarget.hasAIXShLibTLSModelOpt())
    updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());

  bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;

  if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
    bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
    bool HasAIXSmallTLSGlobalAttr = false;
    SDValue VariableOffsetTGA =
        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
    SDValue TLSReg;

    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->hasAttribute("aix-small-tls"))
        HasAIXSmallTLSGlobalAttr = true;

    if (Is64Bit) {
      // For local-exec and initial-exec on AIX (64-bit), the sequence generated
      // involves a load of the variable offset (from the TOC), followed by an
      // add of the loaded variable offset to R13 (the thread pointer).
      // This code sequence looks like:
      //    ld reg1,var[TC](2)
      //    add reg2, reg1, r13     // r13 contains the thread pointer
      TLSReg = DAG.getRegister(PPC::X13, MVT::i64);

      // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
      // global variable attribute, produce a faster access sequence for
      // local-exec TLS variables where the offset from the TLS base is encoded
      // as an immediate operand.
      //
      // We only utilize the faster local-exec access sequence when the TLS
      // variable has a size within the policy limit. We treat types that are
      // not sized or are empty as being over the policy size limit.
      if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
          IsTLSLocalExecModel) {
        Type *GVType = GV->getValueType();
        if (GVType->isSized() && !GVType->isEmptyTy() &&
            GV->getDataLayout().getTypeAllocSize(GVType) <=
                AIXSmallTlsPolicySizeLimit)
          return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
      }
    } else {
      // For local-exec and initial-exec on AIX (32-bit), the sequence generated
      // involves loading the variable offset from the TOC, generating a call to
      // .__get_tpointer to get the thread pointer (which will be in R3), and
      // adding the two together:
      //    lwz reg1,var[TC](2)
      //    bla .__get_tpointer
      //    add reg2, reg1, r3
      TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);

      // We do not implement the 32-bit version of the faster access sequence
      // for local-exec that is controlled by the -maix-small-local-exec-tls
      // option, or the "aix-small-tls" global variable attribute.
      if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
        report_fatal_error("The small-local-exec TLS access sequence is "
                           "currently only supported on AIX (64-bit mode).");
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
  }

  if (Model == TLSModel::LocalDynamic) {
    bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();

    // We do not implement the 32-bit version of the faster access sequence
    // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
    if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
      report_fatal_error("The small-local-dynamic TLS access sequence is "
                         "currently only supported on AIX (64-bit mode).");

    // For local-dynamic on AIX, we need to generate one TOC entry for each
    // variable offset, and a single module-handle TOC entry for the entire
    // file.

    SDValue VariableOffsetTGA =
        DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
    SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);

    Module *M = DAG.getMachineFunction().getFunction().getParent();
    GlobalVariable *TLSGV =
        dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
            StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
    // Check the module-handle variable before its first use; the assert used
    // to sit after the dereference, where it could never fire in time.
    assert(TLSGV && "Not able to create GV for _$TLSML.");
    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
    SDValue ModuleHandleTGA =
        DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
    SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
    SDValue ModuleHandle =
        DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);

    // With the -maix-small-local-dynamic-tls option, produce a faster access
    // sequence for local-dynamic TLS variables where the offset from the
    // module-handle is encoded as an immediate operand.
    //
    // We only utilize the faster local-dynamic access sequence when the TLS
    // variable has a size within the policy limit. We treat types that are
    // not sized or are empty as being over the policy size limit.
    if (HasAIXSmallLocalDynamicTLS) {
      Type *GVType = GV->getValueType();
      if (GVType->isSized() && !GVType->isEmptyTy() &&
          GV->getDataLayout().getTypeAllocSize(GVType) <=
              AIXSmallTlsPolicySizeLimit)
        return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
                           ModuleHandle);
    }

    return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
  }

  // If Local- or Initial-exec or Local-dynamic is not possible or specified,
  // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
  // need to generate two TOC entries, one for the variable offset, one for the
  // region handle. The global address for the TOC entry of the region handle is
  // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
  // entry of the variable offset is created with MO_TLSGD_FLAG.
  SDValue VariableOffsetTGA =
      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
  SDValue RegionHandleTGA =
      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
  return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
                     RegionHandle);
}
3548

3549
/// Lower a GlobalTLSAddress node for non-AIX targets. Emulated TLS is handed
/// to LowerToTLSEmulatedModel; otherwise the access sequence is chosen by the
/// TLS model the target machine reports for the global.
SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const bool IsPPC64 = Subtarget.isPPC64();
  const bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
  const PICLevel::Level PICLvl =
      DAG.getMachineFunction().getFunction().getParent()->getPICLevel();
  const TargetMachine &TM = getTargetMachine();

  // Target global address of GV (offset 0) carrying the given operand flags.
  auto MakeTGA = [&](unsigned TargetFlags) {
    return DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, TargetFlags);
  };

  // 64-bit GOT pointer: the given ADDIS_* node applied to X2 and the TGA.
  auto MakeTOCRelativeGOTPtr = [&](unsigned AddisOpc, SDValue TGA) {
    setUsesTOCBasePtr(DAG);
    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
    return DAG.getNode(AddisOpc, dl, PtrVT, GOTReg, TGA);
  };

  // 32-bit GOT pointer selected by the module's PIC level.
  auto MakePPC32PICGOTPtr = [&] {
    return DAG.getNode(PICLvl == PICLevel::SmallPIC ? PPCISD::GlobalBaseReg
                                                    : PPCISD::PPC32_PICGOT,
                       dl, PtrVT);
  };

  switch (TM.getTLSModel(GV)) {
  case TLSModel::LocalExec: {
    if (IsPCRel) {
      SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
      SDValue TGA = MakeTGA(PPCII::MO_TPREL_PCREL_FLAG);
      SDValue MatAddr =
          DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
    }

    // Hi/Lo pair relative to the thread-pointer register (X13 or R2).
    SDValue TGAHi = MakeTGA(PPCII::MO_TPREL_HA);
    SDValue TGALo = MakeTGA(PPCII::MO_TPREL_LO);
    SDValue TLSReg = IsPPC64 ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  case TLSModel::InitialExec: {
    SDValue TGA = MakeTGA(IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS =
        MakeTGA(IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (IsPPC64)
        GOTPtr = MakeTOCRelativeGOTPtr(PPCISD::ADDIS_GOT_TPREL_HA, TGA);
      else if (!TM.isPositionIndependent())
        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
      else
        GOTPtr = MakePPC32PICGOTPtr();
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  case TLSModel::GeneralDynamic: {
    if (IsPCRel) {
      SDValue TGA = MakeTGA(PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = MakeTGA(0);
    SDValue GOTPtr;
    if (IsPPC64)
      GOTPtr = MakeTOCRelativeGOTPtr(PPCISD::ADDIS_TLSGD_HA, TGA);
    else
      GOTPtr = MakePPC32PICGOTPtr();
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA);
  }

  case TLSModel::LocalDynamic: {
    if (IsPCRel) {
      SDValue TGA = MakeTGA(PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = MakeTGA(0);
    SDValue GOTPtr;
    if (IsPPC64)
      GOTPtr = MakeTOCRelativeGOTPtr(PPCISD::ADDIS_TLSLD_HA, TGA);
    else
      GOTPtr = MakePPC32PICGOTPtr();
    SDValue TLSAddr =
        DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi =
        DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }
  }

  llvm_unreachable("Unknown TLS model!");
}
3676

3677
/// Lower a GlobalAddress node. Depending on the ABI and PIC mode the address
/// is materialized PC-relatively, loaded through a TOC/GOT entry, or built
/// from a Hi/Lo label reference.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  const int64_t Offset = GSDN->getOffset();
  EVT PtrVT = Op.getValueType();
  SDLoc DL(GSDN);

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (!Subtarget.isUsingPCRelativeCalls()) {
      setUsesTOCBasePtr(DAG);
      SDValue TOCAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
      return getTOCEntry(DAG, DL, TOCAddr);
    }

    // PC-relative: materialize either the address itself or, for
    // GOT-indirect accesses, the GOT slot which is then loaded.
    EVT Ty = getPointerTy(DAG.getDataLayout());
    const bool ViaGOT = isAccessedAsGotIndirect(Op);
    SDValue PCRelAddr = DAG.getTargetGlobalAddress(
        GV, DL, Ty, Offset,
        ViaGOT ? PPCII::MO_GOT_PCREL_FLAG : PPCII::MO_PCREL_FLAG);
    SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, PCRelAddr);
    if (!ViaGOT)
      return MatPCRel;
    return DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                       MachinePointerInfo());
  }

  unsigned HiFlag, LoFlag;
  const bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, HiFlag, LoFlag, GV);

  // PIC code on SVR4 goes through a TOC entry tagged with MO_PIC_FLAG.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue PICAddr =
        DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset, PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, PICAddr);
  }

  SDValue HiPart = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset, HiFlag);
  SDValue LoPart = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset, LoFlag);
  return LowerLabelRef(HiPart, LoPart, IsPIC, DAG);
}
3725

3726
/// Custom lowering for SETCC (and its strict FP variants). Returns an empty
/// SDValue when the node should be left to the default handling.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  const bool IsStrict = Op->isStrictFPOpcode();
  // Strict nodes carry the chain as operand 0, shifting the rest by one.
  const unsigned FirstArg = IsStrict ? 1 : 0;
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Op.getOperand(FirstArg + 2))->get();
  SDValue LHS = Op.getOperand(FirstArg);
  SDValue RHS = Op.getOperand(FirstArg + 1);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  EVT OperandVT = LHS.getValueType();
  EVT ResultVT = Op.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (OperandVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, OperandVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        Op->getOpcode() == ISD::STRICT_FSETCCS);
    // A non-null RHS means a comparison on the softened values is still
    // required.
    if (RHS.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, ResultVT, LHS, RHS,
                        DAG.getCondCode(CC));
    if (!IsStrict)
      return LHS;
    return DAG.getMergeValues({LHS, Chain}, dl);
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (ResultVT == MVT::v2i64) {
    // We handle most of these in the usual way.
    if (LHS.getValueType() != MVT::v2i64)
      return Op;

    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    // Equality can be handled by casting to the legal type for Altivec
    // comparisons, everything else needs to be expanded.
    if (CC != ISD::SETEQ && CC != ISD::SETNE)
      return SDValue();

    SDValue WordCmp = DAG.getSetCC(
        dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
    // Swap the two words of each doubleword so both halves can be combined.
    int SwapWords[] = {1, 0, 3, 2};
    SDValue Swapped =
        DAG.getVectorShuffle(MVT::v4i32, dl, WordCmp, WordCmp, SwapWords);
    const unsigned CombineOpc = CC == ISD::SETEQ ? ISD::AND : ISD::OR;
    SDValue Combined =
        DAG.getNode(CombineOpc, dl, MVT::v4i32, Swapped, WordCmp);
    return DAG.getBitcast(MVT::v2i64, Combined);
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  // Leave comparisons against 0 and -1 alone for now, since they're usually
  // optimized.  FIXME: revisit this when we can custom lower all setcc
  // optimizations.
  const ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  if (RHSConst && (RHSConst->isAllOnes() || RHSConst->isZero()))
    return SDValue();

  // Only integer seteq/setne is rewritten below.
  if (!OperandVT.isInteger() || (CC != ISD::SETEQ && CC != ISD::SETNE))
    return SDValue();

  // Turn the comparison into a compare against zero by xor'ing the rhs with
  // the lhs, which is faster than setting a condition register, reading it
  // back out, and masking the correct bit.  The normal approach here uses sub
  // to do this instead of xor.  Using xor exposes the result to other
  // bit-twiddling opportunities.
  SDValue Diff = DAG.getNode(ISD::XOR, dl, OperandVT, LHS, RHS);
  SDValue Zero = DAG.getConstant(0, dl, OperandVT);
  return DAG.getSetCC(dl, ResultVT, Diff, Zero, CC);
}
3801

3802
/// Lower VAARG for PPC32. The va_list is read as: a byte GPR index at offset
/// 0, a byte FPR index at offset 1, the overflow-area pointer at offset 4 and
/// the register-save-area pointer at offset 8. The argument is loaded from
/// the register save area while the relevant index is below 8, and from the
/// overflow area otherwise; the index and overflow pointer are updated.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  EVT VT = N->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = N->getOperand(0);
  SDValue VAList = N->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
  SDLoc dl(N);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  const bool IsIntVT = VT.isInteger();

  auto I32Const = [&](uint64_t Val) {
    return DAG.getConstant(Val, dl, MVT::i32);
  };
  // Zero-extending byte load of a register index; threads the chain.
  auto LoadIndexByte = [&](SDValue Ptr) {
    SDValue Idx = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, Ptr,
                                 MachinePointerInfo(SV), MVT::i8);
    Chain = Idx.getValue(1);
    return Idx;
  };

  // gpr_index
  SDValue GprIndex = LoadIndexByte(VAList);

  if (VT == MVT::i64) {
    // An i64 needs an even GPR index: bump an odd index by one.
    SDValue LowBit = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, I32Const(1));
    SDValue IsOdd = DAG.getSetCC(dl, MVT::i32, LowBit, I32Const(0), ISD::SETNE);
    SDValue NextGpr =
        DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, I32Const(1));
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, IsOdd, NextGpr, GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAList, I32Const(1));
  SDValue FprIndex = LoadIndexByte(FprPtr);

  SDValue RegSaveAreaPtr =
      DAG.getNode(ISD::ADD, dl, PtrVT, VAList, I32Const(8));
  SDValue OverflowAreaPtr =
      DAG.getNode(ISD::ADD, dl, PtrVT, VAList, I32Const(4));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, Chain, OverflowAreaPtr, MachinePointerInfo());
  Chain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, Chain, RegSaveAreaPtr, MachinePointerInfo());
  Chain = RegSaveArea.getValue(1);

  // Integer types consume the GPR index, everything else the FPR index.
  SDValue Index = IsIntVT ? GprIndex : FprIndex;
  SDValue IndexPtr = IsIntVT ? VAList : FprPtr;

  // True while the argument still lives in the register save area
  // (index < 8); false selects the overflow area.
  SDValue InRegs = DAG.getSetCC(dl, MVT::i32, Index, I32Const(8), ISD::SETLT);

  // Byte offset of the slot: index * 4 (integer) or index * 8 (otherwise).
  SDValue RegOffset =
      DAG.getNode(ISD::MUL, dl, MVT::i32, Index, I32Const(IsIntVT ? 4 : 8));

  // RegSlot = RegSaveArea + RegOffset
  SDValue RegSlot = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, RegOffset);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    RegSlot = DAG.getNode(ISD::ADD, dl, PtrVT, RegSlot, I32Const(32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue NewIndex = DAG.getNode(ISD::ADD, dl, MVT::i32, Index,
                                 I32Const(VT == MVT::i64 ? 2 : 1));
  Chain = DAG.getTruncStore(Chain, dl, NewIndex, IndexPtr,
                            MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue ArgPtr =
      DAG.getNode(ISD::SELECT, dl, PtrVT, InRegs, RegSlot, OverflowArea);

  // Advance overflow_area by 4 (integer) or 8 (otherwise) when the argument
  // was taken from it, i.e. when the index is >= 8.
  // NOTE(review): an i64 argument also advances by only 4 here — confirm
  // this is intended.
  SDValue BumpedOverflow = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                       I32Const(IsIntVT ? 4 : 8));
  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, InRegs, OverflowArea,
                             BumpedOverflow);

  Chain = DAG.getTruncStore(Chain, dl, OverflowArea, OverflowAreaPtr,
                            MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, Chain, ArgPtr, MachinePointerInfo());
}
3900

3901
/// Lower VACOPY for PPC32 as a fixed-size memcpy of the va_list struct.
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  SDLoc dl(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Dst = Op.getOperand(1);
  SDValue Src = Op.getOperand(2);

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  SDValue Size = DAG.getConstant(12, SDLoc(Op), MVT::i32);

  return DAG.getMemcpy(Chain, dl, Dst, Src, Size, Align(8),
                       /*isVol=*/false, /*AlwaysInline=*/true,
                       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                       MachinePointerInfo());
}
3911

3912
/// Lower ADJUST_TRAMPOLINE: fatal on AIX, otherwise the node's first operand
/// is returned unchanged.
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  const bool IsAIX = Subtarget.isAIXABI();
  if (IsAIX)
    report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");

  // No adjustment is applied; forward the incoming trampoline operand.
  SDValue Trmp = Op.getOperand(0);
  return Trmp;
}
3919

3920
/// Inspect an inline-asm node for definitions or clobbers of LR/LR8 and, if
/// one is found, record in PPCFunctionInfo that an LR store is required.
/// The node itself is always returned unchanged.
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is no point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Walk the operand groups: each starts with a flag word, followed by the
  // number of operands that word announces.
  unsigned Idx = InlineAsm::Op_FirstOperand;
  while (Idx != NumOps) {
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(Idx));
    ++Idx; // Skip the ID value.
    const unsigned GroupEnd = Idx + Flags.getNumOperandRegisters();

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
    case InlineAsm::Kind::RegUse:
    case InlineAsm::Kind::Imm:
    case InlineAsm::Kind::Mem:
      // These groups cannot define the LR; nothing to check.
      break;
    case InlineAsm::Kind::Clobber:
    case InlineAsm::Kind::RegDef:
    case InlineAsm::Kind::RegDefEarlyClobber:
      for (unsigned OpNo = Idx; OpNo != GroupEnd; ++OpNo) {
        Register Reg = cast<RegisterSDNode>(Op.getOperand(OpNo))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }

    Idx = GroupEnd;
  }

  return Op;
}
3970

3971
/// Lower INIT_TRAMPOLINE to a call to
/// __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) and return the chain of
/// that call. Not supported on AIX.
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");

  SDLoc dl(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const bool IsPPC64 = (PtrVT == MVT::i64);

  // TrampSize == (IsPPC64 ? 48 : 40), as a constant of matching width.
  SDValue TrampSize = DAG.getConstant(IsPPC64 ? 48 : 40, dl,
                                      IsPPC64 ? MVT::i64 : MVT::i32);

  // Every argument is passed with the pointer-sized integer type.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
  for (SDValue Arg : {Trmp, TrampSize, FPtr, Nest}) {
    Entry.Node = Arg;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol("__trampoline_setup", PtrVT);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
      std::move(Args));

  // The call returns void; only the output chain is of interest.
  return LowerCallTo(CLI).second;
}
4009

4010
/// Lower VASTART. On PPC64 and AIX this stores the address of the varargs
/// frame slot into the va_list; on 32-bit SVR4 it fills in the four fields of
/// the va_list struct described below.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FrameSlot =
        DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SrcVal = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FrameSlot, Op.getOperand(1),
                        MachinePointerInfo(SrcVal));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //               /* where r3:r10 and f1:f8 (if saved)
  //                * are stored
  //                */
  // } va_list[1];

  SDValue NumGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue NumFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue OverflowFI =
      DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT);
  SDValue RegSaveFI =
      DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

  // Byte distances between consecutive fields of the struct.
  const uint64_t PtrBytes = PtrVT.getSizeInBits() / 8;
  const uint64_t FrameStep = PtrBytes;     // overflow_arg_area -> reg_save_area
  SDValue FrameStepC = DAG.getConstant(FrameStep, dl, PtrVT);
  const uint64_t StackStep = PtrBytes - 1; // fpr -> overflow_arg_area
  SDValue StackStepC = DAG.getConstant(StackStep, dl, PtrVT);
  const uint64_t FPRStep = 1;              // gpr -> fpr
  SDValue FPRStepC = DAG.getConstant(FPRStep, dl, PtrVT);

  const Value *SrcVal = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDValue VAListPtr = Op.getOperand(1);

  // Store first byte : number of int regs
  SDValue Chain = DAG.getTruncStore(Op.getOperand(0), dl, NumGPR, VAListPtr,
                                    MachinePointerInfo(SrcVal), MVT::i8);
  uint64_t FieldOffset = FPRStep;
  SDValue FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, FPRStepC);

  // Store second byte : number of float regs
  Chain = DAG.getTruncStore(Chain, dl, NumFPR, FieldPtr,
                            MachinePointerInfo(SrcVal, FieldOffset), MVT::i8);
  FieldOffset += StackStep;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, StackStepC);

  // Store second word : arguments given on stack
  Chain = DAG.getStore(Chain, dl, OverflowFI, FieldPtr,
                       MachinePointerInfo(SrcVal, FieldOffset));
  FieldOffset += FrameStep;
  FieldPtr = DAG.getNode(ISD::ADD, dl, PtrVT, FieldPtr, FrameStepC);

  // Store third word : arguments given in registers
  return DAG.getStore(Chain, dl, RegSaveFI, FieldPtr,
                      MachinePointerInfo(SrcVal, FieldOffset));
}
4093

4094
/// FPR - The set of FP registers that should be allocated for arguments
4095
/// on Darwin and AIX.
4096
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
4097
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
4098
                                PPC::F11, PPC::F12, PPC::F13};
4099

4100
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4101
/// the stack.
4102
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4103
                                       unsigned PtrByteSize) {
4104
  unsigned ArgSize = ArgVT.getStoreSize();
4105
  if (Flags.isByVal())
4106
    ArgSize = Flags.getByValSize();
4107

4108
  // Round up to multiples of the pointer size, except for array members,
4109
  // which are always packed.
4110
  if (!Flags.isInConsecutiveRegs())
4111
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4112

4113
  return ArgSize;
4114
}
4115

4116
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4117
/// on the stack.
4118
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4119
                                         ISD::ArgFlagsTy Flags,
4120
                                         unsigned PtrByteSize) {
4121
  Align Alignment(PtrByteSize);
4122

4123
  // Altivec parameters are padded to a 16 byte boundary.
4124
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4125
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4126
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4127
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4128
    Alignment = Align(16);
4129

4130
  // ByVal parameters are aligned as requested.
4131
  if (Flags.isByVal()) {
4132
    auto BVAlign = Flags.getNonZeroByValAlign();
4133
    if (BVAlign > PtrByteSize) {
4134
      if (BVAlign.value() % PtrByteSize != 0)
4135
        llvm_unreachable(
4136
            "ByVal alignment is not a multiple of the pointer size");
4137

4138
      Alignment = BVAlign;
4139
    }
4140
  }
4141

4142
  // Array members are always packed to their original alignment.
4143
  if (Flags.isInConsecutiveRegs()) {
4144
    // If the array member was split into multiple registers, the first
4145
    // needs to be aligned to the size of the full type.  (Except for
4146
    // ppcf128, which is only aligned as its f64 components.)
4147
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4148
      Alignment = Align(OrigVT.getStoreSize());
4149
    else
4150
      Alignment = Align(ArgVT.getStoreSize());
4151
  }
4152

4153
  return Alignment;
4154
}
4155

4156
/// CalculateStackSlotUsed - Return whether this argument will use its
4157
/// stack slot (instead of being passed in registers).  ArgOffset,
4158
/// AvailableFPRs, and AvailableVRs must hold the current argument
4159
/// position, and will be updated to account for this argument.
4160
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4161
                                   unsigned PtrByteSize, unsigned LinkageSize,
4162
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
4163
                                   unsigned &AvailableFPRs,
4164
                                   unsigned &AvailableVRs) {
4165
  bool UseMemory = false;
4166

4167
  // Respect alignment of argument on the stack.
4168
  Align Alignment =
4169
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4170
  ArgOffset = alignTo(ArgOffset, Alignment);
4171
  // If there's no space left in the argument save area, we must
4172
  // use memory (this check also catches zero-sized arguments).
4173
  if (ArgOffset >= LinkageSize + ParamAreaSize)
4174
    UseMemory = true;
4175

4176
  // Allocate argument on the stack.
4177
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4178
  if (Flags.isInConsecutiveRegsLast())
4179
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4180
  // If we overran the argument save area, we must use memory
4181
  // (this check catches arguments passed partially in memory)
4182
  if (ArgOffset > LinkageSize + ParamAreaSize)
4183
    UseMemory = true;
4184

4185
  // However, if the argument is actually passed in an FPR or a VR,
4186
  // we don't use memory after all.
4187
  if (!Flags.isByVal()) {
4188
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4189
      if (AvailableFPRs > 0) {
4190
        --AvailableFPRs;
4191
        return false;
4192
      }
4193
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4194
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4195
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4196
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4197
      if (AvailableVRs > 0) {
4198
        --AvailableVRs;
4199
        return false;
4200
      }
4201
  }
4202

4203
  return UseMemory;
4204
}
4205

4206
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4207
/// ensure minimum alignment required for target.
4208
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4209
                                     unsigned NumBytes) {
4210
  return alignTo(NumBytes, Lowering->getStackAlign());
4211
}
4212

4213
/// Entry point for lowering incoming formal arguments.  Forwards all of its
/// parameters unchanged to the ABI-specific implementation: AIX first, then
/// 64-bit ELF, and otherwise 32-bit ELF (which is asserted).
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);

  if (!Subtarget.is64BitELFABI()) {
    // Only the 32-bit ELF ABI is left once AIX and 64-bit ELF are ruled out.
    assert(Subtarget.is32BitELFABI());
    return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  }

  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
4227

4228
/// Lowers the incoming formal arguments of a function that uses the 32-bit
/// SVR4 calling convention.
///
/// The locations of all arguments in \p Ins are computed with CC_PPC32_SVR4.
/// For each location one value is appended to \p InVals: a copy from the
/// assigned register (two GPRs combined via BUILD_SPE64 for f64 on SPE), or a
/// load from a fixed stack object.  By-value aggregates are analysed separately
/// with CC_PPC32_SVR4_ByVal, and only to size the area reserved by the caller,
/// which is recorded through setMinReservedArea.  For variadic functions the
/// GPR argument registers (and the FPR argument registers unless soft-float or
/// SPE is in use) are stored to a freshly created stack object.
///
/// \returns the incoming \p Chain, or a TokenFactor joining the vararg
///          register stores when any were emitted.
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC = nullptr;
      EVT ValVT = VA.getValVT();

      // Pick the register class the live-in virtual register must belong to.
      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        // Every supported vector type lives in a vector register.
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        // The double occupies this location and the next one; consume both.
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap(ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        // An i1 is copied out of its register as i32 and truncated afterwards.
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    // No FPR argument registers are saved for soft-float or SPE.
    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Advance the address by the pointer size for the next register.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                         PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all register spills together so later users of the chain depend on
  // every one of them.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4476

4477
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4478
// value to MVT::i64 and then truncate to the correct register size.
4479
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4480
                                             EVT ObjectVT, SelectionDAG &DAG,
4481
                                             SDValue ArgVal,
4482
                                             const SDLoc &dl) const {
4483
  if (Flags.isSExt())
4484
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4485
                         DAG.getValueType(ObjectVT));
4486
  else if (Flags.isZExt())
4487
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4488
                         DAG.getValueType(ObjectVT));
4489

4490
  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4491
}
4492

4493
/// Lowers the incoming formal arguments of a function that uses the 64-bit
/// SVR4 (ELFv1 / ELFv2) calling convention.
///
/// A first pass over \p Ins (via CalculateStackSlotUsed) determines whether the
/// caller is guaranteed to have allocated a parameter save area.  A second pass
/// appends one value per entry of \p Ins to \p InVals:
///   - byval aggregates yield the address of a stack object, and any pieces
///     that arrived in GPRs are stored into that object;
///   - integer, floating-point and vector arguments are copied out of the next
///     free GPR / FPR / VR, or loaded from a fixed stack object once the
///     matching registers are exhausted;
///   - a 'nest' argument is read from X11.
/// Afterwards the minimum area reserved by the caller is recorded and, for
/// vararg functions that use va_start, the remaining GPRs are stored to the
/// parameter save area.
///
/// \returns the incoming \p Chain, or a TokenFactor joining every store that
///          was emitted.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = std::size(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  // Size of the part of the parameter save area that shadows the GPRs.
  const unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      // Keep FuncArg pointing at the IR argument this piece originates from.
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset = 0;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Alignment =
          CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = alignTo(ObjSize, PtrByteSize);
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + ParamAreaSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Alignment, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        // The final piece may cover fewer than PtrByteSize bytes.
        unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
        EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
        SDValue Store =
            DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
                              MachinePointerInfo(&*FuncArg, j), ObjType);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // Each integer argument takes one doubleword of the save area.
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += PtrByteSize;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        Register VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Two floats share one GPR; shift down the upper word when this
          // element sits there, then keep the low 32 bits.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = alignTo(ArgOffset, PtrByteSize);
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      // Each of these arguments takes 16 bytes of the save area.
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // On big-endian targets a value smaller than its slot sits at the high
      // addresses of the slot.
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + ParamAreaSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Advance the address by PtrByteSize (eight bytes) for the next
      // register to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  // Tie all emitted stores together so later users of the chain depend on
  // every one of them.
  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4868

4869
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4870
/// adjusted to accommodate the arguments for the tailcall.
4871
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4872
                                   unsigned ParamSize) {
4873

4874
  if (!isTailCall) return 0;
4875

4876
  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4877
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4878
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4879
  // Remember only if the new adjustment is bigger.
4880
  if (SPDiff < FI->getTailCallSPDelta())
4881
    FI->setTailCallSPDelta(SPDiff);
4882

4883
  return SPDiff;
4884
}
4885

4886
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4887

4888
static bool callsShareTOCBase(const Function *Caller,
4889
                              const GlobalValue *CalleeGV,
4890
                              const TargetMachine &TM) {
4891
  // It does not make sense to call callsShareTOCBase() with a caller that
4892
  // is PC Relative since PC Relative callers do not have a TOC.
4893
#ifndef NDEBUG
4894
  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4895
  assert(!STICaller->isUsingPCRelativeCalls() &&
4896
         "PC Relative callers do not have a TOC and cannot share a TOC Base");
4897
#endif
4898

4899
  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4900
  // don't have enough information to determine if the caller and callee share
4901
  // the same  TOC base, so we have to pessimistically assume they don't for
4902
  // correctness.
4903
  if (!CalleeGV)
4904
    return false;
4905

4906
  // If the callee is preemptable, then the static linker will use a plt-stub
4907
  // which saves the toc to the stack, and needs a nop after the call
4908
  // instruction to convert to a toc-restore.
4909
  if (!TM.shouldAssumeDSOLocal(CalleeGV))
4910
    return false;
4911

4912
  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4913
  // We may need a TOC restore in the situation where the caller requires a
4914
  // valid TOC but the callee is PC Relative and does not.
4915
  const Function *F = dyn_cast<Function>(CalleeGV);
4916
  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4917

4918
  // If we have an Alias we can try to get the function from there.
4919
  if (Alias) {
4920
    const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4921
    F = dyn_cast<Function>(GlobalObj);
4922
  }
4923

4924
  // If we still have no valid function pointer we do not have enough
4925
  // information to determine if the callee uses PC Relative calls so we must
4926
  // assume that it does.
4927
  if (!F)
4928
    return false;
4929

4930
  // If the callee uses PC Relative we cannot guarantee that the callee won't
4931
  // clobber the TOC of the caller and so we must assume that the two
4932
  // functions do not share a TOC base.
4933
  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4934
  if (STICallee->isUsingPCRelativeCalls())
4935
    return false;
4936

4937
  // If the GV is not a strong definition then we need to assume it can be
4938
  // replaced by another function at link time. The function that replaces
4939
  // it may not share the same TOC as the caller since the callee may be
4940
  // replaced by a PC Relative version of the same function.
4941
  if (!CalleeGV->isStrongDefinitionForLinker())
4942
    return false;
4943

4944
  // The medium and large code models are expected to provide a sufficiently
4945
  // large TOC to provide all data addressing needs of a module with a
4946
  // single TOC.
4947
  if (CodeModel::Medium == TM.getCodeModel() ||
4948
      CodeModel::Large == TM.getCodeModel())
4949
    return true;
4950

4951
  // Any explicitly-specified sections and section prefixes must also match.
4952
  // Also, if we're using -ffunction-sections, then each function is always in
4953
  // a different section (the same is true for COMDAT functions).
4954
  if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4955
      Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4956
    return false;
4957
  if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4958
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
4959
      return false;
4960
  }
4961

4962
  return true;
4963
}
4964

4965
static bool
4966
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4967
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
4968
  assert(Subtarget.is64BitELFABI());
4969

4970
  const unsigned PtrByteSize = 8;
4971
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4972

4973
  static const MCPhysReg GPR[] = {
4974
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4975
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4976
  };
4977
  static const MCPhysReg VR[] = {
4978
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4979
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4980
  };
4981

4982
  const unsigned NumGPRs = std::size(GPR);
4983
  const unsigned NumFPRs = 13;
4984
  const unsigned NumVRs = std::size(VR);
4985
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4986

4987
  unsigned NumBytes = LinkageSize;
4988
  unsigned AvailableFPRs = NumFPRs;
4989
  unsigned AvailableVRs = NumVRs;
4990

4991
  for (const ISD::OutputArg& Param : Outs) {
4992
    if (Param.Flags.isNest()) continue;
4993

4994
    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4995
                               LinkageSize, ParamAreaSize, NumBytes,
4996
                               AvailableFPRs, AvailableVRs))
4997
      return true;
4998
  }
4999
  return false;
5000
}
5001

5002
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5003
  if (CB.arg_size() != CallerFn->arg_size())
5004
    return false;
5005

5006
  auto CalleeArgIter = CB.arg_begin();
5007
  auto CalleeArgEnd = CB.arg_end();
5008
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5009

5010
  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5011
    const Value* CalleeArg = *CalleeArgIter;
5012
    const Value* CallerArg = &(*CallerArgIter);
5013
    if (CalleeArg == CallerArg)
5014
      continue;
5015

5016
    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5017
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
5018
    //      }
5019
    // 1st argument of callee is undef and has the same type as caller.
5020
    if (CalleeArg->getType() == CallerArg->getType() &&
5021
        isa<UndefValue>(CalleeArg))
5022
      continue;
5023

5024
    return false;
5025
  }
5026

5027
  return true;
5028
}
5029

5030
// Returns true if TCO is possible between the callers and callees
5031
// calling conventions.
5032
static bool
5033
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5034
                                    CallingConv::ID CalleeCC) {
5035
  // Tail calls are possible with fastcc and ccc.
5036
  auto isTailCallableCC  = [] (CallingConv::ID CC){
5037
      return  CC == CallingConv::C || CC == CallingConv::Fast;
5038
  };
5039
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5040
    return false;
5041

5042
  // We can safely tail call both fastcc and ccc callees from a c calling
5043
  // convention caller. If the caller is fastcc, we may have less stack space
5044
  // than a non-fastcc caller with the same signature so disable tail-calls in
5045
  // that case.
5046
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5047
}
5048

5049
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5050
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5051
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5052
    const SmallVectorImpl<ISD::OutputArg> &Outs,
5053
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5054
    bool isCalleeExternalSymbol) const {
5055
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5056

5057
  if (DisableSCO && !TailCallOpt) return false;
5058

5059
  // Variadic argument functions are not supported.
5060
  if (isVarArg) return false;
5061

5062
  // Check that the calling conventions are compatible for tco.
5063
  if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5064
    return false;
5065

5066
  // Caller contains any byval parameter is not supported.
5067
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5068
    return false;
5069

5070
  // Callee contains any byval parameter is not supported, too.
5071
  // Note: This is a quick work around, because in some cases, e.g.
5072
  // caller's stack size > callee's stack size, we are still able to apply
5073
  // sibling call optimization. For example, gcc is able to do SCO for caller1
5074
  // in the following example, but not for caller2.
5075
  //   struct test {
5076
  //     long int a;
5077
  //     char ary[56];
5078
  //   } gTest;
5079
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
5080
  //     b->a = v.a;
5081
  //     return 0;
5082
  //   }
5083
  //   void caller1(struct test a, struct test c, struct test *b) {
5084
  //     callee(gTest, b); }
5085
  //   void caller2(struct test *b) { callee(gTest, b); }
5086
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5087
    return false;
5088

5089
  // If callee and caller use different calling conventions, we cannot pass
5090
  // parameters on stack since offsets for the parameter area may be different.
5091
  if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5092
    return false;
5093

5094
  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5095
  // the caller and callee share the same TOC for TCO/SCO. If the caller and
5096
  // callee potentially have different TOC bases then we cannot tail call since
5097
  // we need to restore the TOC pointer after the call.
5098
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5099
  // We cannot guarantee this for indirect calls or calls to external functions.
5100
  // When PC-Relative addressing is used, the concept of the TOC is no longer
5101
  // applicable so this check is not required.
5102
  // Check first for indirect calls.
5103
  if (!Subtarget.isUsingPCRelativeCalls() &&
5104
      !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5105
    return false;
5106

5107
  // Check if we share the TOC base.
5108
  if (!Subtarget.isUsingPCRelativeCalls() &&
5109
      !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5110
    return false;
5111

5112
  // TCO allows altering callee ABI, so we don't have to check further.
5113
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
5114
    return true;
5115

5116
  if (DisableSCO) return false;
5117

5118
  // If callee use the same argument list that caller is using, then we can
5119
  // apply SCO on this case. If it is not, then we need to check if callee needs
5120
  // stack for passing arguments.
5121
  // PC Relative tail calls may not have a CallBase.
5122
  // If there is no CallBase we cannot verify if we have the same argument
5123
  // list so assume that we don't have the same argument list.
5124
  if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5125
      needStackSlotPassParameters(Subtarget, Outs))
5126
    return false;
5127
  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5128
    return false;
5129

5130
  return true;
5131
}
5132

5133
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5134
/// for tail call optimization. Targets which want to do tail call
5135
/// optimization should implement this function.
5136
bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5137
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5138
    CallingConv::ID CallerCC, bool isVarArg,
5139
    const SmallVectorImpl<ISD::InputArg> &Ins) const {
5140
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5141
    return false;
5142

5143
  // Variable argument functions are not supported.
5144
  if (isVarArg)
5145
    return false;
5146

5147
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5148
    // Functions containing by val parameters are not supported.
5149
    if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5150
      return false;
5151

5152
    // Non-PIC/GOT tail calls are supported.
5153
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5154
      return true;
5155

5156
    // At the moment we can only do local tail calls (in same module, hidden
5157
    // or protected) if we are generating PIC.
5158
    if (CalleeGV)
5159
      return CalleeGV->hasHiddenVisibility() ||
5160
             CalleeGV->hasProtectedVisibility();
5161
  }
5162

5163
  return false;
5164
}
5165

5166
/// isCallCompatibleAddress - Return the immediate to use if the specified
5167
/// 32-bit value is representable in the immediate field of a BxA instruction.
5168
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5169
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5170
  if (!C) return nullptr;
5171

5172
  int Addr = C->getZExtValue();
5173
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
5174
      SignExtend32<26>(Addr) != Addr)
5175
    return nullptr;  // Top 6 bits have to be sext of immediate.
5176

5177
  return DAG
5178
      .getConstant(
5179
          (int)C->getZExtValue() >> 2, SDLoc(Op),
5180
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5181
      .getNode();
5182
}
5183

5184
namespace {
5185

5186
struct TailCallArgumentInfo {
5187
  SDValue Arg;
5188
  SDValue FrameIdxOp;
5189
  int FrameIdx = 0;
5190

5191
  TailCallArgumentInfo() = default;
5192
};
5193

5194
} // end anonymous namespace
5195

5196
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5197
static void StoreTailCallArgumentsToStackSlot(
5198
    SelectionDAG &DAG, SDValue Chain,
5199
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5200
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5201
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5202
    SDValue Arg = TailCallArgs[i].Arg;
5203
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
5204
    int FI = TailCallArgs[i].FrameIdx;
5205
    // Store relative to framepointer.
5206
    MemOpChains.push_back(DAG.getStore(
5207
        Chain, dl, Arg, FIN,
5208
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5209
  }
5210
}
5211

5212
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5213
/// the appropriate stack slot for the tail call optimized function call.
5214
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5215
                                             SDValue OldRetAddr, SDValue OldFP,
5216
                                             int SPDiff, const SDLoc &dl) {
5217
  if (SPDiff) {
5218
    // Calculate the new stack slot for the return address.
5219
    MachineFunction &MF = DAG.getMachineFunction();
5220
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5221
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5222
    bool isPPC64 = Subtarget.isPPC64();
5223
    int SlotSize = isPPC64 ? 8 : 4;
5224
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5225
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5226
                                                         NewRetAddrLoc, true);
5227
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5228
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5229
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5230
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5231
  }
5232
  return Chain;
5233
}
5234

5235
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5236
/// the position of the argument.
5237
static void
5238
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5239
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
5240
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5241
  int Offset = ArgOffset + SPDiff;
5242
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5243
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5244
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5245
  SDValue FIN = DAG.getFrameIndex(FI, VT);
5246
  TailCallArgumentInfo Info;
5247
  Info.Arg = Arg;
5248
  Info.FrameIdxOp = FIN;
5249
  Info.FrameIdx = FI;
5250
  TailCallArguments.push_back(Info);
5251
}
5252

5253
/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5254
/// stack slot. Returns the chain as result and the loaded frame pointers in
5255
/// LROpOut/FPOpout. Used when tail calling.
5256
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5257
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5258
    SDValue &FPOpOut, const SDLoc &dl) const {
5259
  if (SPDiff) {
5260
    // Load the LR and FP stack slot for later adjusting.
5261
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5262
    LROpOut = getReturnAddrFrameIndex(DAG);
5263
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5264
    Chain = SDValue(LROpOut.getNode(), 1);
5265
  }
5266
  return Chain;
5267
}
5268

5269
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5270
/// by "Src" to address "Dst" of size "Size".  Alignment information is
5271
/// specified by the specific parameter attribute. The copy will be passed as
5272
/// a byval function parameter.
5273
/// Sometimes what we are copying is the end of a larger object, the part that
5274
/// does not fit in registers.
5275
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5276
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
5277
                                         SelectionDAG &DAG, const SDLoc &dl) {
5278
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5279
  return DAG.getMemcpy(
5280
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5281
      /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5282
}
5283

5284
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5285
/// tail calls.
5286
static void LowerMemOpCallTo(
5287
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5288
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5289
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5290
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5291
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5292
  if (!isTailCall) {
5293
    if (isVector) {
5294
      SDValue StackPtr;
5295
      if (isPPC64)
5296
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5297
      else
5298
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5299
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5300
                           DAG.getConstant(ArgOffset, dl, PtrVT));
5301
    }
5302
    MemOpChains.push_back(
5303
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5304
    // Calculate and remember argument location.
5305
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5306
                                  TailCallArguments);
5307
}
5308

5309
static void
5310
PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5311
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5312
                SDValue FPOp,
5313
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5314
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5315
  // might overwrite each other in case of tail call optimization.
5316
  SmallVector<SDValue, 8> MemOpChains2;
5317
  // Do not flag preceding copytoreg stuff together with the following stuff.
5318
  InGlue = SDValue();
5319
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5320
                                    MemOpChains2, dl);
5321
  if (!MemOpChains2.empty())
5322
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5323

5324
  // Store the return address to the appropriate stack slot.
5325
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5326

5327
  // Emit callseq_end just before tailcall node.
5328
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5329
  InGlue = Chain.getValue(1);
5330
}
5331

5332
// Is this global address that of a function that can be called by name? (as
5333
// opposed to something that must hold a descriptor for an indirect call).
5334
static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5335
  if (GV) {
5336
    if (GV->isThreadLocal())
5337
      return false;
5338

5339
    return GV->getValueType()->isFunctionTy();
5340
  }
5341

5342
  return false;
5343
}
5344

5345
/// Copy the results of a call out of the physical registers assigned by the
/// return calling convention and append them to \p InVals.
///
/// The copies are threaded through \p Chain and \p InGlue. Values that were
/// extended for the return are truncated back to their value type. With SPE,
/// an f64 result occupies two consecutive i32 locations that are recombined
/// with BUILD_SPE64.
///
/// \returns the chain after the last register copy.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    // Tracked through a pointer so that stepping to the next location
    // re-points VA instead of copy-assigning over the current RVLocs entry.
    const CCValAssign *VA = &RVLocs[i];
    assert(VA->isRegLoc() && "Can only return in registers!");

    SDValue Val;

    if (Subtarget.hasSPE() && VA->getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Lo.getValue(1);
      InGlue = Lo.getValue(2);
      VA = &RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA->getLocReg(), MVT::i32,
                                      InGlue);
      Chain = Hi.getValue(1);
      InGlue = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA->getLocReg(), VA->getLocVT(), InGlue);
      Chain = Val.getValue(1);
      InGlue = Val.getValue(2);
    }

    // Undo any extension applied to the value for the return. In the SPE
    // f64 case VA now designates the second of the two locations.
    switch (VA->getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA->getLocVT(), Val,
                        DAG.getValueType(VA->getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA->getLocVT(), Val,
                        DAG.getValueType(VA->getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA->getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5408

5409
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5410
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
5411
  auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5412
  const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5413

5414
  // PatchPoint calls are not indirect.
5415
  if (isPatchPoint)
5416
    return false;
5417

5418
  if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5419
    return false;
5420

5421
  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5422
  // becuase the immediate function pointer points to a descriptor instead of
5423
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5424
  // pointer immediate points to the global entry point, while the BLA would
5425
  // need to jump to the local entry point (see rL211174).
5426
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5427
      isBLACompatibleAddress(Callee, DAG))
5428
    return false;
5429

5430
  return true;
5431
}
5432

5433
// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5434
static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5435
  return Subtarget.isAIXABI() ||
5436
         (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5437
}
5438

5439
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5440
                              const Function &Caller, const SDValue &Callee,
5441
                              const PPCSubtarget &Subtarget,
5442
                              const TargetMachine &TM,
5443
                              bool IsStrictFPCall = false) {
5444
  if (CFlags.IsTailCall)
5445
    return PPCISD::TC_RETURN;
5446

5447
  unsigned RetOpc = 0;
5448
  // This is a call through a function pointer.
5449
  if (CFlags.IsIndirect) {
5450
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5451
    // indirect calls. The save of the caller's TOC pointer to the stack will be
5452
    // inserted into the DAG as part of call lowering. The restore of the TOC
5453
    // pointer is modeled by using a pseudo instruction for the call opcode that
5454
    // represents the 2 instruction sequence of an indirect branch and link,
5455
    // immediately followed by a load of the TOC pointer from the stack save
5456
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5457
    // as it is not saved or used.
5458
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5459
                                                 : PPCISD::BCTRL;
5460
  } else if (Subtarget.isUsingPCRelativeCalls()) {
5461
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5462
    RetOpc = PPCISD::CALL_NOTOC;
5463
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5464
    // The ABIs that maintain a TOC pointer accross calls need to have a nop
5465
    // immediately following the call instruction if the caller and callee may
5466
    // have different TOC bases. At link time if the linker determines the calls
5467
    // may not share a TOC base, the call is redirected to a trampoline inserted
5468
    // by the linker. The trampoline will (among other things) save the callers
5469
    // TOC pointer at an ABI designated offset in the linkage area and the
5470
    // linker will rewrite the nop to be a load of the TOC pointer from the
5471
    // linkage area into gpr2.
5472
    auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5473
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5474
    RetOpc =
5475
        callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5476
  } else
5477
    RetOpc = PPCISD::CALL;
5478
  if (IsStrictFPCall) {
5479
    switch (RetOpc) {
5480
    default:
5481
      llvm_unreachable("Unknown call opcode");
5482
    case PPCISD::BCTRL_LOAD_TOC:
5483
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5484
      break;
5485
    case PPCISD::BCTRL:
5486
      RetOpc = PPCISD::BCTRL_RM;
5487
      break;
5488
    case PPCISD::CALL_NOTOC:
5489
      RetOpc = PPCISD::CALL_NOTOC_RM;
5490
      break;
5491
    case PPCISD::CALL:
5492
      RetOpc = PPCISD::CALL_RM;
5493
      break;
5494
    case PPCISD::CALL_NOP:
5495
      RetOpc = PPCISD::CALL_NOP_RM;
5496
      break;
5497
    }
5498
  }
5499
  return RetOpc;
5500
}
5501

5502
/// Rewrite \p Callee into the node that the call instruction should use:
/// a BLA immediate where permitted, a target global address or target
/// external symbol (with the PLT flag when applicable), or, on AIX, the
/// function entry point symbol. Any other callee is returned unchanged.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  const bool MayUseBLA =
      !Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI();
  if (MayUseBLA)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  const auto *GlobalNode = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = GlobalNode ? GlobalNode->getGlobal() : nullptr;

  // Returns true if the callee is local, and false otherwise.
  const auto IsLocalCallee = [&]() {
    return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
           !isa_and_nonnull<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  const bool UsePlt =
      Subtarget.is32BitELFABI() && !IsLocalCallee() &&
      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
  const unsigned TargetFlags = UsePlt ? PPCII::MO_PLT : 0;

  // Builds the MCSymbol node naming the AIX entry point of the given function.
  const auto GetAIXEntryPointNode = [&](const GlobalValue *Fn) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *EntrySym =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(Fn, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(EntrySym, PtrVT);
  };

  if (isFunctionGlobalAddress(GV)) {
    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return GetAIXEntryPointNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      TargetFlags);
  }

  if (const auto *ExtSym = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = ExtSym->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return GetAIXEntryPointNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is used here because an external
      // function entry point is a csect with XTY_ER property.
      auto &Context = DAG.getMachineFunction().getContext();
      MCSectionXCOFF *Sec = Context.getXCOFFSection(
          (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
          XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
      SymName = Sec->getQualNameSymbol()->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       TargetFlags);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5580

5581
/// Returns the chain result of a CALLSEQ_START node.
///
/// The chain is the node's last result value unless the node also produces
/// glue; in that case glue is last and the chain is the value just before it.
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  const unsigned NumResults = CallSeqStart->getNumValues();
  const SDValue Tail = CallSeqStart.getValue(NumResults - 1);
  const bool EndsWithGlue = Tail.getValueType() == MVT::Glue;

  // Only look at the second-to-last value when the last one really is glue.
  return EndsWithGlue ? CallSeqStart.getValue(NumResults - 2) : Tail;
}
5594

5595
// Creates the node that moves a functions address into the count register
5596
// to prepare for an indirect call instruction.
5597
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5598
                                SDValue &Glue, SDValue &Chain,
5599
                                const SDLoc &dl) {
5600
  SDValue MTCTROps[] = {Chain, Callee, Glue};
5601
  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5602
  Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5603
                      ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5604
  // The glue is the second value produced.
5605
  Glue = Chain.getValue(1);
5606
}
5607

5608
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5609
                                          SDValue &Glue, SDValue &Chain,
5610
                                          SDValue CallSeqStart,
5611
                                          const CallBase *CB, const SDLoc &dl,
5612
                                          bool hasNest,
5613
                                          const PPCSubtarget &Subtarget) {
5614
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
5615
  // entry point, but to the function descriptor (the function entry point
5616
  // address is part of the function descriptor though).
5617
  // The function descriptor is a three doubleword structure with the
5618
  // following fields: function entry point, TOC base address and
5619
  // environment pointer.
5620
  // Thus for a call through a function pointer, the following actions need
5621
  // to be performed:
5622
  //   1. Save the TOC of the caller in the TOC save area of its stack
5623
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5624
  //   2. Load the address of the function entry point from the function
5625
  //      descriptor.
5626
  //   3. Load the TOC of the callee from the function descriptor into r2.
5627
  //   4. Load the environment pointer from the function descriptor into
5628
  //      r11.
5629
  //   5. Branch to the function entry point address.
5630
  //   6. On return of the callee, the TOC of the caller needs to be
5631
  //      restored (this is done in FinishCall()).
5632
  //
5633
  // The loads are scheduled at the beginning of the call sequence, and the
5634
  // register copies are flagged together to ensure that no other
5635
  // operations can be scheduled in between. E.g. without flagging the
5636
  // copies together, a TOC access in the caller could be scheduled between
5637
  // the assignment of the callee TOC and the branch to the callee, which leads
5638
  // to incorrect code.
5639

5640
  // Start by loading the function address from the descriptor.
5641
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5642
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5643
                      ? (MachineMemOperand::MODereferenceable |
5644
                         MachineMemOperand::MOInvariant)
5645
                      : MachineMemOperand::MONone;
5646

5647
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5648

5649
  // Registers used in building the DAG.
5650
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5651
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5652

5653
  // Offsets of descriptor members.
5654
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5655
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5656

5657
  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5658
  const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5659

5660
  // One load for the functions entry point address.
5661
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5662
                                    Alignment, MMOFlags);
5663

5664
  // One for loading the TOC anchor for the module that contains the called
5665
  // function.
5666
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5667
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5668
  SDValue TOCPtr =
5669
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5670
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5671

5672
  // One for loading the environment pointer.
5673
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5674
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5675
  SDValue LoadEnvPtr =
5676
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5677
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5678

5679

5680
  // Then copy the newly loaded TOC anchor to the TOC pointer.
5681
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5682
  Chain = TOCVal.getValue(0);
5683
  Glue = TOCVal.getValue(1);
5684

5685
  // If the function call has an explicit 'nest' parameter, it takes the
5686
  // place of the environment pointer.
5687
  assert((!hasNest || !Subtarget.isAIXABI()) &&
5688
         "Nest parameter is not supported on AIX.");
5689
  if (!hasNest) {
5690
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5691
    Chain = EnvVal.getValue(0);
5692
    Glue = EnvVal.getValue(1);
5693
  }
5694

5695
  // The rest of the indirect call sequence is the same as the non-descriptor
5696
  // DAG.
5697
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5698
}
5699

5700
static void
5701
buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5702
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5703
                  SelectionDAG &DAG,
5704
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5705
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5706
                  const PPCSubtarget &Subtarget) {
5707
  const bool IsPPC64 = Subtarget.isPPC64();
5708
  // MVT for a general purpose register.
5709
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5710

5711
  // First operand is always the chain.
5712
  Ops.push_back(Chain);
5713

5714
  // If it's a direct call pass the callee as the second operand.
5715
  if (!CFlags.IsIndirect)
5716
    Ops.push_back(Callee);
5717
  else {
5718
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5719

5720
    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5721
    // on the stack (this would have been done in `LowerCall_64SVR4` or
5722
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5723
    // represents both the indirect branch and a load that restores the TOC
5724
    // pointer from the linkage area. The operand for the TOC restore is an add
5725
    // of the TOC save offset to the stack pointer. This must be the second
5726
    // operand: after the chain input but before any other variadic arguments.
5727
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5728
    // saved or used.
5729
    if (isTOCSaveRestoreRequired(Subtarget)) {
5730
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5731

5732
      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5733
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5734
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5735
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5736
      Ops.push_back(AddTOC);
5737
    }
5738

5739
    // Add the register used for the environment pointer.
5740
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5741
      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5742
                                    RegVT));
5743

5744

5745
    // Add CTR register as callee so a bctr can be emitted later.
5746
    if (CFlags.IsTailCall)
5747
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5748
  }
5749

5750
  // If this is a tail call add stack pointer delta.
5751
  if (CFlags.IsTailCall)
5752
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5753

5754
  // Add argument registers to the end of the list so that they are known live
5755
  // into the call.
5756
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5757
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5758
                                  RegsToPass[i].second.getValueType()));
5759

5760
  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5761
  // no way to mark dependencies as implicit here.
5762
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5763
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5764
       !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5765
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5766

5767
  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5768
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5769
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5770

5771
  // Add a register mask operand representing the call-preserved registers.
5772
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5773
  const uint32_t *Mask =
5774
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5775
  assert(Mask && "Missing call preserved mask for calling convention");
5776
  Ops.push_back(DAG.getRegisterMask(Mask));
5777

5778
  // If the glue is valid, it is the last operand.
5779
  if (Glue.getNode())
5780
    Ops.push_back(Glue);
5781
}
5782

5783
/// Emits the call node itself once the arguments have been set up.
///
/// Selects the call opcode, transforms the callee (direct calls) or prepares
/// the count register (indirect calls), and builds the operand list. For a
/// tail call the TC_RETURN node is returned directly. Otherwise the call is
/// followed by CALLSEQ_END and the results are lowered via LowerCallResult(),
/// whose return value is returned.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // Record use of the TOC base pointer for the TOC based ABIs.
  if (Subtarget.isAIXABI() ||
      (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()))
    setUsesTOCBasePtr(DAG);

  // The opcode is chosen from the callee as it was handed in, before it is
  // transformed below.
  const bool IsStrictFP = CB && CB->isStrictFP();
  const unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), IsStrictFP);

  if (CFlags.IsIndirect) {
    if (Subtarget.usesFunctionDescriptors())
      prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                    dl, CFlags.HasNest, Subtarget);
    else
      prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
  } else {
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  }

  // Operand list for the call instruction.
  SmallVector<SDValue, 8> Ops;
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  if (CFlags.IsTailCall) {
    // Indirect tail calls under PC Relative calls are not subject to the
    // same constraints on the callee.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls mark tail calls with TC_RETURN as well.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    DAG.getMachineFunction().getFrameInfo().setHasTailCall();

    SDValue TailCall = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
    DAG.addNoMergeSiteInfo(TailCall.getNode(), CFlags.NoMerge);
    return TailCall;
  }

  // A regular call produces a chain and a glue.
  EVT CallResultVTs[] = {MVT::Other, MVT::Glue};
  Chain = DAG.getNode(CallOpc, dl, CallResultVTs, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // With guaranteed tail call optimization the callee of a fastcc call pops
  // its arguments off the stack. Account for that here so that those bytes
  // can be pushed back on in PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = 0;
  if (CFlags.CallConv == CallingConv::Fast &&
      getTargetMachine().Options.GuaranteedTailCallOpt)
    BytesCalleePops = NumBytes;

  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5852

5853
bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5854
  CallingConv::ID CalleeCC = CB->getCallingConv();
5855
  const Function *CallerFunc = CB->getCaller();
5856
  CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5857
  const Function *CalleeFunc = CB->getCalledFunction();
5858
  if (!CalleeFunc)
5859
    return false;
5860
  const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5861

5862
  SmallVector<ISD::OutputArg, 2> Outs;
5863
  SmallVector<ISD::InputArg, 2> Ins;
5864

5865
  GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5866
                CalleeFunc->getAttributes(), Outs, *this,
5867
                CalleeFunc->getDataLayout());
5868

5869
  return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5870
                          CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5871
                          false /*isCalleeExternalSymbol*/);
5872
}
5873

5874
bool PPCTargetLowering::isEligibleForTCO(
5875
    const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5876
    CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5877
    const SmallVectorImpl<ISD::OutputArg> &Outs,
5878
    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5879
    bool isCalleeExternalSymbol) const {
5880
  if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5881
    return false;
5882

5883
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5884
    return IsEligibleForTailCallOptimization_64SVR4(
5885
        CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5886
        isCalleeExternalSymbol);
5887
  else
5888
    return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5889
                                             isVarArg, Ins);
5890
}
5891

5892
/// Entry point for lowering a call.
///
/// Re-evaluates a requested tail call (writing the outcome back to
/// CLI.IsTailCall), reports a fatal error if a musttail call site cannot be
/// tail called, materializes the callee address when long calls are in use,
/// and then dispatches to the ABI-specific lowering routine.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  // Bound by reference: the final tail call decision is visible to the caller.
  bool &isTailCall = CLI.IsTailCall;
  const CallingConv::ID CallConv = CLI.CallConv;
  const bool isVarArg = CLI.IsVarArg;
  const bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    const Function &Caller = DAG.getMachineFunction().getFunction();

    const GlobalValue *GV = nullptr;
    if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
      GV = G->getGlobal();

    isTailCall = isEligibleForTCO(GV, CallConv, Caller.getCallingConv(), CB,
                                  isVarArg, Outs, Ins, &Caller,
                                  isa<ExternalSymbolSDNode>(Callee));
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // With PC Relative calls the callee is no longer guaranteed to be a
      // Global Address Node. For an indirect tail call the callee SDValue
      // may be a load (of a function pointer) or a register copy (moving the
      // callee address from a function parameter into a virtual register).
      // It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  const bool IsMustTail = CB && CB->isMustTailCall();
  if (IsMustTail && !isTailCall)
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, every call goes
  // through a function pointer. A function name is first translated into a
  // pointer, except for tail calls.
  if (!isTailCall && Subtarget.useLongCalls() &&
      isa<GlobalAddressSDNode>(Callee))
    Callee = LowerGlobalAddress(Callee, DAG);

  const bool IsIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
  const bool HasNest =
      Subtarget.is64BitELFABI() &&
      any_of(Outs, [](const ISD::OutputArg &Arg) { return Arg.Flags.isNest(); });

  CallFlags CFlags(CallConv, isTailCall, isVarArg, isPatchPoint, IsIndirect,
                   HasNest, CLI.NoMerge);

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (!Subtarget.isPPC64())
    return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);

  return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5969

5970
/// Lowers a call for the 32-bit SVR4 ABI.
///
/// Assigns a register or stack location to every outgoing argument, copies
/// by-value aggregates into the caller's frame, emits the stores and register
/// copies for the arguments, sets or clears CR bit 6 for vararg calls, and
/// hands over to FinishCall() to emit the call itself.
///
/// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
/// of the 32-bit SVR4 ABI stack frame layout.
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  const CallingConv::ID CallConv = CFlags.CallConv;
  const bool IsVarArg = CFlags.IsVarArg;
  const bool IsTailCall = CFlags.IsTailCall;

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  const Align PtrAlign(4);

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (IsVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      // A true result means the calling convention could not place the
      // argument.
      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << ArgVT << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // space variable where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getStackSize();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current stack
      // frame. The copy must go outside the CALLSEQ_START..END, so the helper
      // emits it ahead of the call sequence and returns a new CALLSEQ_START
      // (with the same frame size) that replaces the old one.
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(
          Arg, PtrOff, CallSeqStart, Flags, DAG, dl);

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        // With SPE an f64 is split into two i32 halves that occupy two
        // consecutive register locations; which half goes first depends on
        // the endianness.
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                        DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        // The second half consumes the next location, hence the ++i.
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                             SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!IsTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (IsVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InGlue };

    // The glue is only passed as an operand when it exists.
    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
                        VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

    InGlue = Chain.getValue(1);
  }

  if (IsTailCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6206

6207
// Copy an argument into memory, being careful to do this outside the
6208
// call sequence for the call to which the argument belongs.
6209
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6210
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6211
    SelectionDAG &DAG, const SDLoc &dl) const {
6212
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6213
                        CallSeqStart.getNode()->getOperand(0),
6214
                        Flags, DAG, dl);
6215
  // The MEMCPY must go outside the CALLSEQ_START..END.
6216
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6217
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6218
                                                 SDLoc(MemcpyCall));
6219
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6220
                         NewCallSeqStart.getNode());
6221
  return NewCallSeqStart;
6222
}
6223

6224
// Lower an outgoing call using the 64-bit SVR4 (ELFv1 / ELFv2) convention.
//
// The work is done in three phases:
//   1. Decide whether a parameter save area is needed and compute the number
//      of bytes (linkage area + parameter area) the call needs on the stack.
//   2. Walk the arguments and assign each one to GPRs (X3-X10), FPRs, VRs
//      (V2-V13) and/or a stack slot, emitting the loads, stores and by-value
//      copies required to put it there.
//   3. Emit the register copies, the tail-call preparation when applicable,
//      and delegate the actual call emission to FinishCall.
//
// Chain    - incoming token chain.
// Callee   - the call target.
// CFlags   - calling convention and the vararg / tail-call / indirect /
//            patchpoint properties of the call.
// Outs     - type and flag information for each outgoing argument.
// OutVals  - the argument values, parallel to Outs.
// Ins, InVals, CB - forwarded unchanged to FinishCall.
//
// Returns the value produced by FinishCall.
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool IsSibCall = false;
  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // A tail call that is not performed under GuaranteedTailCallOpt is treated
  // as a sibling call: no call sequence is opened and SPDiff stays 0 below.
  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(IsFastCall && CFlags.IsVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = std::size(GPR);
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
  const unsigned NumVRs = std::size(VR);

  // On ELFv2, we can avoid allocating the parameter area if all the arguments
  // can be passed to the callee in registers.
  // For the fast calling convention, there is another check below.
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs))
        HasParameterArea = true;
    }
  }

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Avoid allocating parameter area for fastcc functions if all the arguments
  // can be passed in the registers.
  if (IsFastCall)
    HasParameterArea = false;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (IsFastCall) {
      if (Flags.isByVal()) {
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;
      } else {
        // An argument that still fits in its register class consumes no
        // stack space under fastcc, so skip the size accounting for it.
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4f32:
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
        case MVT::f128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::f32:
        case MVT::f64:
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
        HasParameterArea = true;
      }
    }

    /* Respect alignment of argument on the stack.  */
    auto Alignment =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = alignTo(NumBytes, Alignment);

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  // Remembered only to cross-check against ArgOffset after the arguments
  // have been laid out (see the assert following the argument loop).
  unsigned NumBytesActuallyUsed = NumBytes;

  // In the old ELFv1 ABI,
  // the prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
  // really requires memory operands, e.g. a vararg function.
  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  else
    NumBytes = LinkageSize;

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (CFlags.IsTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      auto Alignment =
          CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (!IsFastCall) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (IsFastCall)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      // No GPR left: the small aggregate goes to its stack slot only.  On
      // big-endian targets the destination is advanced so the data ends up
      // in the high-address end of the doubleword.
      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy the object to parameter save area if it can not be entirely passed
      // by registers.
      // FIXME: we only need to copy the parts which need to be passed in
      // parameter save area. For the parts passed by registers, we don't need
      // to copy them to the stack although we need to allocate space for them
      // in parameter save area.
      if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
                                        MachinePointerInfo(), ObjType);

          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: account for the rest of the aggregate (rounded up
          // to whole doublewords) in one step and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        // Under fastcc only arguments that really went to memory consume
        // stack space.
        if (IsFastCall)
          ArgOffset += PtrByteSize;
      }
      if (!IsFastCall)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && !IsFastCall) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (!IsFastCall || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (CFlags.IsVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Reload the stored vector one doubleword at a time into any GPRs
        // that are still free.  SlotOff is the byte offset inside the
        // 16-byte slot.
        for (unsigned SlotOff = 0; SlotOff < 16; SlotOff += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(SlotOff, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (IsFastCall)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, CFlags.IsTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (IsFastCall)
          ArgOffset += 16;
      }

      if (!IsFastCall)
        ArgOffset += 16;
      break;
    }
  }

  // The layout pass above must have consumed exactly the space computed by
  // the sizing pass whenever a parameter area is in use.
  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See prepareDescriptorIndirectCall and buildCallOperands for more
  // information about calls through function pointers in the 64-bit SVR4 ABI.
  if (CFlags.IsIndirect) {
    // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
    // caller in the TOC save area.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
      // Load r2 into a virtual register and store it to the TOC save area.
      setUsesTOCBasePtr(DAG);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
      // TOC save area offset.
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                           MachinePointerInfo::getStack(
                               DAG.getMachineFunction(), TOCSaveOffset));
    }
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !CFlags.IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  if (CFlags.IsTailCall && !IsSibCall)
    PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
6808

6809
// Returns true when the shadow of a general purpose argument register
6810
// in the parameter save area is aligned to at least 'RequiredAlign'.
6811
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6812
  assert(RequiredAlign.value() <= 16 &&
6813
         "Required alignment greater than stack alignment.");
6814
  switch (Reg) {
6815
  default:
6816
    report_fatal_error("called on invalid register.");
6817
  case PPC::R5:
6818
  case PPC::R9:
6819
  case PPC::X3:
6820
  case PPC::X5:
6821
  case PPC::X7:
6822
  case PPC::X9:
6823
    // These registers are 16 byte aligned which is the most strict aligment
6824
    // we can support.
6825
    return true;
6826
  case PPC::R3:
6827
  case PPC::R7:
6828
  case PPC::X4:
6829
  case PPC::X6:
6830
  case PPC::X8:
6831
  case PPC::X10:
6832
    // The shadow of these registers in the PSA is 8 byte aligned.
6833
    return RequiredAlign <= 8;
6834
  case PPC::R4:
6835
  case PPC::R6:
6836
  case PPC::R8:
6837
  case PPC::R10:
6838
    return RequiredAlign <= 4;
6839
  }
6840
}
6841

6842
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6843
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6844
                   CCState &S) {
6845
  AIXCCState &State = static_cast<AIXCCState &>(S);
6846
  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6847
      State.getMachineFunction().getSubtarget());
6848
  const bool IsPPC64 = Subtarget.isPPC64();
6849
  const unsigned PtrSize = IsPPC64 ? 8 : 4;
6850
  const Align PtrAlign(PtrSize);
6851
  const Align StackAlign(16);
6852
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6853

6854
  if (ValVT == MVT::f128)
6855
    report_fatal_error("f128 is unimplemented on AIX.");
6856

6857
  if (ArgFlags.isNest())
6858
    report_fatal_error("Nest arguments are unimplemented.");
6859

6860
  static const MCPhysReg GPR_32[] = {// 32-bit registers.
6861
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6862
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6863
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
6864
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6865
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6866

6867
  static const MCPhysReg VR[] = {// Vector registers.
6868
                                 PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
6869
                                 PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
6870
                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6871

6872
  const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6873

6874
  if (ArgFlags.isByVal()) {
6875
    const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6876
    if (ByValAlign > StackAlign)
6877
      report_fatal_error("Pass-by-value arguments with alignment greater than "
6878
                         "16 are not supported.");
6879

6880
    const unsigned ByValSize = ArgFlags.getByValSize();
6881
    const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6882

6883
    // An empty aggregate parameter takes up no storage and no registers,
6884
    // but needs a MemLoc for a stack slot for the formal arguments side.
6885
    if (ByValSize == 0) {
6886
      State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6887
                                       State.getStackSize(), RegVT, LocInfo));
6888
      return false;
6889
    }
6890

6891
    // Shadow allocate any registers that are not properly aligned.
6892
    unsigned NextReg = State.getFirstUnallocated(GPRs);
6893
    while (NextReg != GPRs.size() &&
6894
           !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6895
      // Shadow allocate next registers since its aligment is not strict enough.
6896
      unsigned Reg = State.AllocateReg(GPRs);
6897
      // Allocate the stack space shadowed by said register.
6898
      State.AllocateStack(PtrSize, PtrAlign);
6899
      assert(Reg && "Alocating register unexpectedly failed.");
6900
      (void)Reg;
6901
      NextReg = State.getFirstUnallocated(GPRs);
6902
    }
6903

6904
    const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6905
    unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6906
    for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6907
      if (unsigned Reg = State.AllocateReg(GPRs))
6908
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6909
      else {
6910
        State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6911
                                         Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6912
                                         LocInfo));
6913
        break;
6914
      }
6915
    }
6916
    return false;
6917
  }
6918

6919
  // Arguments always reserve parameter save area.
6920
  switch (ValVT.SimpleTy) {
6921
  default:
6922
    report_fatal_error("Unhandled value type for argument.");
6923
  case MVT::i64:
6924
    // i64 arguments should have been split to i32 for PPC32.
6925
    assert(IsPPC64 && "PPC32 should have split i64 values.");
6926
    [[fallthrough]];
6927
  case MVT::i1:
6928
  case MVT::i32: {
6929
    const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6930
    // AIX integer arguments are always passed in register width.
6931
    if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6932
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6933
                                  : CCValAssign::LocInfo::ZExt;
6934
    if (unsigned Reg = State.AllocateReg(GPRs))
6935
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6936
    else
6937
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6938

6939
    return false;
6940
  }
6941
  case MVT::f32:
6942
  case MVT::f64: {
6943
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
6944
    const unsigned StoreSize = LocVT.getStoreSize();
6945
    // Floats are always 4-byte aligned in the PSA on AIX.
6946
    // This includes f64 in 64-bit mode for ABI compatibility.
6947
    const unsigned Offset =
6948
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6949
    unsigned FReg = State.AllocateReg(FPR);
6950
    if (FReg)
6951
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6952

6953
    // Reserve and initialize GPRs or initialize the PSA as required.
6954
    for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6955
      if (unsigned Reg = State.AllocateReg(GPRs)) {
6956
        assert(FReg && "An FPR should be available when a GPR is reserved.");
6957
        if (State.isVarArg()) {
6958
          // Successfully reserved GPRs are only initialized for vararg calls.
6959
          // Custom handling is required for:
6960
          //   f64 in PPC32 needs to be split into 2 GPRs.
6961
          //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6962
          State.addLoc(
6963
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6964
        }
6965
      } else {
6966
        // If there are insufficient GPRs, the PSA needs to be initialized.
6967
        // Initialization occurs even if an FPR was initialized for
6968
        // compatibility with the AIX XL compiler. The full memory for the
6969
        // argument will be initialized even if a prior word is saved in GPR.
6970
        // A custom memLoc is used when the argument also passes in FPR so
6971
        // that the callee handling can skip over it easily.
6972
        State.addLoc(
6973
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6974
                                             LocInfo)
6975
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6976
        break;
6977
      }
6978
    }
6979

6980
    return false;
6981
  }
6982
  case MVT::v4f32:
6983
  case MVT::v4i32:
6984
  case MVT::v8i16:
6985
  case MVT::v16i8:
6986
  case MVT::v2i64:
6987
  case MVT::v2f64:
6988
  case MVT::v1i128: {
6989
    const unsigned VecSize = 16;
6990
    const Align VecAlign(VecSize);
6991

6992
    if (!State.isVarArg()) {
6993
      // If there are vector registers remaining we don't consume any stack
6994
      // space.
6995
      if (unsigned VReg = State.AllocateReg(VR)) {
6996
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6997
        return false;
6998
      }
6999
      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7000
      // might be allocated in the portion of the PSA that is shadowed by the
7001
      // GPRs.
7002
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7003
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7004
      return false;
7005
    }
7006

7007
    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7008
    // Burn any underaligned registers and their shadowed stack space until
7009
    // we reach the required alignment.
7010
    while (NextRegIndex != GPRs.size() &&
7011
           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7012
      // Shadow allocate register and its stack shadow.
7013
      unsigned Reg = State.AllocateReg(GPRs);
7014
      State.AllocateStack(PtrSize, PtrAlign);
7015
      assert(Reg && "Allocating register unexpectedly failed.");
7016
      (void)Reg;
7017
      NextRegIndex = State.getFirstUnallocated(GPRs);
7018
    }
7019

7020
    // Vectors that are passed as fixed arguments are handled differently.
7021
    // They are passed in VRs if any are available (unlike arguments passed
7022
    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
7023
    // functions)
7024
    if (State.isFixed(ValNo)) {
7025
      if (unsigned VReg = State.AllocateReg(VR)) {
7026
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7027
        // Shadow allocate GPRs and stack space even though we pass in a VR.
7028
        for (unsigned I = 0; I != VecSize; I += PtrSize)
7029
          State.AllocateReg(GPRs);
7030
        State.AllocateStack(VecSize, VecAlign);
7031
        return false;
7032
      }
7033
      // No vector registers remain so pass on the stack.
7034
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7035
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7036
      return false;
7037
    }
7038

7039
    // If all GPRS are consumed then we pass the argument fully on the stack.
7040
    if (NextRegIndex == GPRs.size()) {
7041
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7042
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7043
      return false;
7044
    }
7045

7046
    // Corner case for 32-bit codegen. We have 2 registers to pass the first
7047
    // half of the argument, and then need to pass the remaining half on the
7048
    // stack.
7049
    if (GPRs[NextRegIndex] == PPC::R9) {
7050
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7051
      State.addLoc(
7052
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053

7054
      const unsigned FirstReg = State.AllocateReg(PPC::R9);
7055
      const unsigned SecondReg = State.AllocateReg(PPC::R10);
7056
      assert(FirstReg && SecondReg &&
7057
             "Allocating R9 or R10 unexpectedly failed.");
7058
      State.addLoc(
7059
          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7060
      State.addLoc(
7061
          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7062
      return false;
7063
    }
7064

7065
    // We have enough GPRs to fully pass the vector argument, and we have
7066
    // already consumed any underaligned registers. Start with the custom
7067
    // MemLoc and then the custom RegLocs.
7068
    const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7069
    State.addLoc(
7070
        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7071
    for (unsigned I = 0; I != VecSize; I += PtrSize) {
7072
      const unsigned Reg = State.AllocateReg(GPRs);
7073
      assert(Reg && "Failed to allocated register for vararg vector argument");
7074
      State.addLoc(
7075
          CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7076
    }
7077
    return false;
7078
  }
7079
  }
7080
  return true;
7081
}
7082

7083
// So far, this function is only used by LowerFormalArguments_AIX()
7084
static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7085
                                                    bool IsPPC64,
7086
                                                    bool HasP8Vector,
7087
                                                    bool HasVSX) {
7088
  assert((IsPPC64 || SVT != MVT::i64) &&
7089
         "i64 should have been split for 32-bit codegen.");
7090

7091
  switch (SVT) {
7092
  default:
7093
    report_fatal_error("Unexpected value type for formal argument");
7094
  case MVT::i1:
7095
  case MVT::i32:
7096
  case MVT::i64:
7097
    return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7098
  case MVT::f32:
7099
    return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7100
  case MVT::f64:
7101
    return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7102
  case MVT::v4f32:
7103
  case MVT::v4i32:
7104
  case MVT::v8i16:
7105
  case MVT::v16i8:
7106
  case MVT::v2i64:
7107
  case MVT::v2f64:
7108
  case MVT::v1i128:
7109
    return &PPC::VRRCRegClass;
7110
  }
7111
}
7112

7113
// Narrows an integer argument that arrived in the wider 'LocVT' down to its
// declared 'ValVT'. If the argument is flagged sign- or zero-extended, the
// incoming value is first wrapped in the matching AssertSext/AssertZext node
// so the extension is visible to the DAG, and then truncated.
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
                                        SelectionDAG &DAG, SDValue ArgValue,
                                        MVT LocVT, const SDLoc &dl) {
  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());

  const bool IsSignExtended = Flags.isSExt();
  if (IsSignExtended || Flags.isZExt()) {
    const unsigned AssertOpcode =
        IsSignExtended ? ISD::AssertSext : ISD::AssertZext;
    ArgValue = DAG.getNode(AssertOpcode, dl, LocVT, ArgValue,
                           DAG.getValueType(ValVT));
  }

  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
7128

7129
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7130
  const unsigned LASize = FL->getLinkageSize();
7131

7132
  if (PPC::GPRCRegClass.contains(Reg)) {
7133
    assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7134
           "Reg must be a valid argument register!");
7135
    return LASize + 4 * (Reg - PPC::R3);
7136
  }
7137

7138
  if (PPC::G8RCRegClass.contains(Reg)) {
7139
    assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7140
           "Reg must be a valid argument register!");
7141
    return LASize + 8 * (Reg - PPC::X3);
7142
  }
7143

7144
  llvm_unreachable("Only general purpose registers expected.");
7145
}
7146

7147
//   AIX ABI Stack Frame Layout:
7148
//
7149
//   Low Memory +--------------------------------------------+
7150
//   SP   +---> | Back chain                                 | ---+
7151
//        |     +--------------------------------------------+    |   
7152
//        |     | Saved Condition Register                   |    |
7153
//        |     +--------------------------------------------+    |
7154
//        |     | Saved Linkage Register                     |    |
7155
//        |     +--------------------------------------------+    | Linkage Area
7156
//        |     | Reserved for compilers                     |    |
7157
//        |     +--------------------------------------------+    |
7158
//        |     | Reserved for binders                       |    |
7159
//        |     +--------------------------------------------+    |
7160
//        |     | Saved TOC pointer                          | ---+
7161
//        |     +--------------------------------------------+
7162
//        |     | Parameter save area                        |
7163
//        |     +--------------------------------------------+
7164
//        |     | Alloca space                               |
7165
//        |     +--------------------------------------------+
7166
//        |     | Local variable space                       |
7167
//        |     +--------------------------------------------+
7168
//        |     | Float/int conversion temporary             |
7169
//        |     +--------------------------------------------+
7170
//        |     | Save area for AltiVec registers            |
7171
//        |     +--------------------------------------------+
7172
//        |     | AltiVec alignment padding                  |
7173
//        |     +--------------------------------------------+
7174
//        |     | Save area for VRSAVE register              |
7175
//        |     +--------------------------------------------+
7176
//        |     | Save area for General Purpose registers    |
7177
//        |     +--------------------------------------------+
7178
//        |     | Save area for Floating Point registers     |
7179
//        |     +--------------------------------------------+
7180
//        +---- | Back chain                                 |
7181
// High Memory  +--------------------------------------------+
7182
//
7183
//  Specifications:
7184
//  AIX 7.2 Assembler Language Reference
7185
//  Subroutine linkage convention
7186

7187
// Lowers the incoming (formal) arguments of a function for the AIX ABI.
//
// After reserving the linkage area, CC_AIX is run over 'Ins' to assign every
// argument to registers and/or stack slots. The resulting locations are then
// walked in order and the lowered argument values are appended to 'InVals':
//   * plain register locations become a CopyFromReg of a new live-in virtual
//     register (truncated when the value type is a narrower integer),
//   * memory locations become a load from a fixed stack object,
//   * byval aggregates become the frame index of a fixed stack object, with
//     the pieces that arrived in GPRs stored back into that object.
// For variadic functions the VarArgs frame index is recorded in
// PPCFunctionInfo and the GPR argument registers that lie beyond the
// allocated stack size are stored to the stack starting at that index.
//
// Returns the chain, combined through a TokenFactor with all stores created
// here (if any). GuaranteedTailCallOpt and soft-float are rejected with
// report_fatal_error.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7188
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7189
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7190
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7191

7192
  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7193
          CallConv == CallingConv::Fast) &&
7194
         "Unexpected calling convention!");
7195

7196
  if (getTargetMachine().Options.GuaranteedTailCallOpt)
7197
    report_fatal_error("Tail call support is unimplemented on AIX.");
7198

7199
  if (useSoftFloat())
7200
    report_fatal_error("Soft float support is unimplemented on AIX.");
7201

7202
  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7203

7204
  const bool IsPPC64 = Subtarget.isPPC64();
7205
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7206

7207
  // Assign locations to all of the incoming arguments.
7208
  SmallVector<CCValAssign, 16> ArgLocs;
7209
  MachineFunction &MF = DAG.getMachineFunction();
7210
  MachineFrameInfo &MFI = MF.getFrameInfo();
7211
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7212
  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7213

7214
  const EVT PtrVT = getPointerTy(MF.getDataLayout());
7215
  // Reserve space for the linkage area on the stack.
7216
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7217
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7218
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7219

7220
  // Stores created while lowering (byval register pieces and vararg GPR
  // spills). They are joined into the returned chain at the end.
  SmallVector<SDValue, 8> MemOps;
7221

7222
  // The loop header does not advance I: each iteration consumes the current
  // location itself and, for arguments that span several locations, the
  // follow-up locations as well.
  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7223
    // VA is a reference to the current location. I already indexes the next
    // one.
    CCValAssign &VA = ArgLocs[I++];
7224
    MVT LocVT = VA.getLocVT();
7225
    MVT ValVT = VA.getValVT();
7226
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7227
    // For compatibility with the AIX XL compiler, the float args in the
7228
    // parameter save area are initialized even if the argument is available
7229
    // in register.  The caller is required to initialize both the register
7230
    // and memory, however, the callee can choose to expect it in either.
7231
    // The memloc is dismissed here because the argument is retrieved from
7232
    // the register.
7233
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7234
      continue;
7235

7236
    // Creates a fixed stack object for the current MemLoc, loads the value
    // (of type ValVT) from it and appends the load to InVals.
    auto HandleMemLoc = [&]() {
7237
      const unsigned LocSize = LocVT.getStoreSize();
7238
      const unsigned ValSize = ValVT.getStoreSize();
7239
      assert((ValSize <= LocSize) &&
7240
             "Object size is larger than size of MemLoc");
7241
      int CurArgOffset = VA.getLocMemOffset();
7242
      // Objects are right-justified because AIX is big-endian.
7243
      if (LocSize > ValSize)
7244
        CurArgOffset += LocSize - ValSize;
7245
      // Potential tail calls could cause overwriting of argument stack slots.
7246
      // NOTE(review): GuaranteedTailCallOpt is rejected with a fatal error at
      // the top of this function, so this looks like it is always true here.
      const bool IsImmutable =
7247
          !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7248
            (CallConv == CallingConv::Fast));
7249
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7250
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7251
      SDValue ArgValue =
7252
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7253
      InVals.push_back(ArgValue);
7254
    };
7255

7256
    // Vector arguments to VaArg functions are passed both on the stack, and
7257
    // in any available GPRs. Load the value from the stack and add the GPRs
7258
    // as live ins.
7259
    if (VA.isMemLoc() && VA.needsCustom()) {
7260
      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7261
      assert(isVarArg && "Only use custom memloc for vararg.");
7262
      // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7263
      // matching custom RegLocs.
7264
      const unsigned OriginalValNo = VA.getValNo();
7265
      (void)OriginalValNo;
7266

7267
      // Consumes the next location, which must be a custom RegLoc of the same
      // vector argument, and marks its register as a live-in.
      auto HandleCustomVecRegLoc = [&]() {
7268
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7269
               "Missing custom RegLoc.");
7270
        // NOTE(review): VA is a reference into ArgLocs, so this assignment
        // copies the next location over the element VA refers to. That
        // element is not revisited by this loop.
        VA = ArgLocs[I++];
7271
        assert(VA.getValVT().isVector() &&
7272
               "Unexpected Val type for custom RegLoc.");
7273
        assert(VA.getValNo() == OriginalValNo &&
7274
               "ValNo mismatch between custom MemLoc and RegLoc.");
7275
        MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7276
        MF.addLiveIn(VA.getLocReg(),
7277
                     getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7278
                                       Subtarget.hasVSX()));
7279
      };
7280

7281
      HandleMemLoc();
7282
      // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7283
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7284
      // R10.
7285
      HandleCustomVecRegLoc();
7286
      HandleCustomVecRegLoc();
7287

7288
      // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7289
      // we passed the vector in R5, R6, R7 and R8.
7290
      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7291
        assert(!IsPPC64 &&
7292
               "Only 2 custom RegLocs expected for 64-bit codegen.");
7293
        HandleCustomVecRegLoc();
7294
        HandleCustomVecRegLoc();
7295
      }
7296

7297
      continue;
7298
    }
7299

7300
    // Record the parameter's type class in PPCFunctionInfo. This runs for
    // every RegLoc that reaches this point, custom ones included.
    // NOTE(review): confirm that appending one entry per custom RegLoc is
    // intended.
    if (VA.isRegLoc()) {
7301
      if (VA.getValVT().isScalarInteger())
7302
        FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7303
      else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7304
        switch (VA.getValVT().SimpleTy) {
7305
        default:
7306
          report_fatal_error("Unhandled value type for argument.");
7307
        case MVT::f32:
7308
          FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7309
          break;
7310
        case MVT::f64:
7311
          FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7312
          break;
7313
        }
7314
      } else if (VA.getValVT().isVector()) {
7315
        switch (VA.getValVT().SimpleTy) {
7316
        default:
7317
          report_fatal_error("Unhandled value type for argument.");
7318
        case MVT::v16i8:
7319
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7320
          break;
7321
        case MVT::v8i16:
7322
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7323
          break;
7324
        case MVT::v4i32:
7325
        case MVT::v2i64:
7326
        case MVT::v1i128:
7327
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7328
          break;
7329
        case MVT::v4f32:
7330
        case MVT::v2f64:
7331
          FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7332
          break;
7333
        }
7334
      }
7335
    }
7336

7337
    // A byval aggregate that lives entirely in memory: expose the frame index
    // of a fixed object at its stack offset. A zero-sized byval still gets a
    // pointer-sized object.
    if (Flags.isByVal() && VA.isMemLoc()) {
7338
      const unsigned Size =
7339
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7340
                  PtrByteSize);
7341
      const int FI = MF.getFrameInfo().CreateFixedObject(
7342
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7343
          /* IsAliased */ true);
7344
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7345
      InVals.push_back(FIN);
7346

7347
      continue;
7348
    }
7349

7350
    // A byval aggregate that starts in a GPR: create a fixed object at the
    // offset that mapArgRegToOffsetAIX returns for the first register, and
    // store every register piece into it.
    if (Flags.isByVal()) {
7351
      assert(VA.isRegLoc() && "MemLocs should already be handled.");
7352

7353
      const MCPhysReg ArgReg = VA.getLocReg();
7354
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7355

7356
      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7357
      const int FI = MF.getFrameInfo().CreateFixedObject(
7358
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7359
          /* IsAliased */ true);
7360
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7361
      InVals.push_back(FIN);
7362

7363
      // Add live ins for all the RegLocs for the same ByVal.
7364
      const TargetRegisterClass *RegClass =
7365
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7366

7367
      // Marks PhysReg as live-in and stores its value at byte Offset inside
      // the fixed object. The store is queued in MemOps.
      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7368
                                               unsigned Offset) {
7369
        const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7370
        // Since the callers side has left justified the aggregate in the
7371
        // register, we can simply store the entire register into the stack
7372
        // slot.
7373
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7374
        // The store to the fixedstack object is needed because accessing a
7375
        // field of the ByVal will use a gep and load. Ideally we will optimize
7376
        // to extracting the value from the register directly, and elide the
7377
        // stores when the arguments address is not taken, but that will need to
7378
        // be future work.
7379
        SDValue Store = DAG.getStore(
7380
            CopyFrom.getValue(1), dl, CopyFrom,
7381
            DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7382
            MachinePointerInfo::getFixedStack(MF, FI, Offset));
7383

7384
        MemOps.push_back(Store);
7385
      };
7386

7387
      unsigned Offset = 0;
7388
      HandleRegLoc(VA.getLocReg(), Offset);
7389
      Offset += PtrByteSize;
7390
      // Consume the follow-up RegLocs of this byval. ArgLocs[I] is only
      // inspected while part of the aggregate is still unaccounted for.
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7391
           Offset += PtrByteSize) {
7392
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7393
               "RegLocs should be for ByVal argument.");
7394

7395
        const CCValAssign RL = ArgLocs[I++];
7396
        HandleRegLoc(RL.getLocReg(), Offset);
7397
        FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7398
      }
7399

7400
      if (Offset != StackSize) {
7401
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7402
               "Expected MemLoc for remaining bytes.");
7403
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7404
        // Consume the MemLoc. The InVal has already been emitted, so nothing
7405
        // more needs to be done.
7406
        ++I;
7407
      }
7408

7409
      continue;
7410
    }
7411

7412
    // Ordinary register argument: copy it out of a new live-in virtual
    // register, truncating integers that are narrower than the location type.
    if (VA.isRegLoc() && !VA.needsCustom()) {
7413
      MVT::SimpleValueType SVT = ValVT.SimpleTy;
7414
      Register VReg =
7415
          MF.addLiveIn(VA.getLocReg(),
7416
                       getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7417
                                         Subtarget.hasVSX()));
7418
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7419
      if (ValVT.isScalarInteger() &&
7420
          (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7421
        ArgValue =
7422
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7423
      }
7424
      InVals.push_back(ArgValue);
7425
      continue;
7426
    }
7427
    if (VA.isMemLoc()) {
7428
      HandleMemLoc();
7429
      continue;
7430
    }
7431
  }
7432

7433
  // On AIX a minimum of 8 words is saved to the parameter save area.
7434
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7435
  // Area that is at least reserved in the caller of this function.
7436
  unsigned CallerReservedArea = std::max<unsigned>(
7437
      CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7438

7439
  // Set the size that is at least reserved in caller of this function. Tail
7440
  // call optimized function's reserved stack space needs to be aligned so
7441
  // that taking the difference between two stack areas will result in an
7442
  // aligned stack.
7443
  CallerReservedArea =
7444
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7445
  FuncInfo->setMinReservedArea(CallerReservedArea);
7446

7447
  if (isVarArg) {
7448
    // The VarArgs frame index names a pointer-sized fixed object located at
    // the stack size that CC_AIX allocated.
    FuncInfo->setVarArgsFrameIndex(
7449
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7450
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7451

7452
    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7453
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7454

7455
    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7456
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7457
    const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7458

7459
    // The fixed integer arguments of a variadic function are stored to the
7460
    // VarArgsFrameIndex on the stack so that they may be loaded by
7461
    // dereferencing the result of va_next.
7462
    // The first register stored is the one whose slot index equals the number
    // of pointer-sized slots already allocated past the linkage area.
    for (unsigned GPRIndex =
7463
             (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7464
         GPRIndex < NumGPArgRegs; ++GPRIndex) {
7465

7466
      const Register VReg =
7467
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7468
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7469

7470
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7471
      SDValue Store =
7472
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7473
      MemOps.push_back(Store);
7474
      // Increment the address for the next argument to store.
7475
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7476
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7477
    }
7478
  }
7479

7480
  // Join all queued stores into the chain handed back to the caller.
  if (!MemOps.empty())
7481
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7482

7483
  return Chain;
7484
}
7485

7486
/// Lower an outgoing call using the AIX calling convention.
///
/// The arguments are assigned to locations with CC_AIX, the stack is adjusted
/// with a CALLSEQ_START, and each argument is then materialised either as a
/// (register, value) pair in RegsToPass or as a store/memcpy into the
/// parameter save area. For indirect calls the TOC base register is saved to
/// its stack slot before the register copies are emitted. The actual call
/// node is built by FinishCall.
///
/// \param Chain    Incoming token chain.
/// \param Callee   The call target, forwarded unchanged to FinishCall.
/// \param CFlags   Calling convention, vararg/indirect/tail/patchpoint flags.
/// \param Outs     Per-argument flags and types for the outgoing arguments.
/// \param OutVals  The outgoing argument values, indexed like \p Outs.
/// \param Ins      Description of the call results, forwarded to FinishCall.
/// \param dl       Debug location used for every node created here.
/// \param DAG      The DAG being built.
/// \param InVals   Receives the call results (filled in by FinishCall).
/// \param CB       The originating call instruction, forwarded to FinishCall.
/// \returns whatever FinishCall returns for the completed call sequence.
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallFlags CFlags,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const CallBase *CB) const {
  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
  // AIX ABI stack frame layout.

  assert((CFlags.CallConv == CallingConv::C ||
          CFlags.CallConv == CallingConv::Cold ||
          CFlags.CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (CFlags.IsPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();

  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<CCValAssign, 16> ArgLocs;
  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage save area (LSA) on the stack.
  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
  //   [SP][CR][LR][2 x reserved][TOC].
  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  const bool IsPPC64 = Subtarget.isPPC64();
  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is variadic.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  // Note: the loop has no increment expression. Each path below advances I
  // itself because a single argument may own several consecutive ArgLocs.
  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      // Zero-extending load of VT from the by-val object at LoadOffset,
      // producing a pointer-width value.
      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in register.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
            DAG.getObjectPtrOffset(
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
      // 2 and 1 byte loads.
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        // Accumulate the shifted pieces into a single register value.
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack
      unsigned LoadOffset = 0;
      // Consumes the next ArgLoc (which must be a custom RegLoc of the same
      // argument) and loads one pointer-sized piece of the stored vector into
      // that register. The load is chained on the store above.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCvalAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
      // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

      continue;
    }

    if (!ValVT.isFloatingPoint())
      report_fatal_error(
          "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      // The first GPR receives the most significant word.
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        // Only consume the next location when it is a register belonging to
        // this same argument. (This used to compare PeekArg's ValNo with
        // itself, which is always true.)
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          // The second GPR receives the least significant word.
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, RegVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue TOCStackPtr = DAG.getRegister(StackPtrReg, RegVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, TOCStackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (const auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
7806

7807
bool
7808
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7809
                                  MachineFunction &MF, bool isVarArg,
7810
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
7811
                                  LLVMContext &Context) const {
7812
  SmallVector<CCValAssign, 16> RVLocs;
7813
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7814
  return CCInfo.CheckReturn(
7815
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7816
                ? RetCC_PPC_Cold
7817
                : RetCC_PPC);
7818
}
7819

7820
/// Lower a function return: assign each return value to a register with the
/// return calling convention, emit glued CopyToReg nodes for them, and build
/// the final PPCISD::RET_GLUE node whose operands are the chain, the return
/// registers and (if any copy was emitted) the glue.
SDValue PPCTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
    SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  const bool UseColdRetCC =
      Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold;
  CCInfo.AnalyzeReturn(Outs, UseColdRetCC ? RetCC_PPC_Cold : RetCC_PPC);

  SDValue Glue;
  // Operand 0 is the chain; it is overwritten with the final chain below.
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);

  // Copy the result values into the output registers. LocIdx walks RVLocs and
  // ValIdx walks OutVals; they diverge when one value occupies two locations
  // (the SPE f64 case below).
  unsigned ValIdx = 0;
  for (unsigned LocIdx = 0; LocIdx != RVLocs.size(); ++LocIdx, ++ValIdx) {
    CCValAssign &VA = RVLocs[LocIdx];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[ValIdx];

    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    const bool SplitSPEDouble =
        Subtarget.hasSPE() && VA.getLocVT() == MVT::f64;
    if (!SplitSPEDouble) {
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    } else {
      // Legalize ret f64 -> ret 2 x i32.
      const bool IsLE = Subtarget.isLittleEndian();
      SDValue Half =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Half, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      Half = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      // Skip ahead to the next loc. VA is a reference, so this copy-assigns
      // the next location's contents over the current RVLocs entry rather
      // than rebinding VA; subsequent uses of VA see the next location.
      ++LocIdx;
      VA = RVLocs[LocIdx];
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Half, Glue);
    }

    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
7884

7885
/// Lower GET_DYNAMIC_AREA_OFFSET to a PPCISD::DYNAREAOFFSET node taking the
/// incoming chain and the frame pointer save frame index, and producing a
/// value of the operation's own result type.
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  const SDLoc Loc(Op);

  // Operands: the chain, then the frame index of the frame pointer save slot.
  SDValue NodeOps[2] = {Op.getOperand(0), getFramePointerFrameIndex(DAG)};

  // The node yields a single integer result of the requested type.
  SDVTList ResultTys = DAG.getVTList(Op.getValueType());
  return DAG.getNode(PPCISD::DYNAREAOFFSET, Loc, ResultTys, NodeOps);
}
7901

7902
/// Lower STACKRESTORE. When the dynamic allocation is popped the SP link must
/// be carried over: the word at the current stack pointer is loaded, the
/// stack pointer register is set to the saved value, and the loaded word is
/// stored at the (new) stack pointer. Returns the chain of that final store.
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT PtrTy = getPointerTy(DAG.getDataLayout());

  // X1 is the stack pointer on 64-bit targets, R1 on 32-bit targets.
  const unsigned SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  SDValue SPValue = DAG.getRegister(SPReg, PtrTy);

  // Operand 0 is the chain, operand 1 the stack pointer value to restore.
  SDValue InChain = Op.getOperand(0);
  SDValue SavedSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue OldLink =
      DAG.getLoad(PtrTy, dl, InChain, SPValue, MachinePointerInfo());

  // Restore the stack pointer, ordered after the load.
  SDValue AfterRestore =
      DAG.getCopyToReg(OldLink.getValue(1), dl, SPReg, SavedSP);

  // Store the old link SP.
  return DAG.getStore(AfterRestore, dl, OldLink, SPValue,
                      MachinePointerInfo());
}
7929

7930
/// Return a FrameIndex node for the return address save slot, creating the
/// fixed stack object on first use and caching its index in PPCFunctionInfo.
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  // A cached index of zero is treated as "not created yet".
  int SaveIdx = FuncInfo->getReturnAddrSaveIndex();
  if (SaveIdx == 0) {
    // Fixed offset of the return address save area, from the frame lowering.
    const int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // The slot is pointer sized: 8 bytes on PPC64, 4 bytes otherwise.
    const int SlotBytes = Subtarget.isPPC64() ? 8 : 4;
    SaveIdx = MF.getFrameInfo().CreateFixedObject(SlotBytes, LROffset, false);
    // Remember the index so later calls reuse the same object.
    FuncInfo->setReturnAddrSaveIndex(SaveIdx);
  }

  return DAG.getFrameIndex(SaveIdx, getPointerTy(MF.getDataLayout()));
}
7951

7952
/// Return a FrameIndex node for the frame pointer save slot, creating the
/// fixed stack object on first use and caching its index in PPCFunctionInfo.
/// Per the original comment, the users of this index are primarily DYNALLOC
/// instructions.
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  // A cached index of zero is treated as "not created yet".
  int SaveIdx = FuncInfo->getFramePointerSaveIndex();
  if (SaveIdx == 0) {
    // Fixed offset of the frame pointer save area, from the frame lowering.
    const int FPOffset =
        Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // The slot is pointer sized: 8 bytes on PPC64, 4 bytes otherwise.
    const int SlotBytes = Subtarget.isPPC64() ? 8 : 4;
    SaveIdx = MF.getFrameInfo().CreateFixedObject(SlotBytes, FPOffset, true);
    // Remember the index so later calls reuse the same object.
    FuncInfo->setFramePointerSaveIndex(SaveIdx);
  }

  return DAG.getFrameIndex(SaveIdx, getPointerTy(MF.getDataLayout()));
}
7974

7975
/// Lower DYNAMIC_STACKALLOC to a PPC node taking (chain, negated size, frame
/// pointer save frame index) and producing (pointer, chain). PROBED_ALLOCA is
/// used when the function has inline stack probing, DYNALLOC otherwise.
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SDLoc dl(Op);

  // Operand 0 is the chain and operand 1 the allocation size.
  SDValue InChain = Op.getOperand(0);
  SDValue AllocSize = Op.getOperand(1);

  const EVT PtrTy = getPointerTy(DAG.getDataLayout());

  // Negate the size (0 - Size).
  SDValue Zero = DAG.getConstant(0, dl, PtrTy);
  SDValue NegatedSize = DAG.getNode(ISD::SUB, dl, PtrTy, Zero, AllocSize);

  // Construct a node for the frame pointer save index.
  SDValue FPSaveIdx = getFramePointerFrameIndex(DAG);

  SDValue NodeOps[3] = {InChain, NegatedSize, FPSaveIdx};
  SDVTList ResultTys = DAG.getVTList(PtrTy, MVT::Other);

  const unsigned Opcode =
      hasInlineStackProbe(MF) ? PPCISD::PROBED_ALLOCA : PPCISD::DYNALLOC;
  return DAG.getNode(Opcode, dl, ResultTys, NodeOps);
}
7996

7997
/// Lower EH_DWARF_CFA by creating a pointer-sized fixed stack object at
/// offset 0 and returning its frame index as a pointer-typed node.
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Pointer-sized slot: 8 bytes on PPC64, 4 bytes otherwise.
  const int SlotBytes = Subtarget.isPPC64() ? 8 : 4;
  const EVT PtrTy = getPointerTy(DAG.getDataLayout());

  const int CFAIndex =
      MF.getFrameInfo().CreateFixedObject(SlotBytes, 0, false);
  return DAG.getFrameIndex(CFAIndex, PtrTy);
}
8007

8008
/// Lower EH_SJLJ_SETJMP to the PPC-specific node with the same two operands,
/// producing an i32 result and a chain.
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDVTList ResultTys = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, ResultTys, First, Second);
}
8015

8016
/// Lower EH_SJLJ_LONGJMP to the PPC-specific node with the same two operands;
/// the node produces only a chain (MVT::Other).
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, First, Second);
}
8022

8023
/// Custom-lower a load. Vector loads are forwarded to LowerVectorLoad; the
/// only other case handled is an i1 load, which becomes an any-extending i8
/// load to the pointer type followed by a truncate to i1. The result is a
/// merge of the truncated value and the new load's chain.
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  const EVT ResultTy = Op.getValueType();
  if (ResultTy.isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  SDLoc dl(Op);
  LoadSDNode *OrigLoad = cast<LoadSDNode>(Op);

  // Load 8 bits into a pointer-width integer, reusing the original memory
  // operand, chain and address.
  SDValue WideLoad = DAG.getExtLoad(
      ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()),
      OrigLoad->getChain(), OrigLoad->getBasePtr(), MVT::i8,
      OrigLoad->getMemOperand());

  // Then truncate to 1 bit.
  SDValue Bit = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, WideLoad);

  // Hand back both the value and the output chain of the new load.
  SDValue Merged[] = {Bit, WideLoad.getValue(1)};
  return DAG.getMergeValues(Merged, dl);
}
8047

8048
/// Custom-lower a store. Vector stores are forwarded to LowerVectorStore; the
/// only other case handled is an i1 store, which zero-extends the value to
/// the pointer type and emits a truncating store of 8 bits.
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  const EVT StoredTy = Op.getOperand(1).getValueType();
  if (StoredTy.isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  SDLoc dl(Op);
  StoreSDNode *OrigStore = cast<StoreSDNode>(Op);

  // Zero extend the i1 to a pointer-width integer.
  SDValue Widened =
      DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                  OrigStore->getValue());

  // Then use a truncating store to 8 bits, reusing the original chain,
  // address and memory operand.
  return DAG.getTruncStore(OrigStore->getChain(), dl, Widened,
                           OrigStore->getBasePtr(), MVT::i8,
                           OrigStore->getMemOperand());
}
8069

8070
// FIXME: Remove this once the ANDI glue bug is fixed:
8071
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8072
  assert(Op.getValueType() == MVT::i1 &&
8073
         "Custom lowering only for i1 results");
8074

8075
  SDLoc DL(Op);
8076
  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8077
}
8078

8079
/// Lower a vector truncate whose source fits in one or two vector registers
/// as a single vector shuffle.
///
/// Vector truncates are legalized down to where the source fits in a vector
/// register (so the target is smaller than a vector register). At that point
/// legalization tries to custom lower the sub-legal result and gets here,
/// where the truncate is expressed as one target operation.
///
/// For example a trunc <2 x i16> to <2 x i8> could be visualized as:
///   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
///
/// For big-endian ordering (u denotes undefined):
///   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
///   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
///
/// For little-endian ordering:
///   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
///   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
///
/// Returns an empty SDValue when the types are not handled here.
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {
  const EVT DstVT = Op.getValueType();
  assert(DstVT.isVector() && "Vector type expected.");
  const unsigned DstNumElts = DstVT.getVectorNumElements();
  const EVT DstEltVT = DstVT.getVectorElementType();

  // The destination must be custom-lowered, at most 128 bits wide, and have a
  // power-of-two element count and element width.
  const bool DstSupported =
      isOperationCustom(Op.getOpcode(), DstVT) &&
      DstVT.getSizeInBits() <= 128 && isPowerOf2_32(DstNumElts) &&
      llvm::has_single_bit<uint32_t>(DstEltVT.getSizeInBits());
  if (!DstSupported)
    return SDValue();

  SDValue Src = Op.getOperand(0);
  const EVT SrcVT = Src.getValueType();
  const unsigned SrcBits = SrcVT.getSizeInBits();

  // The source must be at most 256 bits wide with a power-of-two element
  // count and element width.
  const bool SrcSupported =
      SrcBits <= 256 && isPowerOf2_32(SrcVT.getVectorNumElements()) &&
      llvm::has_single_bit<uint32_t>(
          SrcVT.getVectorElementType().getSizeInBits());
  if (!SrcSupported)
    return SDValue();
  // A 256-bit source is split in half below, so it needs at least 2 elements.
  if (SrcBits == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  // The shuffle is performed on 128-bit vectors of the destination element
  // type.
  const unsigned ShuffleNumElts = 128 / DstEltVT.getSizeInBits();
  const EVT ShuffleVT =
      EVT::getVectorVT(*DAG.getContext(), DstEltVT, ShuffleNumElts);

  SDLoc DL(Op);
  SDValue First, Second;
  if (SrcBits != 256) {
    // Single input: widen it to 128 bits if needed; the second shuffle input
    // is undefined.
    First = SrcBits == 128 ? Src : widenVec(DAG, Src, DL);
    Second = DAG.getUNDEF(ShuffleVT);
  } else {
    // Two inputs: the low and high halves of the 256-bit source.
    const EVT IdxTy = getVectorIdxTy(DAG.getDataLayout());
    const EVT HalfVT =
        Src.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
    const unsigned HalfNumElts = HalfVT.getVectorNumElements();
    First = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Src,
                        DAG.getConstant(0, DL, IdxTy));
    Second = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Src,
                         DAG.getConstant(HalfNumElts, DL, IdxTy));
  }

  // First list the elements we want to keep. Each destination element is one
  // of Stride consecutive narrow elements: the first of the group on
  // little-endian targets, the last of the group on big-endian targets.
  const unsigned Stride = SrcBits / DstVT.getSizeInBits();
  const bool IsLE = Subtarget.isLittleEndian();
  SmallVector<int, 16> Mask;
  for (unsigned Elt = 0; Elt != DstNumElts; ++Elt)
    Mask.push_back(IsLE ? Elt * Stride : (Elt + 1) * Stride - 1);

  // Populate the remaining elements with undefs, all referring to the same
  // index (ShuffleNumElts + 1) in the second input.
  for (unsigned Elt = DstNumElts; Elt < ShuffleNumElts; ++Elt)
    Mask.push_back(ShuffleNumElts + 1);

  First = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, First);
  Second = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Second);
  return DAG.getVectorShuffle(ShuffleVT, DL, First, Second, Mask);
}
8159

8160
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8161
/// possible.
8162
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8163
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8164
  EVT ResVT = Op.getValueType();
8165
  EVT CmpVT = Op.getOperand(0).getValueType();
8166
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8167
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
8168
  SDLoc dl(Op);
8169

8170
  // Without power9-vector, we don't have native instruction for f128 comparison.
8171
  // Following transformation to libcall is needed for setcc:
8172
  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8173
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8174
    SDValue Z = DAG.getSetCC(
8175
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8176
        LHS, RHS, CC);
8177
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8178
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8179
  }
8180

8181
  // Not FP, or using SPE? Not a fsel.
8182
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8183
      Subtarget.hasSPE())
8184
    return Op;
8185

8186
  SDNodeFlags Flags = Op.getNode()->getFlags();
8187

8188
  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8189
  // presence of infinities.
8190
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8191
    switch (CC) {
8192
    default:
8193
      break;
8194
    case ISD::SETOGT:
8195
    case ISD::SETGT:
8196
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8197
    case ISD::SETOLT:
8198
    case ISD::SETLT:
8199
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8200
    }
8201
  }
8202

8203
  // We might be able to do better than this under some circumstances, but in
8204
  // general, fsel-based lowering of select is a finite-math-only optimization.
8205
  // For more information, see section F.3 of the 2.06 ISA specification.
8206
  // With ISA 3.0
8207
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8208
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8209
      ResVT == MVT::f128)
8210
    return Op;
8211

8212
  // If the RHS of the comparison is a 0.0, we don't need to do the
8213
  // subtraction at all.
8214
  SDValue Sel1;
8215
  if (isFloatingPointZero(RHS))
8216
    switch (CC) {
8217
    default: break;       // SETUO etc aren't handled by fsel.
8218
    case ISD::SETNE:
8219
      std::swap(TV, FV);
8220
      [[fallthrough]];
8221
    case ISD::SETEQ:
8222
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8223
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8224
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8225
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8226
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8227
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8228
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8229
    case ISD::SETULT:
8230
    case ISD::SETLT:
8231
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8232
      [[fallthrough]];
8233
    case ISD::SETOGE:
8234
    case ISD::SETGE:
8235
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8236
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8237
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8238
    case ISD::SETUGT:
8239
    case ISD::SETGT:
8240
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8241
      [[fallthrough]];
8242
    case ISD::SETOLE:
8243
    case ISD::SETLE:
8244
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8245
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8246
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8247
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8248
    }
8249

8250
  SDValue Cmp;
8251
  switch (CC) {
8252
  default: break;       // SETUO etc aren't handled by fsel.
8253
  case ISD::SETNE:
8254
    std::swap(TV, FV);
8255
    [[fallthrough]];
8256
  case ISD::SETEQ:
8257
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8258
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8259
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8260
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8261
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8262
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8263
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8264
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8265
  case ISD::SETULT:
8266
  case ISD::SETLT:
8267
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8268
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8269
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8270
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8271
  case ISD::SETOGE:
8272
  case ISD::SETGE:
8273
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8274
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8275
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8276
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8277
  case ISD::SETUGT:
8278
  case ISD::SETGT:
8279
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8280
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8281
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8282
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8283
  case ISD::SETOLE:
8284
  case ISD::SETLE:
8285
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8286
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8287
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8288
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8289
  }
8290
  return Op;
8291
}
8292

8293
static unsigned getPPCStrictOpcode(unsigned Opc) {
8294
  switch (Opc) {
8295
  default:
8296
    llvm_unreachable("No strict version of this opcode!");
8297
  case PPCISD::FCTIDZ:
8298
    return PPCISD::STRICT_FCTIDZ;
8299
  case PPCISD::FCTIWZ:
8300
    return PPCISD::STRICT_FCTIWZ;
8301
  case PPCISD::FCTIDUZ:
8302
    return PPCISD::STRICT_FCTIDUZ;
8303
  case PPCISD::FCTIWUZ:
8304
    return PPCISD::STRICT_FCTIWUZ;
8305
  case PPCISD::FCFID:
8306
    return PPCISD::STRICT_FCFID;
8307
  case PPCISD::FCFIDU:
8308
    return PPCISD::STRICT_FCFIDU;
8309
  case PPCISD::FCFIDS:
8310
    return PPCISD::STRICT_FCFIDS;
8311
  case PPCISD::FCFIDUS:
8312
    return PPCISD::STRICT_FCFIDUS;
8313
  }
8314
}
8315

8316
/// Emit the PPC conversion node for an FP_TO_SINT / FP_TO_UINT node (or the
/// STRICT_ form of either).
///
/// \param Op        The conversion node being lowered.
/// \param DAG       The DAG in which new nodes are created.
/// \param Subtarget Queried for FPCVT / P9Vector / PPC64 availability.
/// \returns The conversion node. Its value type is f64 (or f128 when the
///          source is f128); for strict nodes result 1 is the output chain.
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc dl(Op);
  const unsigned OrigOpc = Op.getOpcode();
  const bool Strict = Op->isStrictFPOpcode();
  const bool Signed =
      OrigOpc == ISD::FP_TO_SINT || OrigOpc == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // Strict nodes carry the chain in operand 0 and the value in operand 1;
  // non-strict nodes have no chain and the value is operand 0.
  SDValue Src;
  SDValue Chain;
  if (Strict) {
    Src = Op.getOperand(1);
    Chain = Op.getOperand(0);
  } else {
    Src = Op.getOperand(0);
  }

  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");

  // Single-precision sources are extended to double before converting.
  if (Src.getValueType() == MVT::f32) {
    if (!Strict) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
    } else {
      Src = DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
                        DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src},
                        Flags);
      Chain = Src.getValue(1);
    }
  }

  // With P9Vector, i8/i16 destinations are converted as a full-width integer.
  const bool NarrowDest = DestTy == MVT::i8 || DestTy == MVT::i16;
  if (NarrowDest && Subtarget.hasP9Vector())
    DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;

  // Choose the conversion opcode from destination width and signedness.
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = Signed ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
    break;
  case MVT::i32:
    if (Signed)
      Opc = PPCISD::FCTIWZ;
    else if (Subtarget.hasFPCVT())
      Opc = PPCISD::FCTIWUZ;
    else
      // No unsigned word conversion without FPCVT: use the signed
      // doubleword conversion instead.
      Opc = PPCISD::FCTIDZ;
    break;
  default:
    llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  }

  EVT ConvTy = MVT::f64;
  if (Src.getValueType() == MVT::f128)
    ConvTy = MVT::f128;

  if (!Strict)
    return DAG.getNode(Opc, dl, ConvTy, Src);

  return DAG.getNode(getPPCStrictOpcode(Opc), dl,
                     DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
}
8369

8370
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8371
                                               SelectionDAG &DAG,
8372
                                               const SDLoc &dl) const {
8373
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8374
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8375
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8376
  bool IsStrict = Op->isStrictFPOpcode();
8377

8378
  // Convert the FP value to an int value through memory.
8379
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8380
                  (IsSigned || Subtarget.hasFPCVT());
8381
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8382
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8383
  MachinePointerInfo MPI =
8384
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8385

8386
  // Emit a store to the stack slot.
8387
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8388
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8389
  if (i32Stack) {
8390
    MachineFunction &MF = DAG.getMachineFunction();
8391
    Alignment = Align(4);
8392
    MachineMemOperand *MMO =
8393
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8394
    SDValue Ops[] = { Chain, Tmp, FIPtr };
8395
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8396
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8397
  } else
8398
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8399

8400
  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
8401
  // add in a bias on big endian.
8402
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
8403
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8404
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
8405
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8406
  }
8407

8408
  RLI.Chain = Chain;
8409
  RLI.Ptr = FIPtr;
8410
  RLI.MPI = MPI;
8411
  RLI.Alignment = Alignment;
8412
}
8413

8414
/// Custom lowers floating point to integer conversions to use
8415
/// the direct move instructions available in ISA 2.07 to avoid the
8416
/// need for load/store combinations.
8417
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8418
                                                    SelectionDAG &DAG,
8419
                                                    const SDLoc &dl) const {
8420
  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8421
  SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8422
  if (Op->isStrictFPOpcode())
8423
    return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8424
  else
8425
    return Mov;
8426
}
8427

8428
/// Custom lowering for FP_TO_SINT / FP_TO_UINT and their STRICT_ variants.
///
/// - f128 sources: the node is returned unchanged when P9Vector is available,
///   otherwise an empty SDValue is returned.
/// - ppcf128 sources: only conversions to i32 are expanded here; any other
///   destination type yields an empty SDValue.
/// - all other sources: use a direct move when available on PPC64, otherwise
///   go through a stack temporary.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  const unsigned Opcode = Op.getOpcode();
  const bool Strict = Op->isStrictFPOpcode();
  const bool Signed =
      Opcode == ISD::FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_SINT;
  SDValue Input = Op.getOperand(Strict ? 1 : 0);
  const EVT FromVT = Input.getValueType();
  const EVT ToVT = Op.getValueType();

  // FP to INT conversions are legal for f128.
  if (FromVT == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  if (FromVT != MVT::ppcf128) {
    if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
      return LowerFP_TO_INTDirectMove(Op, DAG, dl);

    // Spill the converted value to the stack and load the integer back.
    ReuseLoadInfo RLI;
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

    return DAG.getLoad(ToVT, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment,
                       RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
  }

  // From here on the source is ppcf128. Expand ppcf128 to i32 by hand for the
  // benefit of llvm-gcc bootstrap on PPC (the libcall is not available).
  if (ToVT != MVT::i32)
    return SDValue();

  // TODO: Conservatively pass only nofpexcept flag here. Need to check and
  // set other fast-math flags to FP operations in both strict and
  // non-strict cases. (FP_TO_SINT, FSUB)
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  if (Signed) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitScalar(Input, dl, MVT::f64, MVT::f64);

    // Add the two halves of the long double in round-to-zero mode, and use
    // a smaller FP_TO_SINT.
    if (!Strict) {
      SDValue Sum = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
      return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Sum);
    }

    SDValue Sum = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
                              DAG.getVTList(MVT::f64, MVT::Other),
                              {Op.getOperand(0), Lo, Hi}, Flags);
    return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                       DAG.getVTList(MVT::i32, MVT::Other),
                       {Sum.getValue(1), Sum}, Flags);
  }

  // Unsigned conversion: build 2^31 as a ppcf128 constant plus the matching
  // integer sign-bit mask.
  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
  APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
  SDValue Cst = DAG.getConstantFP(APF, dl, FromVT);
  SDValue SignMask = DAG.getConstant(0x80000000, dl, ToVT);

  if (!Strict) {
    // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
    // FIXME: generated code sucks.
    SDValue Large = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Input, Cst);
    Large = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Large);
    Large = DAG.getNode(ISD::ADD, dl, MVT::i32, Large, SignMask);
    SDValue Small = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Input);
    return DAG.getSelectCC(dl, Input, Cst, Large, Small, ISD::SETGE);
  }

  // Strict form:
  //   Sel    = Src < 0x80000000
  //   FltOfs = select Sel, 0.0, 0x80000000
  //   IntOfs = select Sel, 0, 0x80000000
  //   Result = fp_to_sint(Src - FltOfs) ^ IntOfs
  SDValue Chain = Op.getOperand(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), FromVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ToVT);
  SDValue Sel =
      DAG.getSetCC(dl, SetCCVT, Input, Cst, ISD::SETLT, Chain, true);
  Chain = Sel.getValue(1);

  SDValue FltZero = DAG.getConstantFP(0.0, dl, FromVT);
  SDValue FltOfs = DAG.getSelect(dl, FromVT, Sel, FltZero, Cst);
  Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, ToVT);

  // Thread the chain through the subtraction and the signed conversion.
  SDValue Diff =
      DAG.getNode(ISD::STRICT_FSUB, dl, DAG.getVTList(FromVT, MVT::Other),
                  {Chain, Input, FltOfs}, Flags);
  Chain = Diff.getValue(1);
  SDValue SInt =
      DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                  DAG.getVTList(ToVT, MVT::Other), {Chain, Diff}, Flags);
  Chain = SInt.getValue(1);

  SDValue IntZero = DAG.getConstant(0, dl, ToVT);
  SDValue IntOfs = DAG.getSelect(dl, ToVT, Sel, IntZero, SignMask);
  SDValue Result = DAG.getNode(ISD::XOR, dl, ToVT, SInt, IntOfs);
  return DAG.getMergeValues({Result, Chain}, dl);
}
8527

8528
// We're trying to insert a regular store, S, and then a load, L. If the
8529
// incoming value, O, is a load, we might just be able to have our load use the
8530
// address used by O. However, we don't know if anything else will store to
8531
// that address before we can load from it. To prevent this situation, we need
8532
// to insert our load, L, into the chain as a peer of O. To do this, we give L
8533
// the same chain operand as O, we create a token factor from the chain results
8534
// of O and L, and we replace all uses of O's chain result with that token
8535
// factor (see spliceIntoChain below for this last part).
8536
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8537
                                            ReuseLoadInfo &RLI,
8538
                                            SelectionDAG &DAG,
8539
                                            ISD::LoadExtType ET) const {
8540
  // Conservatively skip reusing for constrained FP nodes.
8541
  if (Op->isStrictFPOpcode())
8542
    return false;
8543

8544
  SDLoc dl(Op);
8545
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8546
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8547
  if (ET == ISD::NON_EXTLOAD &&
8548
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8549
      isOperationLegalOrCustom(Op.getOpcode(),
8550
                               Op.getOperand(0).getValueType())) {
8551

8552
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8553
    return true;
8554
  }
8555

8556
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8557
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8558
      LD->isNonTemporal())
8559
    return false;
8560
  if (LD->getMemoryVT() != MemVT)
8561
    return false;
8562

8563
  // If the result of the load is an illegal type, then we can't build a
8564
  // valid chain for reuse since the legalised loads and token factor node that
8565
  // ties the legalised loads together uses a different output chain then the
8566
  // illegal load.
8567
  if (!isTypeLegal(LD->getValueType(0)))
8568
    return false;
8569

8570
  RLI.Ptr = LD->getBasePtr();
8571
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8572
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
8573
           "Non-pre-inc AM on PPC?");
8574
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8575
                          LD->getOffset());
8576
  }
8577

8578
  RLI.Chain = LD->getChain();
8579
  RLI.MPI = LD->getPointerInfo();
8580
  RLI.IsDereferenceable = LD->isDereferenceable();
8581
  RLI.IsInvariant = LD->isInvariant();
8582
  RLI.Alignment = LD->getAlign();
8583
  RLI.AAInfo = LD->getAAInfo();
8584
  RLI.Ranges = LD->getRanges();
8585

8586
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8587
  return true;
8588
}
8589

8590
// Given the head of the old chain, ResChain, insert a token factor containing
8591
// it and NewResChain, and make users of ResChain now be users of that token
8592
// factor.
8593
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8594
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8595
                                        SDValue NewResChain,
8596
                                        SelectionDAG &DAG) const {
8597
  if (!ResChain)
8598
    return;
8599

8600
  SDLoc dl(NewResChain);
8601

8602
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8603
                           NewResChain, DAG.getUNDEF(MVT::Other));
8604
  assert(TF.getNode() != NewResChain.getNode() &&
8605
         "A new TF really is required here");
8606

8607
  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8608
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8609
}
8610

8611
/// Analyze profitability of direct move
8612
/// prefer float load to int load plus direct move
8613
/// when there is no integer use of int load
8614
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8615
  SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8616
  if (Origin->getOpcode() != ISD::LOAD)
8617
    return true;
8618

8619
  // If there is no LXSIBZX/LXSIHZX, like Power8,
8620
  // prefer direct move if the memory size is 1 or 2 bytes.
8621
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8622
  if (!Subtarget.hasP9Vector() &&
8623
      (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8624
    return true;
8625

8626
  for (SDNode::use_iterator UI = Origin->use_begin(),
8627
                            UE = Origin->use_end();
8628
       UI != UE; ++UI) {
8629

8630
    // Only look at the users of the loaded value.
8631
    if (UI.getUse().get().getResNo() != 0)
8632
      continue;
8633

8634
    if (UI->getOpcode() != ISD::SINT_TO_FP &&
8635
        UI->getOpcode() != ISD::UINT_TO_FP &&
8636
        UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8637
        UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8638
      return true;
8639
  }
8640

8641
  return false;
8642
}
8643

8644
/// Emit the PPC conversion node for a SINT_TO_FP / UINT_TO_FP node (or the
/// STRICT_ form of either).
///
/// \param Op        The conversion node being lowered; supplies signedness,
///                  result type, flags and, for strict nodes, the default
///                  chain.
/// \param Src       The value to convert.
/// \param DAG       The DAG in which the node is created.
/// \param Subtarget Queried for FPCVT availability.
/// \param Chain     Chain to use for strict nodes; when empty, operand 0 of
///                  \p Op is used instead.
/// \returns The conversion node, of type f32 when the result is f32 and FPCVT
///          is available and f64 otherwise. For strict nodes result 1 is the
///          output chain.
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  const unsigned OrigOpc = Op.getOpcode();
  const bool Signed =
      OrigOpc == ISD::SINT_TO_FP || OrigOpc == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool ToSingle =
      Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();

  unsigned ConvOpc;
  if (ToSingle)
    ConvOpc = Signed ? PPCISD::FCFIDS : PPCISD::FCFIDUS;
  else
    ConvOpc = Signed ? PPCISD::FCFID : PPCISD::FCFIDU;
  EVT ResultVT = ToSingle ? MVT::f32 : MVT::f64;

  if (!Op->isStrictFPOpcode())
    return DAG.getNode(ConvOpc, dl, ResultVT, Src);

  // Strict conversion: fall back to the incoming chain of Op when the caller
  // did not provide one.
  if (!Chain)
    Chain = Op.getOperand(0);
  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                     DAG.getVTList(ResultVT, MVT::Other), {Chain, Src}, Flags);
}
8669

8670
/// Custom lowers integer to floating point conversions to use
8671
/// the direct move instructions available in ISA 2.07 to avoid the
8672
/// need for load/store combinations.
8673
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8674
                                                    SelectionDAG &DAG,
8675
                                                    const SDLoc &dl) const {
8676
  assert((Op.getValueType() == MVT::f32 ||
8677
          Op.getValueType() == MVT::f64) &&
8678
         "Invalid floating point type as target of conversion");
8679
  assert(Subtarget.hasFPCVT() &&
8680
         "Int to FP conversions with direct moves require FPCVT");
8681
  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8682
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8683
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8684
                Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8685
  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8686
  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8687
  return convertIntToFP(Op, Mov, DAG, Subtarget);
8688
}
8689

8690
/// Widen \p Vec, a vector narrower than 128 bits, to a 128-bit vector with the
/// same element type by concatenating it with undef vectors. \p Vec becomes
/// the first operand of the CONCAT_VECTORS node.
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
  const EVT NarrowVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

  // The result keeps the element type and has as many elements as fit in
  // 128 bits.
  const EVT EltVT = NarrowVT.getVectorElementType();
  const unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  const EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // Start with every piece undef, then put the real vector in slot 0.
  const unsigned NumPieces = WideNumElts / NarrowVT.getVectorNumElements();
  SmallVector<SDValue, 16> Pieces(NumPieces, DAG.getUNDEF(NarrowVT));
  Pieces[0] = Vec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Pieces);
}
8709

8710
/// Lower a vector SINT_TO_FP / UINT_TO_FP (or STRICT_ variant) whose result
/// is v2f64 or v4f32 and whose source vector is narrower than 128 bits.
///
/// The source is widened to 128 bits, its elements are shuffled so that each
/// one occupies one lane of an intermediate v4i32 / v2i64 vector (zero filled
/// for unsigned conversions, sign-extended in register for signed ones), and
/// the original conversion opcode is then applied to that intermediate
/// vector.
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  const unsigned Opc = Op.getOpcode();
  const bool Strict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(Strict ? 1 : 0);
  const EVT ResVT = Op.getValueType();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  const bool SignedConv =
      Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  const bool FourEltRes = ResVT == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  const unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  // By default every mask entry selects from the second shuffle operand.
  SmallVector<int, 16> Mask(WideNumElts);
  for (unsigned Idx = 0; Idx != WideNumElts; ++Idx)
    Mask[Idx] = Idx + WideNumElts;

  // Place source element N into lane N of the intermediate vector: at the
  // first narrow slot of the lane on little endian, at the last one on big
  // endian.
  const int SaveElts = FourEltRes ? 4 : 2;
  const int Stride = WideNumElts / SaveElts;
  const bool IsLE = Subtarget.isLittleEndian();
  for (int Elt = 0; Elt < SaveElts; ++Elt) {
    const int Slot = IsLE ? Elt * Stride : (Elt + 1) * Stride - 1;
    Mask[Slot] = Elt;
  }

  // Unsigned conversions fill the remaining slots with zero; signed ones
  // leave them undef because they are overwritten by the sign extension.
  SDValue Filler =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arranged = DAG.getVectorShuffle(WideVT, dl, Wide, Filler, Mask);

  SDValue Extended;
  if (!SignedConv) {
    Extended = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arranged);
  } else {
    Arranged = DAG.getBitcast(IntermediateVT, Arranged);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extended = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT,
                           Arranged, DAG.getValueType(ExtVT));
  }

  if (!Strict)
    return DAG.getNode(Opc, dl, ResVT, Extended);

  return DAG.getNode(Opc, dl, DAG.getVTList(ResVT, MVT::Other),
                     {Op.getOperand(0), Extended}, Flags);
}
8769

8770
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8771
                                          SelectionDAG &DAG) const {
8772
  SDLoc dl(Op);
8773
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8774
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8775
  bool IsStrict = Op->isStrictFPOpcode();
8776
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8777
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8778

8779
  // TODO: Any other flags to propagate?
8780
  SDNodeFlags Flags;
8781
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8782

8783
  EVT InVT = Src.getValueType();
8784
  EVT OutVT = Op.getValueType();
8785
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8786
      isOperationCustom(Op.getOpcode(), InVT))
8787
    return LowerINT_TO_FPVector(Op, DAG, dl);
8788

8789
  // Conversions to f128 are legal.
8790
  if (Op.getValueType() == MVT::f128)
8791
    return Subtarget.hasP9Vector() ? Op : SDValue();
8792

8793
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8794
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8795
    return SDValue();
8796

8797
  if (Src.getValueType() == MVT::i1) {
8798
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8799
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
8800
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
8801
    if (IsStrict)
8802
      return DAG.getMergeValues({Sel, Chain}, dl);
8803
    else
8804
      return Sel;
8805
  }
8806

8807
  // If we have direct moves, we can do all the conversion, skip the store/load
8808
  // however, without FPCVT we can't do most conversions.
8809
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8810
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
8811
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8812

8813
  assert((IsSigned || Subtarget.hasFPCVT()) &&
8814
         "UINT_TO_FP is supported only with FPCVT");
8815

8816
  if (Src.getValueType() == MVT::i64) {
8817
    SDValue SINT = Src;
8818
    // When converting to single-precision, we actually need to convert
8819
    // to double-precision first and then round to single-precision.
8820
    // To avoid double-rounding effects during that operation, we have
8821
    // to prepare the input operand.  Bits that might be truncated when
8822
    // converting to double-precision are replaced by a bit that won't
8823
    // be lost at this stage, but is below the single-precision rounding
8824
    // position.
8825
    //
8826
    // However, if -enable-unsafe-fp-math is in effect, accept double
8827
    // rounding to avoid the extra overhead.
8828
    if (Op.getValueType() == MVT::f32 &&
8829
        !Subtarget.hasFPCVT() &&
8830
        !DAG.getTarget().Options.UnsafeFPMath) {
8831

8832
      // Twiddle input to make sure the low 11 bits are zero.  (If this
8833
      // is the case, we are guaranteed the value will fit into the 53 bit
8834
      // mantissa of an IEEE double-precision value without rounding.)
8835
      // If any of those low 11 bits were not zero originally, make sure
8836
      // bit 12 (value 2048) is set instead, so that the final rounding
8837
      // to single-precision gets the correct result.
8838
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8839
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
8840
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8841
                          Round, DAG.getConstant(2047, dl, MVT::i64));
8842
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8843
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8844
                          Round, DAG.getConstant(-2048, dl, MVT::i64));
8845

8846
      // However, we cannot use that value unconditionally: if the magnitude
8847
      // of the input value is small, the bit-twiddling we did above might
8848
      // end up visibly changing the output.  Fortunately, in that case, we
8849
      // don't need to twiddle bits since the original input will convert
8850
      // exactly to double-precision floating-point already.  Therefore,
8851
      // construct a conditional to use the original value if the top 11
8852
      // bits are all sign-bit copies, and use the rounded value computed
8853
      // above otherwise.
8854
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8855
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
8856
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8857
                         Cond, DAG.getConstant(1, dl, MVT::i64));
8858
      Cond = DAG.getSetCC(
8859
          dl,
8860
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8861
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8862

8863
      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8864
    }
8865

8866
    ReuseLoadInfo RLI;
8867
    SDValue Bits;
8868

8869
    MachineFunction &MF = DAG.getMachineFunction();
8870
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8871
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8872
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8873
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8874
    } else if (Subtarget.hasLFIWAX() &&
8875
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8876
      MachineMemOperand *MMO =
8877
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8878
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8879
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8880
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8881
                                     DAG.getVTList(MVT::f64, MVT::Other),
8882
                                     Ops, MVT::i32, MMO);
8883
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8884
    } else if (Subtarget.hasFPCVT() &&
8885
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8886
      MachineMemOperand *MMO =
8887
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8888
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8889
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8890
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8891
                                     DAG.getVTList(MVT::f64, MVT::Other),
8892
                                     Ops, MVT::i32, MMO);
8893
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8894
    } else if (((Subtarget.hasLFIWAX() &&
8895
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8896
                (Subtarget.hasFPCVT() &&
8897
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8898
               SINT.getOperand(0).getValueType() == MVT::i32) {
8899
      MachineFrameInfo &MFI = MF.getFrameInfo();
8900
      EVT PtrVT = getPointerTy(DAG.getDataLayout());
8901

8902
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8903
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8904

8905
      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8906
                                   MachinePointerInfo::getFixedStack(
8907
                                       DAG.getMachineFunction(), FrameIdx));
8908
      Chain = Store;
8909

8910
      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8911
             "Expected an i32 store");
8912

8913
      RLI.Ptr = FIdx;
8914
      RLI.Chain = Chain;
8915
      RLI.MPI =
8916
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8917
      RLI.Alignment = Align(4);
8918

8919
      MachineMemOperand *MMO =
8920
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8921
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8922
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8923
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8924
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
8925
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
8926
                                     Ops, MVT::i32, MMO);
8927
      Chain = Bits.getValue(1);
8928
    } else
8929
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8930

8931
    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8932
    if (IsStrict)
8933
      Chain = FP.getValue(1);
8934

8935
    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8936
      if (IsStrict)
8937
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8938
                         DAG.getVTList(MVT::f32, MVT::Other),
8939
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8940
      else
8941
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8942
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8943
    }
8944
    return FP;
8945
  }
8946

8947
  assert(Src.getValueType() == MVT::i32 &&
8948
         "Unhandled INT_TO_FP type in custom expander!");
8949
  // Since we only generate this in 64-bit mode, we can take advantage of
8950
  // 64-bit registers.  In particular, sign extend the input value into the
8951
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8952
  // then lfd it and fcfid it.
8953
  MachineFunction &MF = DAG.getMachineFunction();
8954
  MachineFrameInfo &MFI = MF.getFrameInfo();
8955
  EVT PtrVT = getPointerTy(MF.getDataLayout());
8956

8957
  SDValue Ld;
8958
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8959
    ReuseLoadInfo RLI;
8960
    bool ReusingLoad;
8961
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8962
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8963
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8964

8965
      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8966
                                   MachinePointerInfo::getFixedStack(
8967
                                       DAG.getMachineFunction(), FrameIdx));
8968
      Chain = Store;
8969

8970
      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8971
             "Expected an i32 store");
8972

8973
      RLI.Ptr = FIdx;
8974
      RLI.Chain = Chain;
8975
      RLI.MPI =
8976
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8977
      RLI.Alignment = Align(4);
8978
    }
8979

8980
    MachineMemOperand *MMO =
8981
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8982
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8983
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8984
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8985
                                 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8986
                                 MVT::i32, MMO);
8987
    Chain = Ld.getValue(1);
8988
    if (ReusingLoad)
8989
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8990
  } else {
8991
    assert(Subtarget.isPPC64() &&
8992
           "i32->FP without LFIWAX supported only on PPC64");
8993

8994
    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8995
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8996

8997
    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8998

8999
    // STD the extended value into the stack slot.
9000
    SDValue Store = DAG.getStore(
9001
        Chain, dl, Ext64, FIdx,
9002
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9003
    Chain = Store;
9004

9005
    // Load the value as a double.
9006
    Ld = DAG.getLoad(
9007
        MVT::f64, dl, Chain, FIdx,
9008
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9009
    Chain = Ld.getValue(1);
9010
  }
9011

9012
  // FCFID it and return it.
9013
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9014
  if (IsStrict)
9015
    Chain = FP.getValue(1);
9016
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9017
    if (IsStrict)
9018
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9019
                       DAG.getVTList(MVT::f32, MVT::Other),
9020
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9021
    else
9022
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9023
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9024
  }
9025
  return FP;
9026
}
9027

9028
/// Lower GET_ROUNDING by reading the FPSCR with MFFS and remapping its
/// two-bit rounding-mode field to the encoding GET_ROUNDING expects.
///
/// Operand 0 of \p Op is the incoming chain.  The result is a merge of
/// {rounding mode in Op's value type, outgoing chain}.
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
    // i64 is legal: reinterpret the f64 as i64 and keep the low 32 bits.
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary
  // CWD1 = FPSCR & 0x3
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  // CWD2 = (~FPSCR & 0x3) >> 1, with the complement of the low two bits
  // formed by XOR-ing with 3 before masking.
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  // Adjust the i32 result to the requested type.  getZExtOrTrunc chooses
  // between ZERO_EXTEND, TRUNCATE and a no-op from the actual widths, so a
  // result type narrower than i32 is always truncated rather than relying on
  // a fixed bit-size threshold.
  RetVal = DAG.getZExtOrTrunc(RetVal, dl, VT);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}
9099

9100
/// Custom-lower SHL_PARTS: left shift of a value held as two VT-sized halves
/// (operand 0 = low half, operand 1 = high half) by the amount in operand 2.
/// Returns the merged pair {new low half, new high half}.
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  SDValue LoPart = Op.getOperand(0);
  SDValue HiPart = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  const EVT AmtVT = ShAmt.getValueType();

  // The sequence below is branch-free: it is built from PPC shift nodes and
  // relies on how PPC treats shift amounts that are out of range.

  // BitWidth - Amt: how far the low half must move right to supply the bits
  // that cross into the high half.
  SDValue RevAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, dl, AmtVT), ShAmt);
  SDValue HiShifted = DAG.getNode(PPCISD::SHL, dl, VT, HiPart, ShAmt);
  SDValue LoCarry = DAG.getNode(PPCISD::SRL, dl, VT, LoPart, RevAmt);
  SDValue HiMerged = DAG.getNode(ISD::OR, dl, VT, HiShifted, LoCarry);

  // Amt - BitWidth: the shift applied to the low half when the amount reaches
  // past a single half.
  SDValue OverAmt = DAG.getNode(ISD::ADD, dl, AmtVT, ShAmt,
                                DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue LoOver = DAG.getNode(PPCISD::SHL, dl, VT, LoPart, OverAmt);

  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, HiMerged, LoOver);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, LoPart, ShAmt);
  return DAG.getMergeValues({OutLo, OutHi}, dl);
}
9128

9129
/// Custom-lower SRL_PARTS: logical right shift of a value held as two
/// VT-sized halves (operand 0 = low half, operand 1 = high half) by the
/// amount in operand 2.  Returns the merged pair {new low half, new high half}.
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  SDValue LoPart = Op.getOperand(0);
  SDValue HiPart = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  const EVT AmtVT = ShAmt.getValueType();

  // The sequence below is branch-free: it is built from PPC shift nodes and
  // relies on how PPC treats shift amounts that are out of range.

  // BitWidth - Amt: how far the high half must move left to supply the bits
  // that cross into the low half.
  SDValue RevAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, dl, AmtVT), ShAmt);
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, VT, LoPart, ShAmt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, VT, HiPart, RevAmt);
  SDValue LoMerged = DAG.getNode(ISD::OR, dl, VT, LoShifted, HiCarry);

  // Amt - BitWidth: the shift applied to the high half when the amount
  // reaches past a single half.
  SDValue OverAmt = DAG.getNode(ISD::ADD, dl, AmtVT, ShAmt,
                                DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue HiOver = DAG.getNode(PPCISD::SRL, dl, VT, HiPart, OverAmt);

  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, LoMerged, HiOver);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, HiPart, ShAmt);
  return DAG.getMergeValues({OutLo, OutHi}, dl);
}
9157

9158
/// Custom-lower SRA_PARTS: arithmetic right shift of a value held as two
/// VT-sized halves (operand 0 = low half, operand 1 = high half) by the
/// amount in operand 2.  Returns the merged pair {new low half, new high half}.
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  SDValue LoPart = Op.getOperand(0);
  SDValue HiPart = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  const EVT AmtVT = ShAmt.getValueType();

  // Unlike the logical variants, the low half cannot be formed by OR-ing the
  // two candidates together, so both are computed and a select_cc on
  // (Amt - BitWidth) picks between them.

  // BitWidth - Amt.
  SDValue RevAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, dl, AmtVT), ShAmt);
  SDValue LoShifted = DAG.getNode(PPCISD::SRL, dl, VT, LoPart, ShAmt);
  SDValue HiCarry = DAG.getNode(PPCISD::SHL, dl, VT, HiPart, RevAmt);
  // Candidate low half when (Amt - BitWidth) <= 0.
  SDValue LoWithin = DAG.getNode(ISD::OR, dl, VT, LoShifted, HiCarry);

  // Amt - BitWidth.
  SDValue OverAmt = DAG.getNode(ISD::ADD, dl, AmtVT, ShAmt,
                                DAG.getConstant(-BitWidth, dl, AmtVT));
  // Candidate low half otherwise: the high half shifted arithmetically.
  SDValue LoBeyond = DAG.getNode(PPCISD::SRA, dl, VT, HiPart, OverAmt);

  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, HiPart, ShAmt);
  SDValue Zero = DAG.getConstant(0, dl, AmtVT);
  SDValue OutLo =
      DAG.getSelectCC(dl, OverAmt, Zero, LoWithin, LoBeyond, ISD::SETLE);
  return DAG.getMergeValues({OutLo, OutHi}, dl);
}
9186

9187
/// Custom-lower FSHL / FSHR (funnel shifts) into a PPC shift-left, a PPC
/// shift-right and an OR.
SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const EVT VT = Op.getValueType();
  const unsigned BitWidth = VT.getSizeInBits();
  const bool IsFSHL = Op.getOpcode() == ISD::FSHL;

  SDValue HiSrc = Op.getOperand(0);
  SDValue LoSrc = Op.getOperand(1);
  SDValue RawAmt = Op.getOperand(2);
  const EVT AmtVT = RawAmt.getValueType();

  // With X = operand 0, Y = operand 1, Z = operand 2 and BW = BitWidth:
  //   fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  //   fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  // No special case is needed for a shift by exactly BW because PowerPC
  // defines that case, which makes this simpler than the generic
  // TargetLowering::expandFunnelShift.

  // Z % BW, computed as a mask with BW - 1.
  SDValue ModAmt = DAG.getNode(ISD::AND, dl, AmtVT, RawAmt,
                               DAG.getConstant(BitWidth - 1, dl, AmtVT));
  // BW - (Z % BW).
  SDValue InvAmt = DAG.getNode(ISD::SUB, dl, AmtVT,
                               DAG.getConstant(BitWidth, dl, AmtVT), ModAmt);

  SDValue ShlAmt = IsFSHL ? ModAmt : InvAmt;
  SDValue SrlAmt = IsFSHL ? InvAmt : ModAmt;
  SDValue LeftPart = DAG.getNode(PPCISD::SHL, dl, VT, HiSrc, ShlAmt);
  SDValue RightPart = DAG.getNode(PPCISD::SRL, dl, VT, LoSrc, SrlAmt);
  return DAG.getNode(ISD::OR, dl, VT, LeftPart, RightPart);
}
9211

9212
//===----------------------------------------------------------------------===//
9213
// Vector related lowering.
9214
//
9215

9216
/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9217
/// element size of SplatSize. Cast the result to VT.
9218
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9219
                                      SelectionDAG &DAG, const SDLoc &dl) {
9220
  static const MVT VTys[] = { // canonical VT to use for each size.
9221
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9222
  };
9223

9224
  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9225

9226
  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9227
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9228
    SplatSize = 1;
9229
    Val = 0xFF;
9230
  }
9231

9232
  EVT CanonicalVT = VTys[SplatSize-1];
9233

9234
  // Build a canonical splat for this value.
9235
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9236
}
9237

9238
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9239
/// specified intrinsic ID.
9240
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9241
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
9242
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
9243
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9244
                     DAG.getConstant(IID, dl, MVT::i32), Op);
9245
}
9246

9247
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9248
/// specified intrinsic ID.
9249
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9250
                                SelectionDAG &DAG, const SDLoc &dl,
9251
                                EVT DestVT = MVT::Other) {
9252
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9253
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9254
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9255
}
9256

9257
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9258
/// specified intrinsic ID.
9259
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9260
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9261
                                EVT DestVT = MVT::Other) {
9262
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9263
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9264
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9265
}
9266

9267
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9268
/// amount.  The result has the specified value type.
9269
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9270
                           SelectionDAG &DAG, const SDLoc &dl) {
9271
  // Force LHS/RHS to be the right type.
9272
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9273
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9274

9275
  int Ops[16];
9276
  for (unsigned i = 0; i != 16; ++i)
9277
    Ops[i] = i + Amt;
9278
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9279
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9280
}
9281

9282
/// Do we have an efficient pattern in a .td file for this node?
9283
///
9284
/// \param V - pointer to the BuildVectorSDNode being matched
9285
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9286
///
9287
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9288
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9289
/// the opposite is true (expansion is beneficial) are:
9290
/// - The node builds a vector out of integers that are not 32 or 64-bits
9291
/// - The node builds a vector out of constants
9292
/// - The node is a "load-and-splat"
9293
/// In all other cases, we will choose to keep the BUILD_VECTOR.
9294
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9295
                                            bool HasDirectMove,
9296
                                            bool HasP8Vector) {
9297
  EVT VecVT = V->getValueType(0);
9298
  bool RightType = VecVT == MVT::v2f64 ||
9299
    (HasP8Vector && VecVT == MVT::v4f32) ||
9300
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9301
  if (!RightType)
9302
    return false;
9303

9304
  bool IsSplat = true;
9305
  bool IsLoad = false;
9306
  SDValue Op0 = V->getOperand(0);
9307

9308
  // This function is called in a block that confirms the node is not a constant
9309
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
9310
  // different constants.
9311
  if (V->isConstant())
9312
    return false;
9313
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9314
    if (V->getOperand(i).isUndef())
9315
      return false;
9316
    // We want to expand nodes that represent load-and-splat even if the
9317
    // loaded value is a floating point truncation or conversion to int.
9318
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9319
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9320
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9321
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9322
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9323
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9324
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9325
      IsLoad = true;
9326
    // If the operands are different or the input is not a load and has more
9327
    // uses than just this BV node, then it isn't a splat.
9328
    if (V->getOperand(i) != Op0 ||
9329
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9330
      IsSplat = false;
9331
  }
9332
  return !(IsSplat && IsLoad);
9333
}
9334

9335
// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9336
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9337

9338
  SDLoc dl(Op);
9339
  SDValue Op0 = Op->getOperand(0);
9340

9341
  if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9342
      (Op.getValueType() != MVT::f128))
9343
    return SDValue();
9344

9345
  SDValue Lo = Op0.getOperand(0);
9346
  SDValue Hi = Op0.getOperand(1);
9347
  if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9348
    return SDValue();
9349

9350
  if (!Subtarget.isLittleEndian())
9351
    std::swap(Lo, Hi);
9352

9353
  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9354
}
9355

9356
/// Look through any chain of BITCASTs and at most one SCALAR_TO_VECTOR /
/// SCALAR_TO_VECTOR_PERMUTED on \p Op, and return a pointer to the SDValue
/// underneath if it is a normal load; otherwise return nullptr.
///
/// \p IsPermuted is written only when a scalar-to-vector node is seen (true
/// for the PERMUTED form, false for the plain one); it is left untouched
/// otherwise.
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
  const SDValue *Cur = &Op;

  // Strip every bitcast layer.
  while (Cur->getOpcode() == ISD::BITCAST)
    Cur = &Cur->getOperand(0);

  // Step through one scalar-to-vector node, recording which kind it was.
  const unsigned Opc = Cur->getOpcode();
  const bool IsPermutedS2V = Opc == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
  if (IsPermutedS2V || Opc == ISD::SCALAR_TO_VECTOR) {
    IsPermuted = IsPermutedS2V;
    Cur = &Cur->getOperand(0);
  }

  if (Cur->getOpcode() != ISD::LOAD)
    return nullptr;
  if (!ISD::isNormalLoad(cast<LoadSDNode>(*Cur)))
    return nullptr;
  return Cur;
}
9370

9371
// Convert the argument APFloat to a single precision APFloat if there is no
9372
// loss in information during the conversion to single precision APFloat and the
9373
// resulting number is not a denormal number. Return true if successful.
9374
bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9375
  APFloat APFloatToConvert = ArgAPFloat;
9376
  bool LosesInfo = true;
9377
  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9378
                           &LosesInfo);
9379
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9380
  if (Success)
9381
    ArgAPFloat = APFloatToConvert;
9382
  return Success;
9383
}
9384

9385
// Bitcast the argument APInt to a double and convert it to a single precision
9386
// APFloat, bitcast the APFloat to an APInt and assign it to the original
9387
// argument if there is no loss in information during the conversion from
9388
// double to single precision APFloat and the resulting number is not a denormal
9389
// number. Return true if successful.
9390
bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9391
  double DpValue = ArgAPInt.bitsToDouble();
9392
  APFloat APFloatDp(DpValue);
9393
  bool Success = convertToNonDenormSingle(APFloatDp);
9394
  if (Success)
9395
    ArgAPInt = APFloatDp.bitcastToAPInt();
9396
  return Success;
9397
}
9398

9399
// Nondestructive check for convertTonNonDenormSingle.
9400
bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9401
  // Only convert if it loses info, since XXSPLTIDP should
9402
  // handle the other case.
9403
  APFloat APFloatToConvert = ArgAPFloat;
9404
  bool LosesInfo = true;
9405
  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9406
                           &LosesInfo);
9407

9408
  return (!LosesInfo && !APFloatToConvert.isDenormal());
9409
}
9410

9411
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9412
                             unsigned &Opcode) {
9413
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9414
  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9415
    return false;
9416

9417
  EVT Ty = Op->getValueType(0);
9418
  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9419
  // as we cannot handle extending loads for these types.
9420
  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9421
      ISD::isNON_EXTLoad(InputNode))
9422
    return true;
9423

9424
  EVT MemVT = InputNode->getMemoryVT();
9425
  // For v8i16 and v16i8 types, extending loads can be handled as long as the
9426
  // memory VT is the same vector element VT type.
9427
  // The loads feeding into the v8i16 and v16i8 types will be extending because
9428
  // scalar i8/i16 are not legal types.
9429
  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9430
      (MemVT == Ty.getVectorElementType()))
9431
    return true;
9432

9433
  if (Ty == MVT::v2i64) {
9434
    // Check the extend type, when the input type is i32, and the output vector
9435
    // type is v2i64.
9436
    if (MemVT == MVT::i32) {
9437
      if (ISD::isZEXTLoad(InputNode))
9438
        Opcode = PPCISD::ZEXT_LD_SPLAT;
9439
      if (ISD::isSEXTLoad(InputNode))
9440
        Opcode = PPCISD::SEXT_LD_SPLAT;
9441
    }
9442
    return true;
9443
  }
9444
  return false;
9445
}
9446

9447
// If this is a case we can't handle, return null and let the default
9448
// expansion code take care of it.  If we CAN select this case, and if it
9449
// selects to a single instruction, return Op.  Otherwise, if we can codegen
9450
// this case more efficiently than a constant pool load, lower it to the
9451
// sequence of ops that should be used.
9452
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9453
                                             SelectionDAG &DAG) const {
9454
  SDLoc dl(Op);
9455
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9456
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9457

9458
  // Check if this is a splat of a constant value.
9459
  APInt APSplatBits, APSplatUndef;
9460
  unsigned SplatBitSize;
9461
  bool HasAnyUndefs;
9462
  bool BVNIsConstantSplat =
9463
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9464
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9465

9466
  // If it is a splat of a double, check if we can shrink it to a 32 bit
9467
  // non-denormal float which when converted back to double gives us the same
9468
  // double. This is to exploit the XXSPLTIDP instruction.
9469
  // If we lose precision, we use XXSPLTI32DX.
9470
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9471
      Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9472
    // Check the type first to short-circuit so we don't modify APSplatBits if
9473
    // this block isn't executed.
9474
    if ((Op->getValueType(0) == MVT::v2f64) &&
9475
        convertToNonDenormSingle(APSplatBits)) {
9476
      SDValue SplatNode = DAG.getNode(
9477
          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9478
          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9479
      return DAG.getBitcast(Op.getValueType(), SplatNode);
9480
    } else {
9481
      // We may lose precision, so we have to use XXSPLTI32DX.
9482

9483
      uint32_t Hi =
9484
          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9485
      uint32_t Lo =
9486
          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9487
      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9488

9489
      if (!Hi || !Lo)
9490
        // If either load is 0, then we should generate XXLXOR to set to 0.
9491
        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9492

9493
      if (Hi)
9494
        SplatNode = DAG.getNode(
9495
            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9496
            DAG.getTargetConstant(0, dl, MVT::i32),
9497
            DAG.getTargetConstant(Hi, dl, MVT::i32));
9498

9499
      if (Lo)
9500
        SplatNode =
9501
            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9502
                        DAG.getTargetConstant(1, dl, MVT::i32),
9503
                        DAG.getTargetConstant(Lo, dl, MVT::i32));
9504

9505
      return DAG.getBitcast(Op.getValueType(), SplatNode);
9506
    }
9507
  }
9508

9509
  if (!BVNIsConstantSplat || SplatBitSize > 32) {
9510
    unsigned NewOpcode = PPCISD::LD_SPLAT;
9511

9512
    // Handle load-and-splat patterns as we have instructions that will do this
9513
    // in one go.
9514
    if (DAG.isSplatValue(Op, true) &&
9515
        isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9516
      const SDValue *InputLoad = &Op.getOperand(0);
9517
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9518

9519
      // If the input load is an extending load, it will be an i32 -> i64
9520
      // extending load and isValidSplatLoad() will update NewOpcode.
9521
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9522
      unsigned ElementSize =
9523
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9524

9525
      assert(((ElementSize == 2 * MemorySize)
9526
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9527
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
9528
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
9529
             "Unmatched element size and opcode!\n");
9530

9531
      // Checking for a single use of this load, we have to check for vector
9532
      // width (128 bits) / ElementSize uses (since each operand of the
9533
      // BUILD_VECTOR is a separate use of the value.
9534
      unsigned NumUsesOfInputLD = 128 / ElementSize;
9535
      for (SDValue BVInOp : Op->ops())
9536
        if (BVInOp.isUndef())
9537
          NumUsesOfInputLD--;
9538

9539
      // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9540
      // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9541
      // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9542
      // 15", but function IsValidSplatLoad() now will only return true when
9543
      // the data at index 0 is not nullptr. So we will not get into trouble for
9544
      // these cases.
9545
      //
9546
      // case 1 - lfiwzx/lfiwax
9547
      // 1.1: load result is i32 and is sign/zero extend to i64;
9548
      // 1.2: build a v2i64 vector type with above loaded value;
9549
      // 1.3: the vector has only one value at index 0, others are all undef;
9550
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9551
      if (NumUsesOfInputLD == 1 &&
9552
          (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9553
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9554
           Subtarget.hasLFIWAX()))
9555
        return SDValue();
9556

9557
      // case 2 - lxvr[hb]x
9558
      // 2.1: load result is at most i16;
9559
      // 2.2: build a vector with above loaded value;
9560
      // 2.3: the vector has only one value at index 0, others are all undef;
9561
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9562
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9563
          Subtarget.isISA3_1() && ElementSize <= 16)
9564
        return SDValue();
9565

9566
      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9567
      if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9568
          Subtarget.hasVSX()) {
9569
        SDValue Ops[] = {
9570
          LD->getChain(),    // Chain
9571
          LD->getBasePtr(),  // Ptr
9572
          DAG.getValueType(Op.getValueType()) // VT
9573
        };
9574
        SDValue LdSplt = DAG.getMemIntrinsicNode(
9575
            NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9576
            LD->getMemoryVT(), LD->getMemOperand());
9577
        // Replace all uses of the output chain of the original load with the
9578
        // output chain of the new load.
9579
        DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9580
                                      LdSplt.getValue(1));
9581
        return LdSplt;
9582
      }
9583
    }
9584

9585
    // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9586
    // 32-bits can be lowered to VSX instructions under certain conditions.
9587
    // Without VSX, there is no pattern more efficient than expanding the node.
9588
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9589
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9590
                                        Subtarget.hasP8Vector()))
9591
      return Op;
9592
    return SDValue();
9593
  }
9594

9595
  uint64_t SplatBits = APSplatBits.getZExtValue();
9596
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
9597
  unsigned SplatSize = SplatBitSize / 8;
9598

9599
  // First, handle single instruction cases.
9600

9601
  // All zeros?
9602
  if (SplatBits == 0) {
9603
    // Canonicalize all zero vectors to be v4i32.
9604
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9605
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9606
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9607
    }
9608
    return Op;
9609
  }
9610

9611
  // We have XXSPLTIW for constant splats four bytes wide.
9612
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
9613
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9614
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9615
  // turned into a 4-byte splat of 0xABABABAB.
9616
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9617
    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9618
                                  Op.getValueType(), DAG, dl);
9619

9620
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9621
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9622
                                  dl);
9623

9624
  // We have XXSPLTIB for constant splats one byte wide.
9625
  if (Subtarget.hasP9Vector() && SplatSize == 1)
9626
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9627
                                  dl);
9628

9629
  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9630
  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9631
                    (32-SplatBitSize));
9632
  if (SextVal >= -16 && SextVal <= 15)
9633
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9634
                                  dl);
9635

9636
  // Two instruction sequences.
9637

9638
  // If this value is in the range [-32,30] and is even, use:
9639
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9640
  // If this value is in the range [17,31] and is odd, use:
9641
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9642
  // If this value is in the range [-31,-17] and is odd, use:
9643
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9644
  // Note the last two are three-instruction sequences.
9645
  if (SextVal >= -32 && SextVal <= 31) {
9646
    // To avoid having these optimizations undone by constant folding,
9647
    // we convert to a pseudo that will be expanded later into one of
9648
    // the above forms.
9649
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9650
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9651
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9652
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9653
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9654
    if (VT == Op.getValueType())
9655
      return RetVal;
9656
    else
9657
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9658
  }
9659

9660
  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9661
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9662
  // for fneg/fabs.
9663
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9664
    // Make -1 and vspltisw -1:
9665
    SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9666

9667
    // Make the VSLW intrinsic, computing 0x8000_0000.
9668
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9669
                                   OnesV, DAG, dl);
9670

9671
    // xor by OnesV to invert it.
9672
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9673
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9674
  }
9675

9676
  // Check to see if this is a wide variety of vsplti*, binop self cases.
9677
  static const signed char SplatCsts[] = {
9678
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9679
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9680
  };
9681

9682
  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9683
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9684
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
9685
    int i = SplatCsts[idx];
9686

9687
    // Figure out what shift amount will be used by altivec if shifted by i in
9688
    // this splat size.
9689
    unsigned TypeShiftAmt = i & (SplatBitSize-1);
9690

9691
    // vsplti + shl self.
9692
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9693
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9694
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
9695
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9696
        Intrinsic::ppc_altivec_vslw
9697
      };
9698
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9699
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9700
    }
9701

9702
    // vsplti + srl self.
9703
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9704
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9705
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
9706
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9707
        Intrinsic::ppc_altivec_vsrw
9708
      };
9709
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9710
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9711
    }
9712

9713
    // vsplti + rol self.
9714
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9715
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9716
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9717
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
9718
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9719
        Intrinsic::ppc_altivec_vrlw
9720
      };
9721
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9722
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9723
    }
9724

9725
    // t = vsplti c, result = vsldoi t, t, 1
9726
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9727
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9728
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9729
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9730
    }
9731
    // t = vsplti c, result = vsldoi t, t, 2
9732
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9733
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9734
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9735
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9736
    }
9737
    // t = vsplti c, result = vsldoi t, t, 3
9738
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9739
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9740
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9741
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9742
    }
9743
  }
9744

9745
  return SDValue();
9746
}
9747

9748
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9749
/// the specified operations to build the shuffle.
9750
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9751
                                      SDValue RHS, SelectionDAG &DAG,
9752
                                      const SDLoc &dl) {
9753
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
9754
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9755
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9756

9757
  enum {
9758
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9759
    OP_VMRGHW,
9760
    OP_VMRGLW,
9761
    OP_VSPLTISW0,
9762
    OP_VSPLTISW1,
9763
    OP_VSPLTISW2,
9764
    OP_VSPLTISW3,
9765
    OP_VSLDOI4,
9766
    OP_VSLDOI8,
9767
    OP_VSLDOI12
9768
  };
9769

9770
  if (OpNum == OP_COPY) {
9771
    if (LHSID == (1*9+2)*9+3) return LHS;
9772
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9773
    return RHS;
9774
  }
9775

9776
  SDValue OpLHS, OpRHS;
9777
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9778
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9779

9780
  int ShufIdxs[16];
9781
  switch (OpNum) {
9782
  default: llvm_unreachable("Unknown i32 permute!");
9783
  case OP_VMRGHW:
9784
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9785
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9786
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9787
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9788
    break;
9789
  case OP_VMRGLW:
9790
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9791
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9792
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9793
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9794
    break;
9795
  case OP_VSPLTISW0:
9796
    for (unsigned i = 0; i != 16; ++i)
9797
      ShufIdxs[i] = (i&3)+0;
9798
    break;
9799
  case OP_VSPLTISW1:
9800
    for (unsigned i = 0; i != 16; ++i)
9801
      ShufIdxs[i] = (i&3)+4;
9802
    break;
9803
  case OP_VSPLTISW2:
9804
    for (unsigned i = 0; i != 16; ++i)
9805
      ShufIdxs[i] = (i&3)+8;
9806
    break;
9807
  case OP_VSPLTISW3:
9808
    for (unsigned i = 0; i != 16; ++i)
9809
      ShufIdxs[i] = (i&3)+12;
9810
    break;
9811
  case OP_VSLDOI4:
9812
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9813
  case OP_VSLDOI8:
9814
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9815
  case OP_VSLDOI12:
9816
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9817
  }
9818
  EVT VT = OpLHS.getValueType();
9819
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9820
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9821
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9822
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9823
}
9824

9825
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9826
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9827
/// SDValue.
9828
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9829
                                           SelectionDAG &DAG) const {
9830
  const unsigned BytesInVector = 16;
9831
  bool IsLE = Subtarget.isLittleEndian();
9832
  SDLoc dl(N);
9833
  SDValue V1 = N->getOperand(0);
9834
  SDValue V2 = N->getOperand(1);
9835
  unsigned ShiftElts = 0, InsertAtByte = 0;
9836
  bool Swap = false;
9837

9838
  // Shifts required to get the byte we want at element 7.
9839
  unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9840
                                   0, 15, 14, 13, 12, 11, 10, 9};
9841
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9842
                                1, 2,  3,  4,  5,  6,  7,  8};
9843

9844
  ArrayRef<int> Mask = N->getMask();
9845
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9846

9847
  // For each mask element, find out if we're just inserting something
9848
  // from V2 into V1 or vice versa.
9849
  // Possible permutations inserting an element from V2 into V1:
9850
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9851
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9852
  //   ...
9853
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9854
  // Inserting from V1 into V2 will be similar, except mask range will be
9855
  // [16,31].
9856

9857
  bool FoundCandidate = false;
9858
  // If both vector operands for the shuffle are the same vector, the mask
9859
  // will contain only elements from the first one and the second one will be
9860
  // undef.
9861
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9862
  // Go through the mask of half-words to find an element that's being moved
9863
  // from one vector to the other.
9864
  for (unsigned i = 0; i < BytesInVector; ++i) {
9865
    unsigned CurrentElement = Mask[i];
9866
    // If 2nd operand is undefined, we should only look for element 7 in the
9867
    // Mask.
9868
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9869
      continue;
9870

9871
    bool OtherElementsInOrder = true;
9872
    // Examine the other elements in the Mask to see if they're in original
9873
    // order.
9874
    for (unsigned j = 0; j < BytesInVector; ++j) {
9875
      if (j == i)
9876
        continue;
9877
      // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
9878
      // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
9879
      // in which we always assume we're always picking from the 1st operand.
9880
      int MaskOffset =
9881
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9882
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9883
        OtherElementsInOrder = false;
9884
        break;
9885
      }
9886
    }
9887
    // If other elements are in original order, we record the number of shifts
9888
    // we need to get the element we want into element 7. Also record which byte
9889
    // in the vector we should insert into.
9890
    if (OtherElementsInOrder) {
9891
      // If 2nd operand is undefined, we assume no shifts and no swapping.
9892
      if (V2.isUndef()) {
9893
        ShiftElts = 0;
9894
        Swap = false;
9895
      } else {
9896
        // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9897
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9898
                         : BigEndianShifts[CurrentElement & 0xF];
9899
        Swap = CurrentElement < BytesInVector;
9900
      }
9901
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9902
      FoundCandidate = true;
9903
      break;
9904
    }
9905
  }
9906

9907
  if (!FoundCandidate)
9908
    return SDValue();
9909

9910
  // Candidate found, construct the proper SDAG sequence with VINSERTB,
9911
  // optionally with VECSHL if shift is required.
9912
  if (Swap)
9913
    std::swap(V1, V2);
9914
  if (V2.isUndef())
9915
    V2 = V1;
9916
  if (ShiftElts) {
9917
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9918
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
9919
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9920
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
9921
  }
9922
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9923
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
9924
}
9925

9926
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9927
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9928
/// SDValue.
9929
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9930
                                           SelectionDAG &DAG) const {
9931
  const unsigned NumHalfWords = 8;
9932
  const unsigned BytesInVector = NumHalfWords * 2;
9933
  // Check that the shuffle is on half-words.
9934
  if (!isNByteElemShuffleMask(N, 2, 1))
9935
    return SDValue();
9936

9937
  bool IsLE = Subtarget.isLittleEndian();
9938
  SDLoc dl(N);
9939
  SDValue V1 = N->getOperand(0);
9940
  SDValue V2 = N->getOperand(1);
9941
  unsigned ShiftElts = 0, InsertAtByte = 0;
9942
  bool Swap = false;
9943

9944
  // Shifts required to get the half-word we want at element 3.
9945
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9946
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9947

9948
  uint32_t Mask = 0;
9949
  uint32_t OriginalOrderLow = 0x1234567;
9950
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
9951
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9952
  // 32-bit space, only need 4-bit nibbles per element.
9953
  for (unsigned i = 0; i < NumHalfWords; ++i) {
9954
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9955
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9956
  }
9957

9958
  // For each mask element, find out if we're just inserting something
9959
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
9960
  // from V2 into V1:
9961
  //   X, 1, 2, 3, 4, 5, 6, 7
9962
  //   0, X, 2, 3, 4, 5, 6, 7
9963
  //   0, 1, X, 3, 4, 5, 6, 7
9964
  //   0, 1, 2, X, 4, 5, 6, 7
9965
  //   0, 1, 2, 3, X, 5, 6, 7
9966
  //   0, 1, 2, 3, 4, X, 6, 7
9967
  //   0, 1, 2, 3, 4, 5, X, 7
9968
  //   0, 1, 2, 3, 4, 5, 6, X
9969
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9970

9971
  bool FoundCandidate = false;
9972
  // Go through the mask of half-words to find an element that's being moved
9973
  // from one vector to the other.
9974
  for (unsigned i = 0; i < NumHalfWords; ++i) {
9975
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9976
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9977
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
9978
    uint32_t TargetOrder = 0x0;
9979

9980
    // If both vector operands for the shuffle are the same vector, the mask
9981
    // will contain only elements from the first one and the second one will be
9982
    // undef.
9983
    if (V2.isUndef()) {
9984
      ShiftElts = 0;
9985
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9986
      TargetOrder = OriginalOrderLow;
9987
      Swap = false;
9988
      // Skip if not the correct element or mask of other elements don't equal
9989
      // to our expected order.
9990
      if (MaskOneElt == VINSERTHSrcElem &&
9991
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9992
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9993
        FoundCandidate = true;
9994
        break;
9995
      }
9996
    } else { // If both operands are defined.
9997
      // Target order is [8,15] if the current mask is between [0,7].
9998
      TargetOrder =
9999
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10000
      // Skip if mask of other elements don't equal our expected order.
10001
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10002
        // We only need the last 3 bits for the number of shifts.
10003
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10004
                         : BigEndianShifts[MaskOneElt & 0x7];
10005
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10006
        Swap = MaskOneElt < NumHalfWords;
10007
        FoundCandidate = true;
10008
        break;
10009
      }
10010
    }
10011
  }
10012

10013
  if (!FoundCandidate)
10014
    return SDValue();
10015

10016
  // Candidate found, construct the proper SDAG sequence with VINSERTH,
10017
  // optionally with VECSHL if shift is required.
10018
  if (Swap)
10019
    std::swap(V1, V2);
10020
  if (V2.isUndef())
10021
    V2 = V1;
10022
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10023
  if (ShiftElts) {
10024
    // Double ShiftElts because we're left shifting on v16i8 type.
10025
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10026
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10027
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10028
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10029
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
10030
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10031
  }
10032
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10033
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10034
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
10035
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10036
}
10037

10038
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10039
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10040
/// return the default SDValue.
10041
SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10042
                                              SelectionDAG &DAG) const {
10043
  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10044
  // to v16i8. Peek through the bitcasts to get the actual operands.
10045
  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10046
  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10047

10048
  auto ShuffleMask = SVN->getMask();
10049
  SDValue VecShuffle(SVN, 0);
10050
  SDLoc DL(SVN);
10051

10052
  // Check that we have a four byte shuffle.
10053
  if (!isNByteElemShuffleMask(SVN, 4, 1))
10054
    return SDValue();
10055

10056
  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10057
  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10058
    std::swap(LHS, RHS);
10059
    VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
10060
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10061
    if (!CommutedSV)
10062
      return SDValue();
10063
    ShuffleMask = CommutedSV->getMask();
10064
  }
10065

10066
  // Ensure that the RHS is a vector of constants.
10067
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10068
  if (!BVN)
10069
    return SDValue();
10070

10071
  // Check if RHS is a splat of 4-bytes (or smaller).
10072
  APInt APSplatValue, APSplatUndef;
10073
  unsigned SplatBitSize;
10074
  bool HasAnyUndefs;
10075
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10076
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10077
      SplatBitSize > 32)
10078
    return SDValue();
10079

10080
  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10081
  // The instruction splats a constant C into two words of the source vector
10082
  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10083
  // Thus we check that the shuffle mask is the equivalent  of
10084
  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10085
  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10086
  // within each word are consecutive, so we only need to check the first byte.
10087
  SDValue Index;
10088
  bool IsLE = Subtarget.isLittleEndian();
10089
  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10090
      (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10091
       ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10092
    Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10093
  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10094
           (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10095
            ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10096
    Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10097
  else
10098
    return SDValue();
10099

10100
  // If the splat is narrower than 32-bits, we need to get the 32-bit value
10101
  // for XXSPLTI32DX.
10102
  unsigned SplatVal = APSplatValue.getZExtValue();
10103
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
10104
    SplatVal |= (SplatVal << SplatBitSize);
10105

10106
  SDValue SplatNode = DAG.getNode(
10107
      PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10108
      Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10109
  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10110
}
10111

10112
/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10113
/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10114
/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10115
/// i.e (or (shl x, C1), (srl x, 128-C1)).
10116
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10117
  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10118
  assert(Op.getValueType() == MVT::v1i128 &&
10119
         "Only set v1i128 as custom, other type shouldn't reach here!");
10120
  SDLoc dl(Op);
10121
  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10122
  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10123
  unsigned SHLAmt = N1.getConstantOperandVal(0);
10124
  if (SHLAmt % 8 == 0) {
10125
    std::array<int, 16> Mask;
10126
    std::iota(Mask.begin(), Mask.end(), 0);
10127
    std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10128
    if (SDValue Shuffle =
10129
            DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10130
                                 DAG.getUNDEF(MVT::v16i8), Mask))
10131
      return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10132
  }
10133
  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10134
  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10135
                              DAG.getConstant(SHLAmt, dl, MVT::i32));
10136
  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10137
                              DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10138
  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10139
  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10140
}
10141

10142
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
10143
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
10144
/// return the code it can be lowered into.  Worst case, it can always be
10145
/// lowered into a vperm.
10146
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10147
                                               SelectionDAG &DAG) const {
10148
  SDLoc dl(Op);
10149
  SDValue V1 = Op.getOperand(0);
10150
  SDValue V2 = Op.getOperand(1);
10151
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10152

10153
  // Any nodes that were combined in the target-independent combiner prior
10154
  // to vector legalization will not be sent to the target combine. Try to
10155
  // combine it here.
10156
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10157
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
10158
      return NewShuffle;
10159
    Op = NewShuffle;
10160
    SVOp = cast<ShuffleVectorSDNode>(Op);
10161
    V1 = Op.getOperand(0);
10162
    V2 = Op.getOperand(1);
10163
  }
10164
  EVT VT = Op.getValueType();
10165
  bool isLittleEndian = Subtarget.isLittleEndian();
10166

10167
  unsigned ShiftElts, InsertAtByte;
10168
  bool Swap = false;
10169

10170
  // If this is a load-and-splat, we can do that with a single instruction
10171
  // in some cases. However if the load has multiple uses, we don't want to
10172
  // combine it because that will just produce multiple loads.
10173
  bool IsPermutedLoad = false;
10174
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10175
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10176
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10177
      InputLoad->hasOneUse()) {
10178
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10179
    int SplatIdx =
10180
      PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10181

10182
    // The splat index for permuted loads will be in the left half of the vector
10183
    // which is strictly wider than the loaded value by 8 bytes. So we need to
10184
    // adjust the splat index to point to the correct address in memory.
10185
    if (IsPermutedLoad) {
10186
      assert((isLittleEndian || IsFourByte) &&
10187
             "Unexpected size for permuted load on big endian target");
10188
      SplatIdx += IsFourByte ? 2 : 1;
10189
      assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10190
             "Splat of a value outside of the loaded memory");
10191
    }
10192

10193
    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10194
    // For 4-byte load-and-splat, we need Power9.
10195
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10196
      uint64_t Offset = 0;
10197
      if (IsFourByte)
10198
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10199
      else
10200
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10201

10202
      // If the width of the load is the same as the width of the splat,
10203
      // loading with an offset would load the wrong memory.
10204
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10205
        Offset = 0;
10206

10207
      SDValue BasePtr = LD->getBasePtr();
10208
      if (Offset != 0)
10209
        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10210
                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
10211
      SDValue Ops[] = {
10212
        LD->getChain(),    // Chain
10213
        BasePtr,           // BasePtr
10214
        DAG.getValueType(Op.getValueType()) // VT
10215
      };
10216
      SDVTList VTL =
10217
        DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10218
      SDValue LdSplt =
10219
        DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10220
                                Ops, LD->getMemoryVT(), LD->getMemOperand());
10221
      DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10222
      if (LdSplt.getValueType() != SVOp->getValueType(0))
10223
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10224
      return LdSplt;
10225
    }
10226
  }
10227

10228
  // All v2i64 and v2f64 shuffles are legal
10229
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
10230
    return Op;
10231

10232
  if (Subtarget.hasP9Vector() &&
10233
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10234
                           isLittleEndian)) {
10235
    if (V2.isUndef())
10236
      V2 = V1;
10237
    else if (Swap)
10238
      std::swap(V1, V2);
10239
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10240
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10241
    if (ShiftElts) {
10242
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10243
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
10244
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10245
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
10246
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10247
    }
10248
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10249
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
10250
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10251
  }
10252

10253
  if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10254
    SDValue SplatInsertNode;
10255
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10256
      return SplatInsertNode;
10257
  }
10258

10259
  if (Subtarget.hasP9Altivec()) {
10260
    SDValue NewISDNode;
10261
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10262
      return NewISDNode;
10263

10264
    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10265
      return NewISDNode;
10266
  }
10267

10268
  if (Subtarget.hasVSX() &&
10269
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10270
    if (Swap)
10271
      std::swap(V1, V2);
10272
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10273
    SDValue Conv2 =
10274
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10275

10276
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10277
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
10278
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10279
  }
10280

10281
  if (Subtarget.hasVSX() &&
10282
    PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10283
    if (Swap)
10284
      std::swap(V1, V2);
10285
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10286
    SDValue Conv2 =
10287
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10288

10289
    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10290
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
10291
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10292
  }
10293

10294
  if (Subtarget.hasP9Vector()) {
10295
     if (PPC::isXXBRHShuffleMask(SVOp)) {
10296
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10297
      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10298
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10299
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10300
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10301
      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10302
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10303
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10304
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10305
      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10306
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10307
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10308
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10309
      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10310
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10311
    }
10312
  }
10313

10314
  if (Subtarget.hasVSX()) {
10315
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10316
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10317

10318
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10319
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10320
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
10321
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10322
    }
10323

10324
    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10325
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10326
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10327
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10328
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10329
    }
10330
  }
10331

10332
  // Cases that are handled by instructions that take permute immediates
10333
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10334
  // selected by the instruction selector.
10335
  if (V2.isUndef()) {
10336
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
10337
        PPC::isSplatShuffleMask(SVOp, 2) ||
10338
        PPC::isSplatShuffleMask(SVOp, 4) ||
10339
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10340
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10341
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10342
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10343
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10344
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10345
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10346
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10347
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10348
        (Subtarget.hasP8Altivec() && (
10349
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10350
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10351
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10352
      return Op;
10353
    }
10354
  }
10355

10356
  // Altivec has a variety of "shuffle immediates" that take two vector inputs
10357
  // and produce a fixed permutation.  If any of these match, do not lower to
10358
  // VPERM.
10359
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10360
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10361
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10362
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10363
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10364
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10365
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10366
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10367
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10368
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10369
      (Subtarget.hasP8Altivec() && (
10370
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10371
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10372
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10373
    return Op;
10374

10375
  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10376
  // perfect shuffle table to emit an optimal matching sequence.
10377
  ArrayRef<int> PermMask = SVOp->getMask();
10378

10379
  if (!DisablePerfectShuffle && !isLittleEndian) {
10380
    unsigned PFIndexes[4];
10381
    bool isFourElementShuffle = true;
10382
    for (unsigned i = 0; i != 4 && isFourElementShuffle;
10383
         ++i) {                           // Element number
10384
      unsigned EltNo = 8;                 // Start out undef.
10385
      for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10386
        if (PermMask[i * 4 + j] < 0)
10387
          continue; // Undef, ignore it.
10388

10389
        unsigned ByteSource = PermMask[i * 4 + j];
10390
        if ((ByteSource & 3) != j) {
10391
          isFourElementShuffle = false;
10392
          break;
10393
        }
10394

10395
        if (EltNo == 8) {
10396
          EltNo = ByteSource / 4;
10397
        } else if (EltNo != ByteSource / 4) {
10398
          isFourElementShuffle = false;
10399
          break;
10400
        }
10401
      }
10402
      PFIndexes[i] = EltNo;
10403
    }
10404

10405
    // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10406
    // perfect shuffle vector to determine if it is cost effective to do this as
10407
    // discrete instructions, or whether we should use a vperm.
10408
    // For now, we skip this for little endian until such time as we have a
10409
    // little-endian perfect shuffle table.
10410
    if (isFourElementShuffle) {
10411
      // Compute the index in the perfect shuffle table.
10412
      unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10413
                              PFIndexes[2] * 9 + PFIndexes[3];
10414

10415
      unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10416
      unsigned Cost = (PFEntry >> 30);
10417

10418
      // Determining when to avoid vperm is tricky.  Many things affect the cost
10419
      // of vperm, particularly how many times the perm mask needs to be
10420
      // computed. For example, if the perm mask can be hoisted out of a loop or
10421
      // is already used (perhaps because there are multiple permutes with the
10422
      // same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
10423
      // permute mask out of the loop requires an extra register.
10424
      //
10425
      // As a compromise, we only emit discrete instructions if the shuffle can
10426
      // be generated in 3 or fewer operations.  When we have loop information
10427
      // available, if this block is within a loop, we should avoid using vperm
10428
      // for 3-operation perms and use a constant pool load instead.
10429
      if (Cost < 3)
10430
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10431
    }
10432
  }
10433

10434
  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10435
  // vector that will get spilled to the constant pool.
10436
  if (V2.isUndef()) V2 = V1;
10437

10438
  return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10439
}
10440

10441
/// Lower a shuffle to a byte permute (VPERM, or XXPERM when the subtarget has
/// VSX and P9 vector support and at least one input has a single use), driven
/// by a v16i8 control vector derived from \p PermMask.
///
/// \param Op       the node being lowered; supplies the debug location and is
///                 dumped as a shuffle in debug output.
/// \param PermMask shuffle mask in element units; negative entries are
///                 treated as element 0.
/// \param VT       vector type whose element count bounds the mask walk.
/// \param V1       first shuffle input.
/// \param V2       second shuffle input.
/// \returns the permute node bitcast back to the type \p V1 had on entry.
SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
                                      ArrayRef<int> PermMask, EVT VT,
                                      SDValue V1, SDValue V2) const {
  const bool IsLE = Subtarget.isLittleEndian();
  const bool Is64Bit = Subtarget.isPPC64();
  const EVT ResultVT = V1.getValueType();
  SDLoc dl(Op);
  unsigned PermOpc = PPCISD::VPERM;
  bool InputsExchanged = false;

  if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
      (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
                         "XXPERM instead\n");
    PermOpc = PPCISD::XXPERM;

    // XXPERM's second input doubles as its output, so a multi-use value in
    // that slot would need a copy. The operands are exchanged once more for
    // little endian right before the node is built, so the value that ends up
    // in the second slot is V1 on LE and V2 on BE. Prefer to put the
    // single-use operand there.
    const SDValue &Tied = IsLE ? V1 : V2;
    const SDValue &Free = IsLE ? V2 : V1;
    if (!Tied->hasOneUse() && Free->hasOneUse()) {
      std::swap(V1, V2);
      InputsExchanged = true;
    }
  }

  // The shuffle mask is expressed in input-element units while the permute
  // control vector is expressed in bytes, so each element expands to
  // BytesPerElement consecutive byte selectors.
  //
  // For little endian, the order of the input vectors is reversed and every
  // selector is complemented with respect to 31, which yields the proper
  // semantics on top of the big-endian-based permute instruction.
  const unsigned BytesPerElement =
      V1.getValueType().getVectorElementType().getSizeInBits() / 8;

  // An input whose operand 0 is an XXSWAPD is rewired to the swap's own input
  // further down; the mask indices are adjusted here to compensate.
  const bool V1OverSwap = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
  const bool V2OverSwap = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;

  // Index bookkeeping, viewing the inputs as the concatenation [ V1 | V2 ]:
  //   * looking through an XXSWAPD on V1 exchanges indices 0-7 with 8-15;
  //   * looking through an XXSWAPD on V2 exchanges indices 16-23 with 24-31;
  //   * exchanging V1 and V2 moves 0-15 up by 16 and everything else down
  //     by 16.
  // The three adjustments are applied in exactly that order.
  auto RemapElt = [&](unsigned Elt) {
    if (V1OverSwap && Elt < 16)
      Elt = Elt < 8 ? Elt + 8 : Elt - 8;
    if (V2OverSwap && Elt > 15)
      Elt = Elt > 23 ? Elt - 8 : Elt + 8;
    if (InputsExchanged)
      Elt = Elt < 16 ? Elt + 16 : Elt - 16;
    return Elt;
  };

  SmallVector<SDValue, 16> ControlBytes;
  const unsigned NumElts = VT.getVectorNumElements();
  for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
    // Undef (negative) mask entries select element 0.
    const unsigned SrcElt =
        RemapElt(PermMask[EltIdx] < 0 ? 0 : PermMask[EltIdx]);
    const unsigned FirstByte = SrcElt * BytesPerElement;
    for (unsigned Byte = 0; Byte != BytesPerElement; ++Byte) {
      const unsigned Selector =
          IsLE ? 31 - (FirstByte + Byte) : FirstByte + Byte;
      ControlBytes.push_back(DAG.getConstant(Selector, dl, MVT::i32));
    }
  }

  // Bypass the swaps now that the mask accounts for them. The debug location
  // follows the most recently bypassed swap.
  if (V1OverSwap) {
    dl = SDLoc(V1->getOperand(0));
    V1 = V1->getOperand(0)->getOperand(1);
  }
  if (V2OverSwap) {
    dl = SDLoc(V2->getOperand(0));
    V2 = V2->getOperand(0)->getOperand(1);
  }

  if (Is64Bit && (V1OverSwap || V2OverSwap)) {
    // Note that the V1 test looks at the type V1 had on entry, not at the
    // type of the value V1 currently refers to.
    if (ResultVT != MVT::v2f64)
      V1 = DAG.getBitcast(MVT::v2f64, V1);
    if (V2.getValueType() != MVT::v2f64)
      V2 = DAG.getBitcast(MVT::v2f64, V2);
  }

  ++ShufflesHandledWithVPERM;
  SDValue ControlVec = DAG.getBuildVector(MVT::v16i8, dl, ControlBytes);
  LLVM_DEBUG({
    ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(Op);
    dbgs() << (PermOpc == PPCISD::XXPERM
                   ? "Emitting a XXPERM for the following shuffle:\n"
                   : "Emitting a VPERM for the following shuffle:\n");
    Shuffle->dump();
    dbgs() << "With the following permute control vector:\n";
    ControlVec.dump();
  });

  // XXPERM takes its control vector as v4i32.
  if (PermOpc == PPCISD::XXPERM)
    ControlVec = DAG.getBitcast(MVT::v4i32, ControlVec);

  // The mask is already correct for little endian; only the operand order
  // still has to be reversed.
  if (IsLE)
    std::swap(V1, V2);

  SDValue Perm =
      DAG.getNode(PermOpc, dl, V1.getValueType(), V1, V2, ControlVec);
  return DAG.getBitcast(ResultVT, Perm);
}
10571

10572
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10573
/// vector comparison.  If it is, return true and fill in Opc/isDot with
10574
/// information about the intrinsic.
10575
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10576
                                 bool &isDot, const PPCSubtarget &Subtarget) {
10577
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10578
  CompareOpc = -1;
10579
  isDot = false;
10580
  switch (IntrinsicID) {
10581
  default:
10582
    return false;
10583
  // Comparison predicates.
10584
  case Intrinsic::ppc_altivec_vcmpbfp_p:
10585
    CompareOpc = 966;
10586
    isDot = true;
10587
    break;
10588
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
10589
    CompareOpc = 198;
10590
    isDot = true;
10591
    break;
10592
  case Intrinsic::ppc_altivec_vcmpequb_p:
10593
    CompareOpc = 6;
10594
    isDot = true;
10595
    break;
10596
  case Intrinsic::ppc_altivec_vcmpequh_p:
10597
    CompareOpc = 70;
10598
    isDot = true;
10599
    break;
10600
  case Intrinsic::ppc_altivec_vcmpequw_p:
10601
    CompareOpc = 134;
10602
    isDot = true;
10603
    break;
10604
  case Intrinsic::ppc_altivec_vcmpequd_p:
10605
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10606
      CompareOpc = 199;
10607
      isDot = true;
10608
    } else
10609
      return false;
10610
    break;
10611
  case Intrinsic::ppc_altivec_vcmpneb_p:
10612
  case Intrinsic::ppc_altivec_vcmpneh_p:
10613
  case Intrinsic::ppc_altivec_vcmpnew_p:
10614
  case Intrinsic::ppc_altivec_vcmpnezb_p:
10615
  case Intrinsic::ppc_altivec_vcmpnezh_p:
10616
  case Intrinsic::ppc_altivec_vcmpnezw_p:
10617
    if (Subtarget.hasP9Altivec()) {
10618
      switch (IntrinsicID) {
10619
      default:
10620
        llvm_unreachable("Unknown comparison intrinsic.");
10621
      case Intrinsic::ppc_altivec_vcmpneb_p:
10622
        CompareOpc = 7;
10623
        break;
10624
      case Intrinsic::ppc_altivec_vcmpneh_p:
10625
        CompareOpc = 71;
10626
        break;
10627
      case Intrinsic::ppc_altivec_vcmpnew_p:
10628
        CompareOpc = 135;
10629
        break;
10630
      case Intrinsic::ppc_altivec_vcmpnezb_p:
10631
        CompareOpc = 263;
10632
        break;
10633
      case Intrinsic::ppc_altivec_vcmpnezh_p:
10634
        CompareOpc = 327;
10635
        break;
10636
      case Intrinsic::ppc_altivec_vcmpnezw_p:
10637
        CompareOpc = 391;
10638
        break;
10639
      }
10640
      isDot = true;
10641
    } else
10642
      return false;
10643
    break;
10644
  case Intrinsic::ppc_altivec_vcmpgefp_p:
10645
    CompareOpc = 454;
10646
    isDot = true;
10647
    break;
10648
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
10649
    CompareOpc = 710;
10650
    isDot = true;
10651
    break;
10652
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
10653
    CompareOpc = 774;
10654
    isDot = true;
10655
    break;
10656
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
10657
    CompareOpc = 838;
10658
    isDot = true;
10659
    break;
10660
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
10661
    CompareOpc = 902;
10662
    isDot = true;
10663
    break;
10664
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
10665
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10666
      CompareOpc = 967;
10667
      isDot = true;
10668
    } else
10669
      return false;
10670
    break;
10671
  case Intrinsic::ppc_altivec_vcmpgtub_p:
10672
    CompareOpc = 518;
10673
    isDot = true;
10674
    break;
10675
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
10676
    CompareOpc = 582;
10677
    isDot = true;
10678
    break;
10679
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
10680
    CompareOpc = 646;
10681
    isDot = true;
10682
    break;
10683
  case Intrinsic::ppc_altivec_vcmpgtud_p:
10684
    if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10685
      CompareOpc = 711;
10686
      isDot = true;
10687
    } else
10688
      return false;
10689
    break;
10690

10691
  case Intrinsic::ppc_altivec_vcmpequq:
10692
  case Intrinsic::ppc_altivec_vcmpgtsq:
10693
  case Intrinsic::ppc_altivec_vcmpgtuq:
10694
    if (!Subtarget.isISA3_1())
10695
      return false;
10696
    switch (IntrinsicID) {
10697
    default:
10698
      llvm_unreachable("Unknown comparison intrinsic.");
10699
    case Intrinsic::ppc_altivec_vcmpequq:
10700
      CompareOpc = 455;
10701
      break;
10702
    case Intrinsic::ppc_altivec_vcmpgtsq:
10703
      CompareOpc = 903;
10704
      break;
10705
    case Intrinsic::ppc_altivec_vcmpgtuq:
10706
      CompareOpc = 647;
10707
      break;
10708
    }
10709
    break;
10710

10711
  // VSX predicate comparisons use the same infrastructure
10712
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10713
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10714
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10715
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10716
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10717
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10718
    if (Subtarget.hasVSX()) {
10719
      switch (IntrinsicID) {
10720
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10721
        CompareOpc = 99;
10722
        break;
10723
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
10724
        CompareOpc = 115;
10725
        break;
10726
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10727
        CompareOpc = 107;
10728
        break;
10729
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10730
        CompareOpc = 67;
10731
        break;
10732
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
10733
        CompareOpc = 83;
10734
        break;
10735
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10736
        CompareOpc = 75;
10737
        break;
10738
      }
10739
      isDot = true;
10740
    } else
10741
      return false;
10742
    break;
10743

10744
  // Normal Comparisons.
10745
  case Intrinsic::ppc_altivec_vcmpbfp:
10746
    CompareOpc = 966;
10747
    break;
10748
  case Intrinsic::ppc_altivec_vcmpeqfp:
10749
    CompareOpc = 198;
10750
    break;
10751
  case Intrinsic::ppc_altivec_vcmpequb:
10752
    CompareOpc = 6;
10753
    break;
10754
  case Intrinsic::ppc_altivec_vcmpequh:
10755
    CompareOpc = 70;
10756
    break;
10757
  case Intrinsic::ppc_altivec_vcmpequw:
10758
    CompareOpc = 134;
10759
    break;
10760
  case Intrinsic::ppc_altivec_vcmpequd:
10761
    if (Subtarget.hasP8Altivec())
10762
      CompareOpc = 199;
10763
    else
10764
      return false;
10765
    break;
10766
  case Intrinsic::ppc_altivec_vcmpneb:
10767
  case Intrinsic::ppc_altivec_vcmpneh:
10768
  case Intrinsic::ppc_altivec_vcmpnew:
10769
  case Intrinsic::ppc_altivec_vcmpnezb:
10770
  case Intrinsic::ppc_altivec_vcmpnezh:
10771
  case Intrinsic::ppc_altivec_vcmpnezw:
10772
    if (Subtarget.hasP9Altivec())
10773
      switch (IntrinsicID) {
10774
      default:
10775
        llvm_unreachable("Unknown comparison intrinsic.");
10776
      case Intrinsic::ppc_altivec_vcmpneb:
10777
        CompareOpc = 7;
10778
        break;
10779
      case Intrinsic::ppc_altivec_vcmpneh:
10780
        CompareOpc = 71;
10781
        break;
10782
      case Intrinsic::ppc_altivec_vcmpnew:
10783
        CompareOpc = 135;
10784
        break;
10785
      case Intrinsic::ppc_altivec_vcmpnezb:
10786
        CompareOpc = 263;
10787
        break;
10788
      case Intrinsic::ppc_altivec_vcmpnezh:
10789
        CompareOpc = 327;
10790
        break;
10791
      case Intrinsic::ppc_altivec_vcmpnezw:
10792
        CompareOpc = 391;
10793
        break;
10794
      }
10795
    else
10796
      return false;
10797
    break;
10798
  case Intrinsic::ppc_altivec_vcmpgefp:
10799
    CompareOpc = 454;
10800
    break;
10801
  case Intrinsic::ppc_altivec_vcmpgtfp:
10802
    CompareOpc = 710;
10803
    break;
10804
  case Intrinsic::ppc_altivec_vcmpgtsb:
10805
    CompareOpc = 774;
10806
    break;
10807
  case Intrinsic::ppc_altivec_vcmpgtsh:
10808
    CompareOpc = 838;
10809
    break;
10810
  case Intrinsic::ppc_altivec_vcmpgtsw:
10811
    CompareOpc = 902;
10812
    break;
10813
  case Intrinsic::ppc_altivec_vcmpgtsd:
10814
    if (Subtarget.hasP8Altivec())
10815
      CompareOpc = 967;
10816
    else
10817
      return false;
10818
    break;
10819
  case Intrinsic::ppc_altivec_vcmpgtub:
10820
    CompareOpc = 518;
10821
    break;
10822
  case Intrinsic::ppc_altivec_vcmpgtuh:
10823
    CompareOpc = 582;
10824
    break;
10825
  case Intrinsic::ppc_altivec_vcmpgtuw:
10826
    CompareOpc = 646;
10827
    break;
10828
  case Intrinsic::ppc_altivec_vcmpgtud:
10829
    if (Subtarget.hasP8Altivec())
10830
      CompareOpc = 711;
10831
    else
10832
      return false;
10833
    break;
10834
  case Intrinsic::ppc_altivec_vcmpequq_p:
10835
  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10836
  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10837
    if (!Subtarget.isISA3_1())
10838
      return false;
10839
    switch (IntrinsicID) {
10840
    default:
10841
      llvm_unreachable("Unknown comparison intrinsic.");
10842
    case Intrinsic::ppc_altivec_vcmpequq_p:
10843
      CompareOpc = 455;
10844
      break;
10845
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
10846
      CompareOpc = 903;
10847
      break;
10848
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
10849
      CompareOpc = 647;
10850
      break;
10851
    }
10852
    isDot = true;
10853
    break;
10854
  }
10855
  return true;
10856
}
10857

10858
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10859
/// lower, do it, otherwise return null.
10860
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10861
                                                   SelectionDAG &DAG) const {
10862
  unsigned IntrinsicID = Op.getConstantOperandVal(0);
10863

10864
  SDLoc dl(Op);
10865

10866
  switch (IntrinsicID) {
10867
  case Intrinsic::thread_pointer:
10868
    // Reads the thread pointer register, used for __builtin_thread_pointer.
10869
    if (Subtarget.isPPC64())
10870
      return DAG.getRegister(PPC::X13, MVT::i64);
10871
    return DAG.getRegister(PPC::R2, MVT::i32);
10872

10873
  case Intrinsic::ppc_rldimi: {
10874
    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10875
    SDValue Src = Op.getOperand(1);
10876
    APInt Mask = Op.getConstantOperandAPInt(4);
10877
    if (Mask.isZero())
10878
      return Op.getOperand(2);
10879
    if (Mask.isAllOnes())
10880
      return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10881
    uint64_t SH = Op.getConstantOperandVal(3);
10882
    unsigned MB = 0, ME = 0;
10883
    if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10884
      report_fatal_error("invalid rldimi mask!");
10885
    // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10886
    if (ME < 63 - SH) {
10887
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10888
                        DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10889
    } else if (ME > 63 - SH) {
10890
      Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10891
                        DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10892
    }
10893
    return SDValue(
10894
        DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10895
                           {Op.getOperand(2), Src,
10896
                            DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10897
                            DAG.getTargetConstant(MB, dl, MVT::i32)}),
10898
        0);
10899
  }
10900

10901
  case Intrinsic::ppc_rlwimi: {
10902
    APInt Mask = Op.getConstantOperandAPInt(4);
10903
    if (Mask.isZero())
10904
      return Op.getOperand(2);
10905
    if (Mask.isAllOnes())
10906
      return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10907
                         Op.getOperand(3));
10908
    unsigned MB = 0, ME = 0;
10909
    if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10910
      report_fatal_error("invalid rlwimi mask!");
10911
    return SDValue(DAG.getMachineNode(
10912
                       PPC::RLWIMI, dl, MVT::i32,
10913
                       {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10914
                        DAG.getTargetConstant(MB, dl, MVT::i32),
10915
                        DAG.getTargetConstant(ME, dl, MVT::i32)}),
10916
                   0);
10917
  }
10918

10919
  case Intrinsic::ppc_rlwnm: {
10920
    if (Op.getConstantOperandVal(3) == 0)
10921
      return DAG.getConstant(0, dl, MVT::i32);
10922
    unsigned MB = 0, ME = 0;
10923
    if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10924
      report_fatal_error("invalid rlwnm mask!");
10925
    return SDValue(
10926
        DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10927
                           {Op.getOperand(1), Op.getOperand(2),
10928
                            DAG.getTargetConstant(MB, dl, MVT::i32),
10929
                            DAG.getTargetConstant(ME, dl, MVT::i32)}),
10930
        0);
10931
  }
10932

10933
  case Intrinsic::ppc_mma_disassemble_acc: {
10934
    if (Subtarget.isISAFuture()) {
10935
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10936
      SDValue WideVec =
10937
          SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
10938
                                     Op.getOperand(1)),
10939
                  0);
10940
      SmallVector<SDValue, 4> RetOps;
10941
      SDValue Value = SDValue(WideVec.getNode(), 0);
10942
      SDValue Value2 = SDValue(WideVec.getNode(), 1);
10943

10944
      SDValue Extract;
10945
      Extract = DAG.getNode(
10946
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10947
          Subtarget.isLittleEndian() ? Value2 : Value,
10948
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10949
                          dl, getPointerTy(DAG.getDataLayout())));
10950
      RetOps.push_back(Extract);
10951
      Extract = DAG.getNode(
10952
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10953
          Subtarget.isLittleEndian() ? Value2 : Value,
10954
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10955
                          dl, getPointerTy(DAG.getDataLayout())));
10956
      RetOps.push_back(Extract);
10957
      Extract = DAG.getNode(
10958
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10959
          Subtarget.isLittleEndian() ? Value : Value2,
10960
          DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10961
                          dl, getPointerTy(DAG.getDataLayout())));
10962
      RetOps.push_back(Extract);
10963
      Extract = DAG.getNode(
10964
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10965
          Subtarget.isLittleEndian() ? Value : Value2,
10966
          DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10967
                          dl, getPointerTy(DAG.getDataLayout())));
10968
      RetOps.push_back(Extract);
10969
      return DAG.getMergeValues(RetOps, dl);
10970
    }
10971
    [[fallthrough]];
10972
  }
10973
  case Intrinsic::ppc_vsx_disassemble_pair: {
10974
    int NumVecs = 2;
10975
    SDValue WideVec = Op.getOperand(1);
10976
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10977
      NumVecs = 4;
10978
      WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10979
    }
10980
    SmallVector<SDValue, 4> RetOps;
10981
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10982
      SDValue Extract = DAG.getNode(
10983
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10984
          DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10985
                                                     : VecNo,
10986
                          dl, getPointerTy(DAG.getDataLayout())));
10987
      RetOps.push_back(Extract);
10988
    }
10989
    return DAG.getMergeValues(RetOps, dl);
10990
  }
10991

10992
  case Intrinsic::ppc_mma_xxmfacc:
10993
  case Intrinsic::ppc_mma_xxmtacc: {
10994
    // Allow pre-isa-future subtargets to lower as normal.
10995
    if (!Subtarget.isISAFuture())
10996
      return SDValue();
10997
    // The intrinsics for xxmtacc and xxmfacc take one argument of
10998
    // type v512i1, for future cpu the corresponding wacc instruction
10999
    // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
11000
    // the need to produce the xxm[t|f]acc.
11001
    SDValue WideVec = Op.getOperand(1);
11002
    DAG.ReplaceAllUsesWith(Op, WideVec);
11003
    return SDValue();
11004
  }
11005

11006
  case Intrinsic::ppc_unpack_longdouble: {
11007
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11008
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11009
           "Argument of long double unpack must be 0 or 1!");
11010
    return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11011
                       DAG.getConstant(!!(Idx->getSExtValue()), dl,
11012
                                       Idx->getValueType(0)));
11013
  }
11014

11015
  case Intrinsic::ppc_compare_exp_lt:
11016
  case Intrinsic::ppc_compare_exp_gt:
11017
  case Intrinsic::ppc_compare_exp_eq:
11018
  case Intrinsic::ppc_compare_exp_uo: {
11019
    unsigned Pred;
11020
    switch (IntrinsicID) {
11021
    case Intrinsic::ppc_compare_exp_lt:
11022
      Pred = PPC::PRED_LT;
11023
      break;
11024
    case Intrinsic::ppc_compare_exp_gt:
11025
      Pred = PPC::PRED_GT;
11026
      break;
11027
    case Intrinsic::ppc_compare_exp_eq:
11028
      Pred = PPC::PRED_EQ;
11029
      break;
11030
    case Intrinsic::ppc_compare_exp_uo:
11031
      Pred = PPC::PRED_UN;
11032
      break;
11033
    }
11034
    return SDValue(
11035
        DAG.getMachineNode(
11036
            PPC::SELECT_CC_I4, dl, MVT::i32,
11037
            {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11038
                                        Op.getOperand(1), Op.getOperand(2)),
11039
                     0),
11040
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11041
             DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11042
        0);
11043
  }
11044
  case Intrinsic::ppc_test_data_class: {
11045
    EVT OpVT = Op.getOperand(1).getValueType();
11046
    unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11047
                                         : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11048
                                                             : PPC::XSTSTDCSP);
11049
    return SDValue(
11050
        DAG.getMachineNode(
11051
            PPC::SELECT_CC_I4, dl, MVT::i32,
11052
            {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11053
                                        Op.getOperand(1)),
11054
                     0),
11055
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11056
             DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11057
        0);
11058
  }
11059
  case Intrinsic::ppc_fnmsub: {
11060
    EVT VT = Op.getOperand(1).getValueType();
11061
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11062
      return DAG.getNode(
11063
          ISD::FNEG, dl, VT,
11064
          DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11065
                      DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11066
    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11067
                       Op.getOperand(2), Op.getOperand(3));
11068
  }
11069
  case Intrinsic::ppc_convert_f128_to_ppcf128:
11070
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
11071
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11072
                            ? RTLIB::CONVERT_PPCF128_F128
11073
                            : RTLIB::CONVERT_F128_PPCF128;
11074
    MakeLibCallOptions CallOptions;
11075
    std::pair<SDValue, SDValue> Result =
11076
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11077
                    dl, SDValue());
11078
    return Result.first;
11079
  }
11080
  case Intrinsic::ppc_maxfe:
11081
  case Intrinsic::ppc_maxfl:
11082
  case Intrinsic::ppc_maxfs:
11083
  case Intrinsic::ppc_minfe:
11084
  case Intrinsic::ppc_minfl:
11085
  case Intrinsic::ppc_minfs: {
11086
    EVT VT = Op.getValueType();
11087
    assert(
11088
        all_of(Op->ops().drop_front(4),
11089
               [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11090
        "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11091
    (void)VT;
11092
    ISD::CondCode CC = ISD::SETGT;
11093
    if (IntrinsicID == Intrinsic::ppc_minfe ||
11094
        IntrinsicID == Intrinsic::ppc_minfl ||
11095
        IntrinsicID == Intrinsic::ppc_minfs)
11096
      CC = ISD::SETLT;
11097
    unsigned I = Op.getNumOperands() - 2, Cnt = I;
11098
    SDValue Res = Op.getOperand(I);
11099
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11100
      Res =
11101
          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11102
    }
11103
    return Res;
11104
  }
11105
  }
11106

11107
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
11108
  // opcode number of the comparison.
11109
  int CompareOpc;
11110
  bool isDot;
11111
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11112
    return SDValue();    // Don't custom lower most intrinsics.
11113

11114
  // If this is a non-dot comparison, make the VCMP node and we are done.
11115
  if (!isDot) {
11116
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11117
                              Op.getOperand(1), Op.getOperand(2),
11118
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
11119
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11120
  }
11121

11122
  // Create the PPCISD altivec 'dot' comparison node.
11123
  SDValue Ops[] = {
11124
    Op.getOperand(2),  // LHS
11125
    Op.getOperand(3),  // RHS
11126
    DAG.getConstant(CompareOpc, dl, MVT::i32)
11127
  };
11128
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11129
  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11130

11131
  // Now that we have the comparison, emit a copy from the CR to a GPR.
11132
  // This is flagged to the above dot comparison.
11133
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11134
                                DAG.getRegister(PPC::CR6, MVT::i32),
11135
                                CompNode.getValue(1));
11136

11137
  // Unpack the result based on how the target uses it.
11138
  unsigned BitNo;   // Bit # of CR6.
11139
  bool InvertBit;   // Invert result?
11140
  switch (Op.getConstantOperandVal(1)) {
11141
  default:  // Can't happen, don't crash on invalid number though.
11142
  case 0:   // Return the value of the EQ bit of CR6.
11143
    BitNo = 0; InvertBit = false;
11144
    break;
11145
  case 1:   // Return the inverted value of the EQ bit of CR6.
11146
    BitNo = 0; InvertBit = true;
11147
    break;
11148
  case 2:   // Return the value of the LT bit of CR6.
11149
    BitNo = 2; InvertBit = false;
11150
    break;
11151
  case 3:   // Return the inverted value of the LT bit of CR6.
11152
    BitNo = 2; InvertBit = true;
11153
    break;
11154
  }
11155

11156
  // Shift the bit into the low position.
11157
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11158
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11159
  // Isolate the bit.
11160
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11161
                      DAG.getConstant(1, dl, MVT::i32));
11162

11163
  // If we are supposed to, toggle the bit.
11164
  if (InvertBit)
11165
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11166
                        DAG.getConstant(1, dl, MVT::i32));
11167
  return Flags;
11168
}
11169

11170
/// Custom-lower void intrinsics. Only llvm.ppc.cfence is handled; every other
/// intrinsic yields a null SDValue.
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list, in which case the intrinsic ID is
  // operand 1 rather than operand 0.
  const int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);

  if (Op.getConstantOperandVal(ArgStart) != Intrinsic::ppc_cfence)
    return SDValue();

  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
  SDValue Val = Op.getOperand(ArgStart + 1);
  if (Val.getValueType() == MVT::i128) {
    // FIXME: Testing one of two paired registers is sufficient to guarantee
    // ordering?
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
  }

  // The fence pseudo and the width of its operand both follow the pointer
  // width of the subtarget.
  const bool Is64Bit = Subtarget.isPPC64();
  const unsigned FenceOpc = Is64Bit ? PPC::CFENCE8 : PPC::CFENCE;
  const EVT FenceVT = Is64Bit ? MVT::i64 : MVT::i32;

  SDValue Widened = DAG.getNode(ISD::ANY_EXTEND, DL, FenceVT, Val);
  // Operand 0 is the incoming chain (guaranteed by the assert above).
  return SDValue(
      DAG.getMachineNode(FenceOpc, DL, MVT::Other, Widened, Op.getOperand(0)),
      0);
}
11199

11200
// Lower scalar BSWAP64 to xxbrd.
11201
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11202
  SDLoc dl(Op);
11203
  if (!Subtarget.isPPC64())
11204
    return Op;
11205
  // MTVSRDD
11206
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11207
                   Op.getOperand(0));
11208
  // XXBRD
11209
  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11210
  // MFVSRD
11211
  int VectorIndex = 0;
11212
  if (Subtarget.isLittleEndian())
11213
    VectorIndex = 1;
11214
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11215
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11216
  return Op;
11217
}
11218

11219
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11220
// compared to a value that is atomically loaded (atomic loads zero-extend).
11221
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11222
                                                SelectionDAG &DAG) const {
11223
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11224
         "Expecting an atomic compare-and-swap here.");
11225
  SDLoc dl(Op);
11226
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11227
  EVT MemVT = AtomicNode->getMemoryVT();
11228
  if (MemVT.getSizeInBits() >= 32)
11229
    return Op;
11230

11231
  SDValue CmpOp = Op.getOperand(2);
11232
  // If this is already correctly zero-extended, leave it alone.
11233
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11234
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11235
    return Op;
11236

11237
  // Clear the high bits of the compare operand.
11238
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11239
  SDValue NewCmpOp =
11240
    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11241
                DAG.getConstant(MaskVal, dl, MVT::i32));
11242

11243
  // Replace the existing compare operand with the properly zero-extended one.
11244
  SmallVector<SDValue, 4> Ops;
11245
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11246
    Ops.push_back(AtomicNode->getOperand(i));
11247
  Ops[2] = NewCmpOp;
11248
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
11249
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11250
  auto NodeTy =
11251
    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11252
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11253
}
11254

11255
/// Lower quadword (i128) atomic loads and stores to the
/// int_ppc_atomic_load_i128 / int_ppc_atomic_store_i128 intrinsics, which the
/// pattern-matching instruction selector turns into ppc instructions. The
/// i128 value is split into (load: reassembled from) two i64 halves.
SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
  SDLoc dl(N);
  const unsigned Opc = N->getOpcode();

  if (Opc == ISD::ATOMIC_LOAD) {
    // Intrinsic operands: chain, intrinsic ID, then the load's remaining
    // operands. It produces two i64 halves plus a chain.
    SDVTList ResultTys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
    SmallVector<SDValue, 4> IntrOps{
        N->getOperand(0),
        DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
    IntrOps.append(N->op_begin() + 1, N->op_end());
    SDValue Halves = DAG.getMemIntrinsicNode(
        ISD::INTRINSIC_W_CHAIN, dl, ResultTys, IntrOps, MemVT,
        N->getMemOperand());

    // Result 0 supplies the low 64 bits, result 1 the high 64 bits.
    SDValue Low = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, Halves);
    SDValue High =
        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, Halves.getValue(1));
    High = DAG.getNode(ISD::SHL, dl, MVT::i128, High,
                       DAG.getConstant(64, dl, MVT::i32));
    SDValue Combined =
        DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {Low, High});
    return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
                       {Combined, Halves.getValue(2)});
  }

  if (Opc == ISD::ATOMIC_STORE) {
    // Intrinsic operands: chain, intrinsic ID, low half, high half, and the
    // store's operand 2.
    SDVTList ResultTys = DAG.getVTList(MVT::Other);
    SmallVector<SDValue, 4> IntrOps{
        N->getOperand(0),
        DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
    SDValue Stored = N->getOperand(1);
    SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Stored);
    SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i128, Stored,
                               DAG.getConstant(64, dl, MVT::i32));
    High = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, High);
    IntrOps.push_back(Low);
    IntrOps.push_back(High);
    IntrOps.push_back(N->getOperand(2));
    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, ResultTys, IntrOps,
                                   MemVT, N->getMemOperand());
  }

  llvm_unreachable("Unexpected atomic opcode");
}
11307

11308
/// Build an i1 value that is true when \p Op belongs to any of the FP classes
/// in \p Mask, using the XSTSTDC{SP,DP,QP} test-data-class machine nodes.
///
/// The test instruction is given a mask covering NaN, +/-inf, +/-zero and
/// +/-subnormal only; 'normal' and the quiet/signaling NaN distinction are
/// synthesized from extra nodes, recursing on the remaining classes.
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
                                SelectionDAG &DAG,
                                const PPCSubtarget &Subtarget) {
  assert(Mask <= fcAllFlags && "Invalid fp_class flags!");

  // Bits of the data-class immediate handed to the test instruction.
  enum DataClassMask {
    DC_NEG_SUBNORM = 1,
    DC_POS_SUBNORM = 1 << 1,
    DC_NEG_ZERO = 1 << 2,
    DC_POS_ZERO = 1 << 3,
    DC_NEG_INF = 1 << 4,
    DC_POS_INF = 1 << 5,
    DC_NAN = 1 << 6,
  };
  const unsigned AllDataClasses = DC_NAN | DC_NEG_INF | DC_POS_INF |
                                  DC_NEG_ZERO | DC_POS_ZERO | DC_NEG_SUBNORM |
                                  DC_POS_SUBNORM;

  EVT VT = Op.getValueType();

  // Degenerate masks fold to constants.
  if (Mask == fcAllFlags)
    return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
  if (Mask == 0)
    return DAG.getBoolConstant(false, Dl, MVT::i1, VT);

  // When it's cheaper or necessary, test the complementary set and negate.
  if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
    SDValue Inverse = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
    return DAG.getNOT(Dl, Inverse, MVT::i1);
  }

  // Select the test opcode by operand type.
  unsigned TestOp;
  if (VT == MVT::f128)
    TestOp = PPC::XSTSTDCQP;
  else if (VT == MVT::f64)
    TestOp = PPC::XSTSTDCDP;
  else
    TestOp = PPC::XSTSTDCSP;

  // Emit the test instruction for the given data-class immediate.
  auto EmitTest = [&](unsigned DCMask) {
    return SDValue(
        DAG.getMachineNode(TestOp, Dl, MVT::i32,
                           DAG.getTargetConstant(DCMask, Dl, MVT::i32), Op),
        0);
  };
  // Extract one i1 sub-register of the test result.
  auto ExtractCRBit = [&](SDValue CR, unsigned SubIdx) {
    return SDValue(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, CR,
                           DAG.getTargetConstant(SubIdx, Dl, MVT::i32)),
        0);
  };

  // Power doesn't support testing whether a value is 'normal'. Test for all
  // the other classes, and accept 'not not-normal' with the expected sign.
  if (Mask & fcNormal) {
    SDValue NotNormalCR = EmitTest(AllDataClasses);
    // The sign is in CR bit 0 (sub_lt); the match result is in CR bit 2
    // (sub_eq).
    SDValue SignBit = ExtractCRBit(NotNormalCR, PPC::sub_lt);
    SDValue IsNormal =
        DAG.getNOT(Dl, ExtractCRBit(NotNormalCR, PPC::sub_eq), MVT::i1);
    if (Mask & fcPosNormal)
      SignBit = DAG.getNOT(Dl, SignBit, MVT::i1);
    SDValue SignedNormal =
        DAG.getNode(ISD::AND, Dl, MVT::i1, SignBit, IsNormal);
    if (Mask == fcPosNormal || Mask == fcNegNormal)
      return SignedNormal;

    SDValue Others =
        getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget);
    return DAG.getNode(ISD::OR, Dl, MVT::i1, Others, SignedNormal);
  }

  // The instruction doesn't differentiate between signaling and quiet NaN.
  // Test 'is NaN' and combine it with a check of the quiet bit.
  const auto NanPart = Mask & fcNan;
  if (NanPart == fcQNan || NanPart == fcSNan) {
    bool WantQuiet = Mask & fcQNan;
    SDValue IsNan = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);

    // Quietness is determined by the first bit of the fraction field; fetch
    // the 32-bit word containing it and the matching bit mask.
    uint64_t QuietBit = 0;
    SDValue TopWord;
    if (VT == MVT::f128) {
      TopWord = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
          DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
      QuietBit = 0x8000;
    } else if (VT == MVT::f64) {
      if (Subtarget.isPPC64()) {
        TopWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
                              DAG.getBitcast(MVT::i64, Op),
                              DAG.getConstant(1, Dl, MVT::i32));
      } else {
        SDValue AsWords = DAG.getBitcast(
            MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
        TopWord = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, AsWords,
            DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
      }
      QuietBit = 0x80000;
    } else if (VT == MVT::f32) {
      TopWord = DAG.getBitcast(MVT::i32, Op);
      QuietBit = 0x400000;
    }

    SDValue Masked = DAG.getNode(ISD::AND, Dl, MVT::i32, TopWord,
                                 DAG.getConstant(QuietBit, Dl, MVT::i32));
    SDValue QuietMatch =
        DAG.getSetCC(Dl, MVT::i1, Masked, DAG.getConstant(0, Dl, MVT::i32),
                     WantQuiet ? ISD::SETNE : ISD::SETEQ);
    SDValue NanKind = DAG.getNode(ISD::AND, Dl, MVT::i1, IsNan, QuietMatch);
    if (Mask == fcQNan || Mask == fcSNan)
      return NanKind;

    SDValue Others = getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget);
    return DAG.getNode(ISD::OR, Dl, MVT::i1, Others, NanKind);
  }

  // Everything left maps directly onto data-class bits.
  unsigned NativeMask = 0;
  auto MapClass = [&](FPClassTest Class, unsigned DCBit) {
    if ((Mask & Class) == Class)
      NativeMask |= DCBit;
  };
  MapClass(fcNan, DC_NAN);
  MapClass(fcNegInf, DC_NEG_INF);
  MapClass(fcPosInf, DC_POS_INF);
  MapClass(fcNegZero, DC_NEG_ZERO);
  MapClass(fcPosZero, DC_POS_ZERO);
  MapClass(fcNegSubnormal, DC_NEG_SUBNORM);
  MapClass(fcPosSubnormal, DC_POS_SUBNORM);

  return ExtractCRBit(EmitTest(NativeMask), PPC::sub_eq);
}
11444

11445
/// Lower ISD::IS_FPCLASS by forwarding the tested value (operand 0) and the
/// constant class mask (operand 1) to getDataClassTest.
SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
                                           SelectionDAG &DAG) const {
  assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
  SDValue Tested = Op.getOperand(0);
  const uint64_t RawClasses = Op.getConstantOperandVal(1);
  SDLoc Dl(Op);
  return getDataClassTest(Tested, static_cast<FPClassTest>(RawClasses), Dl,
                          DAG, Subtarget);
}
11454

11455
/// Lower SCALAR_TO_VECTOR through memory: the scalar is stored to a fresh
/// 16-byte stack slot and the slot is then loaded back as the vector type.
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Allocate a 16-byte, 16-byte-aligned stack object and get its address.
  MachineFrameInfo &FrameInfo = DAG.getMachineFunction().getFrameInfo();
  const int Slot = FrameInfo.CreateStackObject(16, Align(16), false);
  SDValue SlotAddr =
      DAG.getFrameIndex(Slot, getPointerTy(DAG.getDataLayout()));

  // Write the scalar at the start of the slot, chained off the entry node.
  SDValue Spill = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                               SlotAddr, MachinePointerInfo());

  // Read the slot back as the requested vector type, ordered after the store.
  return DAG.getLoad(Op.getValueType(), dl, Spill, SlotAddr,
                     MachinePointerInfo());
}
11470

11471
/// Custom lowering for ISD::INSERT_VECTOR_ELT. Returns \p Op when the node is
/// to be kept as is, an empty SDValue when no custom lowering is provided, or
/// a replacement node.
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  // Null when the insertion index is not a compile-time constant.
  auto *ConstIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

  if (ConstIdx && VT == MVT::v2f64)
    return Op;

  // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
  // because on P10, it allows this specific insert_vector_elt load pattern to
  // utilize the refactored load and store infrastructure in order to exploit
  // prefixed loads.
  // On targets with inexpensive direct moves (Power9 and up), a
  // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
  // load since a single precision load will involve conversion to double
  // precision on the load followed by another conversion to single precision.
  if (Subtarget.hasP9Vector() && VT == MVT::v4f32 &&
      Elt.getValueType() == MVT::f32 && isa<LoadSDNode>(Elt)) {
    SDValue IntVec = DAG.getBitcast(MVT::v4i32, Vec);
    SDValue IntElt = DAG.getBitcast(MVT::i32, Elt);
    SDValue IntInsert = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32,
                                    IntVec, IntElt, Op.getOperand(2));
    return DAG.getBitcast(MVT::v4f32, IntInsert);
  }

  if (Subtarget.isISA3_1()) {
    const bool HasDWordElts = VT == MVT::v2i64 || VT == MVT::v2f64;
    if (HasDWordElts && !Subtarget.isPPC64())
      return SDValue();
    // On P10, we have legal lowering for constant and variable indices for
    // all vectors.
    if (HasDWordElts || VT == MVT::v16i8 || VT == MVT::v8i16 ||
        VT == MVT::v4i32 || VT == MVT::v4f32)
      return Op;
  }

  // Before P10, we have legal lowering for constant indices but not for
  // variable ones.
  if (!ConstIdx)
    return SDValue();

  // Every type other than v8i16 / v16i8 is left untouched.
  if (VT != MVT::v8i16 && VT != MVT::v16i8)
    return Op;

  // v8i16 and v16i8 use MTVSRZ + VECINSERT with a byte offset.
  SDValue Moved = DAG.getNode(PPCISD::MTVSRZ, dl, VT, Elt);
  const unsigned EltBytes = VT.getVectorElementType().getSizeInBits() / 8;
  const unsigned EltIdx = ConstIdx->getZExtValue();
  unsigned ByteOffset = EltIdx * EltBytes;
  // On little endian the byte offset is mirrored within the 16-byte vector.
  if (Subtarget.isLittleEndian())
    ByteOffset = (16 - EltBytes) - ByteOffset;
  return DAG.getNode(PPCISD::VECINSERT, dl, VT, Vec, Moved,
                     DAG.getConstant(ByteOffset, dl, MVT::i32));
}
11535

11536
/// Split a v256i1 (pair) or v512i1 (accumulator) load into 2 or 4 v16i8
/// loads and recombine them with PAIR_BUILD / ACC_BUILD. Loads of any other
/// type are returned unchanged.
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue Chain = LN->getChain();
  SDValue Addr = LN->getBasePtr();
  EVT VT = Op.getValueType();

  const bool IsPair = VT == MVT::v256i1;
  const bool IsAcc = VT == MVT::v512i1;
  if (!IsPair && !IsAcc)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  // Emit one 16-byte load per underlying vsx register, advancing the address
  // by 16 after each one.
  Align BaseAlign = LN->getAlign();
  SmallVector<SDValue, 4> Parts;
  SmallVector<SDValue, 4> PartChains;
  const unsigned NumVecs = VT.getSizeInBits() / 128;
  for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
    const unsigned Offset = Idx * 16;
    SDValue Part = DAG.getLoad(MVT::v16i8, dl, Chain, Addr,
                               LN->getPointerInfo().getWithOffset(Offset),
                               commonAlignment(BaseAlign, Offset),
                               LN->getMemOperand()->getFlags(),
                               LN->getAAInfo());
    Addr = DAG.getNode(ISD::ADD, dl, Addr.getValueType(), Addr,
                       DAG.getConstant(16, dl, Addr.getValueType()));
    Parts.push_back(Part);
    PartChains.push_back(Part.getValue(1));
  }

  // On little endian the pieces are handed to the build node in reverse.
  if (Subtarget.isLittleEndian()) {
    std::reverse(Parts.begin(), Parts.end());
    std::reverse(PartChains.begin(), PartChains.end());
  }

  SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, PartChains);
  SDValue Wide =
      DAG.getNode(IsAcc ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, dl, VT,
                  Parts);
  SDValue Results[] = {Wide, OutChain};
  return DAG.getMergeValues(Results, dl);
}
11580

11581
/// Split a v256i1 (pair) or v512i1 (accumulator) store into 2 or 4 v16i8
/// stores of the underlying registers, joined by a TokenFactor. Stores of any
/// other type are returned unchanged.
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue Chain = SN->getChain();
  SDValue Addr = SN->getBasePtr();
  // Src0 feeds every extract by default; Src1 is only distinct on the
  // ISA-future accumulator path, where it supplies register indices 2 and 3.
  SDValue Src0 = SN->getValue();
  SDValue Src1 = SN->getValue();
  EVT StoreVT = Src0.getValueType();

  const bool IsAcc = StoreVT == MVT::v512i1;
  if (StoreVT != MVT::v256i1 && !IsAcc)
    return Op;

  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
         "Type unsupported without MMA");
  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
         "Type unsupported without paired vector support");

  const bool IsFuture = Subtarget.isISAFuture();
  const bool IsLE = Subtarget.isLittleEndian();
  Align BaseAlign = SN->getAlign();
  unsigned NumVecs = 2;

  if (IsAcc) {
    if (IsFuture) {
      // Split the stored value into two v256i1 results.
      EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
      MachineSDNode *ExtNode = DAG.getMachineNode(
          PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
      Src0 = SDValue(ExtNode, 0);
      Src1 = SDValue(ExtNode, 1);
    } else {
      Src0 = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Src0);
    }
    NumVecs = 4;
  }

  EVT IdxVT = getPointerTy(DAG.getDataLayout());
  SmallVector<SDValue, 4> Pieces;
  for (unsigned Idx = 0; Idx != NumVecs; ++Idx) {
    // Choose the source value and the register index within it. Little endian
    // reverses the register order.
    unsigned RegIdx;
    SDValue Src;
    if (IsFuture) {
      RegIdx = IsLE ? 1 - (Idx % 2) : (Idx % 2);
      Src = Idx > 1 ? Src1 : Src0;
    } else {
      RegIdx = IsLE ? NumVecs - 1 - Idx : Idx;
      Src = Src0;
    }
    SDValue Reg = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Src,
                              DAG.getConstant(RegIdx, dl, IdxVT));

    const unsigned Offset = Idx * 16;
    SDValue Piece = DAG.getStore(Chain, dl, Reg, Addr,
                                 SN->getPointerInfo().getWithOffset(Offset),
                                 commonAlignment(BaseAlign, Offset),
                                 SN->getMemOperand()->getFlags(),
                                 SN->getAAInfo());
    Addr = DAG.getNode(ISD::ADD, dl, Addr.getValueType(), Addr,
                       DAG.getConstant(16, dl, Addr.getValueType()));
    Pieces.push_back(Piece);
  }

  return DAG.getTokenFactor(dl, Pieces);
}
11640

11641
/// Custom-lower vector MUL for v4i32 and v16i8 using Altivec multiply
/// intrinsics. Any other type is unexpected here.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();

  if (VT == MVT::v4i32) {
    SDValue A = Op.getOperand(0), B = Op.getOperand(1);

    SDValue ZeroVec = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue ShAmt = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    // BSwapped = vrlw B, 16
    SDValue BSwapped =
        BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, B, ShAmt, DAG, dl);

    // Reinterpret all inputs as v8i16.
    A = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, A);
    B = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, B);
    BSwapped = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, BSwapped);

    // Product of the low halfwords, yielding 32-bit results (the top parts
    // are ignored).
    SDValue Low = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, A, B, DAG,
                                   dl, MVT::v4i32);

    // Cross products, summed, then moved up by 16 bits.
    SDValue High = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, A,
                                    BSwapped, ZeroVec, DAG, dl, MVT::v4i32);
    High = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, High, ShAmt, DAG, dl);

    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, Low, High);
  }

  if (VT == MVT::v16i8) {
    SDValue A = Op.getOperand(0), B = Op.getOperand(1);
    const bool IsLE = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit results.
    SDValue Even = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, A, B, DAG,
                                    dl, MVT::v8i16);
    Even = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Even);

    // Multiply the odd 8-bit parts, producing 16-bit results.
    SDValue Odd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, A, B, DAG,
                                   dl, MVT::v8i16);
    Odd = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Odd);

    // Merge the results together. Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the element
    // numbering and reverse the meaning of "odd" and "even" when generating
    // little endian code: byte 0 of each halfword is picked on little endian
    // and byte 1 otherwise, alternating between the two shuffle inputs.
    const int ByteInHalf = IsLE ? 0 : 1;
    int ShuffleMask[16];
    for (unsigned Pair = 0; Pair != 8; ++Pair) {
      ShuffleMask[Pair * 2] = 2 * Pair + ByteInHalf;
      ShuffleMask[Pair * 2 + 1] = 2 * Pair + ByteInHalf + 16;
    }

    SDValue First = IsLE ? Odd : Even;
    SDValue Second = IsLE ? Even : Odd;
    return DAG.getVectorShuffle(MVT::v16i8, dl, First, Second, ShuffleMask);
  }

  llvm_unreachable("Unknown mul to lower!");
}
11704

11705
/// Custom lowering for (STRICT_)FP_ROUND: an f128 source without P9 vector
/// support yields an empty SDValue; everything else is returned unchanged.
SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  // Strict nodes carry the chain at operand 0, so the source is operand 1.
  const unsigned SrcIdx = Op->isStrictFPOpcode() ? 1 : 0;
  const bool F128WithoutP9 =
      Op.getOperand(SrcIdx).getValueType() == MVT::f128 &&
      !Subtarget.hasP9Vector();
  if (F128WithoutP9)
    return SDValue();
  return Op;
}
11713

11714
// Custom lowering for fpext vf32 to v2f64
11715
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11716

11717
  assert(Op.getOpcode() == ISD::FP_EXTEND &&
11718
         "Should only be called for ISD::FP_EXTEND");
11719

11720
  // FIXME: handle extends from half precision float vectors on P9.
11721
  // We only want to custom lower an extend from v2f32 to v2f64.
11722
  if (Op.getValueType() != MVT::v2f64 ||
11723
      Op.getOperand(0).getValueType() != MVT::v2f32)
11724
    return SDValue();
11725

11726
  SDLoc dl(Op);
11727
  SDValue Op0 = Op.getOperand(0);
11728

11729
  switch (Op0.getOpcode()) {
11730
  default:
11731
    return SDValue();
11732
  case ISD::EXTRACT_SUBVECTOR: {
11733
    assert(Op0.getNumOperands() == 2 &&
11734
           isa<ConstantSDNode>(Op0->getOperand(1)) &&
11735
           "Node should have 2 operands with second one being a constant!");
11736

11737
    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11738
      return SDValue();
11739

11740
    // Custom lower is only done for high or low doubleword.
11741
    int Idx = Op0.getConstantOperandVal(1);
11742
    if (Idx % 2 != 0)
11743
      return SDValue();
11744

11745
    // Since input is v4f32, at this point Idx is either 0 or 2.
11746
    // Shift to get the doubleword position we want.
11747
    int DWord = Idx >> 1;
11748

11749
    // High and low word positions are different on little endian.
11750
    if (Subtarget.isLittleEndian())
11751
      DWord ^= 0x1;
11752

11753
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11754
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11755
  }
11756
  case ISD::FADD:
11757
  case ISD::FMUL:
11758
  case ISD::FSUB: {
11759
    SDValue NewLoad[2];
11760
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11761
      // Ensure both input are loads.
11762
      SDValue LdOp = Op0.getOperand(i);
11763
      if (LdOp.getOpcode() != ISD::LOAD)
11764
        return SDValue();
11765
      // Generate new load node.
11766
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11767
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11768
      NewLoad[i] = DAG.getMemIntrinsicNode(
11769
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11770
          LD->getMemoryVT(), LD->getMemOperand());
11771
    }
11772
    SDValue NewOp =
11773
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11774
                    NewLoad[1], Op0.getNode()->getFlags());
11775
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11776
                       DAG.getConstant(0, dl, MVT::i32));
11777
  }
11778
  case ISD::LOAD: {
11779
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
11780
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11781
    SDValue NewLd = DAG.getMemIntrinsicNode(
11782
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11783
        LD->getMemoryVT(), LD->getMemOperand());
11784
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11785
                       DAG.getConstant(0, dl, MVT::i32));
11786
  }
11787
  }
11788
  llvm_unreachable("ERROR:Should return for all cases within swtich.");
11789
}
11790

11791
/// LowerOperation - Provide custom lowering hooks for some operations.
11792
///
11793
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11794
  switch (Op.getOpcode()) {
11795
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11796
  case ISD::FPOW:               return lowerPow(Op, DAG);
11797
  case ISD::FSIN:               return lowerSin(Op, DAG);
11798
  case ISD::FCOS:               return lowerCos(Op, DAG);
11799
  case ISD::FLOG:               return lowerLog(Op, DAG);
11800
  case ISD::FLOG10:             return lowerLog10(Op, DAG);
11801
  case ISD::FEXP:               return lowerExp(Op, DAG);
11802
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
11803
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
11804
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
11805
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
11806
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
11807
  case ISD::STRICT_FSETCC:
11808
  case ISD::STRICT_FSETCCS:
11809
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
11810
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
11811
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
11812

11813
  case ISD::INLINEASM:
11814
  case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
11815
  // Variable argument lowering.
11816
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
11817
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
11818
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
11819

11820
  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
11821
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11822
  case ISD::GET_DYNAMIC_AREA_OFFSET:
11823
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11824

11825
  // Exception handling lowering.
11826
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
11827
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
11828
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
11829

11830
  case ISD::LOAD:               return LowerLOAD(Op, DAG);
11831
  case ISD::STORE:              return LowerSTORE(Op, DAG);
11832
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
11833
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
11834
  case ISD::STRICT_FP_TO_UINT:
11835
  case ISD::STRICT_FP_TO_SINT:
11836
  case ISD::FP_TO_UINT:
11837
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11838
  case ISD::STRICT_UINT_TO_FP:
11839
  case ISD::STRICT_SINT_TO_FP:
11840
  case ISD::UINT_TO_FP:
11841
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
11842
  case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
11843

11844
  // Lower 64-bit shifts.
11845
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
11846
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
11847
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
11848

11849
  case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
11850
  case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
11851

11852
  // Vector-related lowering.
11853
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
11854
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
11855
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11856
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
11857
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
11858
  case ISD::MUL:                return LowerMUL(Op, DAG);
11859
  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11860
  case ISD::STRICT_FP_ROUND:
11861
  case ISD::FP_ROUND:
11862
    return LowerFP_ROUND(Op, DAG);
11863
  case ISD::ROTL:               return LowerROTL(Op, DAG);
11864

11865
  // For counter-based loop handling.
11866
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
11867

11868
  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
11869

11870
  // Frame & Return address.
11871
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
11872
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
11873

11874
  case ISD::INTRINSIC_VOID:
11875
    return LowerINTRINSIC_VOID(Op, DAG);
11876
  case ISD::BSWAP:
11877
    return LowerBSWAP(Op, DAG);
11878
  case ISD::ATOMIC_CMP_SWAP:
11879
    return LowerATOMIC_CMP_SWAP(Op, DAG);
11880
  case ISD::ATOMIC_STORE:
11881
    return LowerATOMIC_LOAD_STORE(Op, DAG);
11882
  case ISD::IS_FPCLASS:
11883
    return LowerIS_FPCLASS(Op, DAG);
11884
  }
11885
}
11886

11887
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11888
                                           SmallVectorImpl<SDValue>&Results,
11889
                                           SelectionDAG &DAG) const {
11890
  SDLoc dl(N);
11891
  switch (N->getOpcode()) {
11892
  default:
11893
    llvm_unreachable("Do not know how to custom type legalize this operation!");
11894
  case ISD::ATOMIC_LOAD: {
11895
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11896
    Results.push_back(Res);
11897
    Results.push_back(Res.getValue(1));
11898
    break;
11899
  }
11900
  case ISD::READCYCLECOUNTER: {
11901
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11902
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11903

11904
    Results.push_back(
11905
        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11906
    Results.push_back(RTB.getValue(2));
11907
    break;
11908
  }
11909
  case ISD::INTRINSIC_W_CHAIN: {
11910
    if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11911
      break;
11912

11913
    assert(N->getValueType(0) == MVT::i1 &&
11914
           "Unexpected result type for CTR decrement intrinsic");
11915
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11916
                                 N->getValueType(0));
11917
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11918
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11919
                                 N->getOperand(1));
11920

11921
    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11922
    Results.push_back(NewInt.getValue(1));
11923
    break;
11924
  }
11925
  case ISD::INTRINSIC_WO_CHAIN: {
11926
    switch (N->getConstantOperandVal(0)) {
11927
    case Intrinsic::ppc_pack_longdouble:
11928
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11929
                                    N->getOperand(2), N->getOperand(1)));
11930
      break;
11931
    case Intrinsic::ppc_maxfe:
11932
    case Intrinsic::ppc_minfe:
11933
    case Intrinsic::ppc_fnmsub:
11934
    case Intrinsic::ppc_convert_f128_to_ppcf128:
11935
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11936
      break;
11937
    }
11938
    break;
11939
  }
11940
  case ISD::VAARG: {
11941
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11942
      return;
11943

11944
    EVT VT = N->getValueType(0);
11945

11946
    if (VT == MVT::i64) {
11947
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11948

11949
      Results.push_back(NewNode);
11950
      Results.push_back(NewNode.getValue(1));
11951
    }
11952
    return;
11953
  }
11954
  case ISD::STRICT_FP_TO_SINT:
11955
  case ISD::STRICT_FP_TO_UINT:
11956
  case ISD::FP_TO_SINT:
11957
  case ISD::FP_TO_UINT: {
11958
    // LowerFP_TO_INT() can only handle f32 and f64.
11959
    if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11960
        MVT::ppcf128)
11961
      return;
11962
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11963
    Results.push_back(LoweredValue);
11964
    if (N->isStrictFPOpcode())
11965
      Results.push_back(LoweredValue.getValue(1));
11966
    return;
11967
  }
11968
  case ISD::TRUNCATE: {
11969
    if (!N->getValueType(0).isVector())
11970
      return;
11971
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11972
    if (Lowered)
11973
      Results.push_back(Lowered);
11974
    return;
11975
  }
11976
  case ISD::FSHL:
11977
  case ISD::FSHR:
11978
    // Don't handle funnel shifts here.
11979
    return;
11980
  case ISD::BITCAST:
11981
    // Don't handle bitcast here.
11982
    return;
11983
  case ISD::FP_EXTEND:
11984
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11985
    if (Lowered)
11986
      Results.push_back(Lowered);
11987
    return;
11988
  }
11989
}
11990

11991
//===----------------------------------------------------------------------===//
11992
//  Other Lowering Code
11993
//===----------------------------------------------------------------------===//
11994

11995
/// Emits, at \p Builder's current insertion point, a call with no arguments
/// to the intrinsic \p Id and returns the created call instruction. The
/// intrinsic is declared in the module that owns the insertion block.
static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
  Function *EnclosingFn = Builder.GetInsertBlock()->getParent();
  Module *OwningModule = EnclosingFn->getParent();
  Function *IntrinsicDecl = Intrinsic::getDeclaration(OwningModule, Id);
  return Builder.CreateCall(IntrinsicDecl, {});
}
12000

12001
// The mappings for emitLeading/TrailingFence is taken from
12002
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12003
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12004
                                                 Instruction *Inst,
12005
                                                 AtomicOrdering Ord) const {
12006
  if (Ord == AtomicOrdering::SequentiallyConsistent)
12007
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
12008
  if (isReleaseOrStronger(Ord))
12009
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12010
  return nullptr;
12011
}
12012

12013
/// Trailing fence for an atomic operation. A fence is emitted only when
/// \p Inst performs an atomic load and \p Ord is acquire or stronger:
///  - a plain load gets a call to ppc_cfence (overloaded on the loaded type)
///    that takes the loaded value as its argument;
///  - anything else gets ppc_lwsync.
/// Returns nullptr when no fence is emitted.
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (!Inst->hasAtomicLoad() || !isAcquireOrStronger(Ord))
    return nullptr;

  // FIXME: Can use isync for rmw operation.
  if (!isa<LoadInst>(Inst))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);

  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  Module *OwningModule = Builder.GetInsertBlock()->getParent()->getParent();
  Function *CFenceDecl = Intrinsic::getDeclaration(
      OwningModule, Intrinsic::ppc_cfence, {Inst->getType()});
  return Builder.CreateCall(CFenceDecl, {Inst});
}
12031

12032
/// Expands the atomic read-modify-write pseudo \p MI into a load-reserve /
/// store-conditional retry loop.
///
/// \p MI's operands are read as: 0 = destination, 1 and 2 = the two address
/// registers, 3 = the value operand.
/// \p AtomicSize is the access width in bytes (1, 2, 4 or 8).
/// \p BinOpcode, when non-zero, is the opcode that combines the operand with
/// the loaded value; zero means the operand itself is stored (ATOMIC_SWAP).
/// \p CmpOpcode, when non-zero, adds a compare of the loaded value against
/// the operand and a branch on \p CmpPred that leaves the loop without
/// storing (used for min/max).
///
/// Everything after \p MI is moved into a new exit block, which is returned.
/// \p MI itself is not erased here.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // Select the load-reserve / store-conditional pair for the access width.
  unsigned LoadOpc = PPC::LDARX;
  unsigned StoreOpc = PPC::STDCX;
  switch (AtomicSize) {
  case 8:
    // The doubleword defaults above already apply.
    break;
  case 4:
    LoadOpc = PPC::LWARX;
    StoreOpc = PPC::STWCX;
    break;
  case 2:
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    LoadOpc = PPC::LHARX;
    StoreOpc = PPC::STHCX;
    break;
  case 1:
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    LoadOpc = PPC::LBARX;
    StoreOpc = PPC::STBCX;
    break;
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  }

  const BasicBlock *IRBlock = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator InsertPos = ++BB->getIterator();

  Register Dest = MI.getOperand(0).getReg();
  Register PtrA = MI.getOperand(1).getReg();
  Register PtrB = MI.getOperand(2).getReg();
  Register Incr = MI.getOperand(3).getReg();
  DebugLoc DL = MI.getDebugLoc();

  // The second loop block only exists when a compare is requested.
  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *Loop2MBB = nullptr;
  if (CmpOpcode)
    Loop2MBB = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(IRBlock);

  MF->insert(InsertPos, LoopMBB);
  if (CmpOpcode)
    MF->insert(InsertPos, Loop2MBB);
  MF->insert(InsertPos, ExitMBB);

  // Everything after MI, and BB's successor edges, now belong to ExitMBB.
  ExitMBB->splice(ExitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Value handed to the store-conditional: the operand itself for a swap,
  // otherwise a fresh register that receives the binary op's result.
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register StoreValReg = Incr;
  if (BinOpcode)
    StoreValReg = MRI.createVirtualRegister(
        AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(LoopMBB);

  //  loopMBB:
  //   l[bhwd]arx dest, ptr
  //   <binop> tmp, incr, dest
  //   st[bhwd]cx. tmp, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[bhwd]arx dest, ptr
  //   cmpl?[wd] dest, incr      (dest sign-extended first for signed
  //                              byte/halfword compares)
  //   b<CmpPred> exitMBB
  //  loop2MBB:
  //   st[bhwd]cx. incr, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BuildMI(LoopMBB, DL, TII->get(LoadOpc), Dest).addReg(PtrA).addReg(PtrB);
  if (BinOpcode)
    BuildMI(LoopMBB, DL, TII->get(BinOpcode), StoreValReg)
        .addReg(Incr)
        .addReg(Dest);

  // Block that receives the store-conditional and the retry branch.
  MachineBasicBlock *StoreMBB = LoopMBB;
  if (CmpOpcode) {
    Register CrReg = MRI.createVirtualRegister(&PPC::CRRCRegClass);

    // Signed comparisons of byte or halfword values must be sign-extended.
    Register CmpLHS = Dest;
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = MRI.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(LoopMBB, DL,
              TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), ExtReg)
          .addReg(Dest);
      CmpLHS = ExtReg;
    }
    BuildMI(LoopMBB, DL, TII->get(CmpOpcode), CrReg)
        .addReg(CmpLHS)
        .addReg(Incr);

    BuildMI(LoopMBB, DL, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(ExitMBB);
    LoopMBB->addSuccessor(Loop2MBB);
    LoopMBB->addSuccessor(ExitMBB);
    StoreMBB = Loop2MBB;
  }

  BuildMI(StoreMBB, DL, TII->get(StoreOpc))
      .addReg(StoreValReg)
      .addReg(PtrA)
      .addReg(PtrB);
  // Retry when the store-conditional did not succeed (CR0 != EQ).
  BuildMI(StoreMBB, DL, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(LoopMBB);
  StoreMBB->addSuccessor(LoopMBB);
  StoreMBB->addSuccessor(ExitMBB);

  //  exitMBB:
  //   ...
  return ExitMBB;
}
12151

12152
/// Returns true when \p MI is one of the opcodes listed below (algebraic
/// loads, extend-sign instructions, algebraic word shifts). A COPY defers to
/// PPCInstrInfo::isSignExtended on its source register. Every other opcode
/// yields false.
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
  const unsigned Opcode = MI.getOpcode();

  // A copy is sign extended exactly when its source register is.
  if (Opcode == PPC::COPY)
    return TII->isSignExtended(MI.getOperand(1).getReg(),
                               &MI.getMF()->getRegInfo());

  switch (Opcode) {
  // Algebraic halfword loads.
  case PPC::LHA:
  case PPC::LHA8:
  case PPC::LHAU:
  case PPC::LHAU8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  // Algebraic word loads.
  case PPC::LWA:
  case PPC::LWAUX:
  case PPC::LWAX:
  case PPC::LWAX_32:
  case PPC::LWA_32:
  // Prefixed algebraic loads.
  case PPC::PLHA:
  case PPC::PLHA8:
  case PPC::PLHA8pc:
  case PPC::PLHApc:
  case PPC::PLWA:
  case PPC::PLWA8:
  case PPC::PLWA8pc:
  case PPC::PLWApc:
  // Extend-sign byte.
  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
  case PPC::EXTSB8_rec:
  case PPC::EXTSB_rec:
  // Extend-sign halfword.
  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64:
  case PPC::EXTSH8_rec:
  case PPC::EXTSH_rec:
  // Extend-sign word (including the shift-left-immediate forms).
  case PPC::EXTSW:
  case PPC::EXTSWSLI:
  case PPC::EXTSWSLI_32_64:
  case PPC::EXTSWSLI_32_64_rec:
  case PPC::EXTSWSLI_rec:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
  case PPC::EXTSW_32_64_rec:
  case PPC::EXTSW_rec:
  // Algebraic word shifts.
  case PPC::SRAW:
  case PPC::SRAWI:
  case PPC::SRAWI_rec:
  case PPC::SRAW_rec:
    return true;
  default:
    break;
  }
  return false;
}
12207

12208
/// Expands a byte (\p is8bit) or halfword atomic read-modify-write pseudo
/// \p MI.
///
/// When the subtarget has part-word atomic instructions this forwards to
/// EmitAtomicBinary with a size of 1 or 2. Otherwise the operation is done on
/// the aligned word that contains the value, using lwarx/stwcx. plus
/// shift-and-mask code.
///
/// \p BinOpcode, \p CmpOpcode and \p CmpPred have the same meaning as in
/// EmitAtomicBinary; BinOpcode == 0 indicates ATOMIC_SWAP. For a signed
/// compare (CMPW) whose operand is not known to be sign extended, an explicit
/// sign extension is inserted before \p MI and \p MI's operand 3 is rewritten
/// to use it.
///
/// Returns the exit block that receives everything that followed \p MI.
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const unsigned SignExtOpc = is8bit ? PPC::EXTSB : PPC::EXTSH;

  // If this is a signed comparison and the value being compared is not known
  // to be sign extended, sign extend it here.
  Register Incr = MI.getOperand(3).getReg();
  bool IncrIsSignExtended = false;
  if (Incr.isVirtual())
    IncrIsSignExtended = isSignExtended(*MRI.getVRegDef(Incr), TII);

  if (CmpOpcode == PPC::CMPW && !IncrIsSignExtended) {
    Register ExtIncrReg = MRI.createVirtualRegister(&PPC::GPRCRegClass);
    BuildMI(*BB, MI, DL, TII->get(SignExtOpc), ExtIncrReg).addReg(Incr);
    MI.getOperand(3).setReg(ExtIncrReg);
    Incr = ExtIncrReg;
  }

  // If we support part-word atomic mnemonics, just use them.
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  const bool Is64Bit = Subtarget.isPPC64();
  const bool IsLittleEndian = Subtarget.isLittleEndian();
  const unsigned ZeroReg = Is64Bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *IRBlock = BB->getBasicBlock();
  MachineFunction::iterator InsertPos = ++BB->getIterator();

  Register Dest = MI.getOperand(0).getReg();
  Register PtrA = MI.getOperand(1).getReg();
  Register PtrB = MI.getOperand(2).getReg();

  // The second loop block only exists when a compare is requested.
  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *Loop2MBB = nullptr;
  if (CmpOpcode)
    Loop2MBB = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(IRBlock);

  MF->insert(InsertPos, LoopMBB);
  if (CmpOpcode)
    MF->insert(InsertPos, Loop2MBB);
  MF->insert(InsertPos, ExitMBB);

  // Everything after MI, and BB's successor edges, now belong to ExitMBB.
  ExitMBB->splice(ExitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  ExitMBB->transferSuccessorsAndUpdatePHIs(BB);

  const TargetRegisterClass *PtrRC =
      Is64Bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = MRI.createVirtualRegister(PtrRC);
  Register Shift1Reg = MRI.createVirtualRegister(GPRC);
  // On little-endian targets shift1 is used directly as the shift amount; on
  // big-endian targets a separate register receives the XORI result below.
  Register ShiftReg = Shift1Reg;
  if (!IsLittleEndian)
    ShiftReg = MRI.createVirtualRegister(GPRC);
  Register Incr2Reg = MRI.createVirtualRegister(GPRC);
  Register MaskReg = MRI.createVirtualRegister(GPRC);
  Register Mask2Reg = MRI.createVirtualRegister(GPRC);
  Register Mask3Reg = MRI.createVirtualRegister(GPRC);
  Register Tmp2Reg = MRI.createVirtualRegister(GPRC);
  Register Tmp3Reg = MRI.createVirtualRegister(GPRC);
  Register Tmp4Reg = MRI.createVirtualRegister(GPRC);
  Register TmpDestReg = MRI.createVirtualRegister(GPRC);
  Register SrwDestReg = MRI.createVirtualRegister(GPRC);
  // For a swap the shifted operand is merged directly; otherwise a fresh
  // register receives the binary op's result.
  Register TmpReg = Incr2Reg;
  if (BinOpcode)
    TmpReg = MRI.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(LoopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  // (Halfword variants are shown in brackets.)
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]            (big-endian only)
  //   rlwinm ptr, ptr1, 0, 0, 29             (rldicr ptr, ptr1, 0, 61 on 64-bit)
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   <binop> tmp, incr2, tmpDest
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw SrwDest, tmpDest, shift
  //   rlwinm dest, SrwDest, 0, 24 [16], 31
  Register Ptr1Reg = PtrB;
  if (PtrA != ZeroReg) {
    Ptr1Reg = MRI.createVirtualRegister(PtrRC);
    BuildMI(BB, DL, TII->get(Is64Bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(PtrA)
        .addReg(PtrB);
  }

  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode.
  BuildMI(BB, DL, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, Is64Bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!IsLittleEndian)
    BuildMI(BB, DL, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);

  // Clear the low two address bits to get the aligned word address.
  if (Is64Bit) {
    BuildMI(BB, DL, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  } else {
    BuildMI(BB, DL, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  }

  // Move the operand and the field mask into position within the word.
  BuildMI(BB, DL, TII->get(PPC::SLW), Incr2Reg).addReg(Incr).addReg(ShiftReg);
  if (is8bit) {
    BuildMI(BB, DL, TII->get(PPC::LI), Mask2Reg).addImm(255);
  } else {
    BuildMI(BB, DL, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, DL, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, DL, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  // loopMBB: load the containing word and compute the updated field.
  BuildMI(LoopMBB, DL, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(LoopMBB, DL, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(LoopMBB, DL, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(LoopMBB, DL, TII->get(PPC::AND), Tmp3Reg)
      .addReg(TmpReg)
      .addReg(MaskReg);

  // Block that receives the merge, the store-conditional and the retry
  // branch.
  MachineBasicBlock *StoreMBB = LoopMBB;
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = MRI.createVirtualRegister(GPRC);
    Register CrReg = MRI.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(LoopMBB, DL, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);

    Register CmpLHS = SReg;
    Register CmpRHS = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      Register ShiftedReg = MRI.createVirtualRegister(GPRC);
      BuildMI(LoopMBB, DL, TII->get(PPC::SRW), ShiftedReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ExtendedReg = MRI.createVirtualRegister(GPRC);
      BuildMI(LoopMBB, DL, TII->get(SignExtOpc), ExtendedReg)
          .addReg(ShiftedReg);
      CmpLHS = ExtendedReg;
      CmpRHS = Incr;
    }

    BuildMI(LoopMBB, DL, TII->get(CmpOpcode), CrReg)
        .addReg(CmpLHS)
        .addReg(CmpRHS);
    BuildMI(LoopMBB, DL, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(CrReg)
        .addMBB(ExitMBB);
    LoopMBB->addSuccessor(Loop2MBB);
    LoopMBB->addSuccessor(ExitMBB);
    StoreMBB = Loop2MBB;
  }

  BuildMI(StoreMBB, DL, TII->get(PPC::OR), Tmp4Reg)
      .addReg(Tmp3Reg)
      .addReg(Tmp2Reg);
  BuildMI(StoreMBB, DL, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  // Retry when the store-conditional did not succeed (CR0 != EQ).
  BuildMI(StoreMBB, DL, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(LoopMBB);
  StoreMBB->addSuccessor(LoopMBB);
  StoreMBB->addSuccessor(ExitMBB);

  //  exitMBB:
  //   ...
  // Since the shift amount is not a constant, we need to clear
  // the upper bits with a separate RLWINM.
  // Both instructions are inserted at the block's beginning, so the SRW
  // inserted second ends up ahead of the RLWINM that consumes its result.
  BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(PPC::RLWINM), Dest)
      .addReg(SrwDestReg)
      .addImm(0)
      .addImm(is8bit ? 24 : 16)
      .addImm(31);
  BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII->get(PPC::SRW), SrwDestReg)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return ExitMBB;
}
12421

12422
/// Expands the EH setjmp pseudo \p MI (operand 0 = result register,
/// operand 1 = buffer address register) into explicit control flow and
/// returns the block that receives everything that followed \p MI.
///
/// For v = setjmp(buf), the emitted code is
///
/// thisMBB:
///  [buf[TOCOffset] = r2]        (64-bit ELF ABI only)
///  buf[BPOffset] = base pointer
///  bcl mainMBB
///  v_restore = 1
///  EH_SjLj_Setup mainMBB
///  b sinkMBB
///
/// mainMBB:
///  buf[LabelOffset] = LR
///  v_main = 0
///
/// sinkMBB:
///  v = phi(main, restore)
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const bool IsPPC64 = Subtarget.isPPC64();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *IRBlock = MBB->getBasicBlock();
  MachineFunction::iterator InsertPos = ++MBB->getIterator();

  // The result is a PHI of one value defined on each path.
  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*DstRC, MVT::i32) && "Invalid destination!");
  Register MainDstReg = MRI.createVirtualRegister(DstRC);
  Register RestoreDstReg = MRI.createVirtualRegister(DstRC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  MachineBasicBlock *ThisMBB = MBB;
  MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(IRBlock);
  MF->insert(InsertPos, MainMBB);
  MF->insert(InsertPos, SinkMBB);

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The base pointer
  // goes into the fifth slot. The thread identifier (R13) is not affected.
  const int64_t SlotSize = PVT.getStoreSize();
  const int64_t LabelOffset = 1 * SlotSize;
  const int64_t TOCOffset = 3 * SlotSize;
  const int64_t BPOffset = 4 * SlotSize;

  // Register that will hold the return address read from LR in mainMBB.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  // thisMBB:
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MF);
    BuildMI(*ThisMBB, MI, DL, TII->get(PPC::STD))
        .addReg(PPC::X2)
        .addImm(TOCOffset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  const bool IsNaked = MF->getFunction().hasFnAttribute(Attribute::Naked);
  const unsigned BaseReg = IsNaked ? (IsPPC64 ? PPC::X1 : PPC::R1)
                                   : (IsPPC64 ? PPC::BP8 : PPC::BP);

  BuildMI(*ThisMBB, MI, DL, TII->get(IsPPC64 ? PPC::STD : PPC::STW))
      .addReg(BaseReg)
      .addImm(BPOffset)
      .addReg(BufReg)
      .cloneMemRefs(MI);

  // Setup: branch-and-link to mainMBB, with a register mask that preserves
  // nothing.
  BuildMI(*ThisMBB, MI, DL, TII->get(PPC::BCLalways))
      .addMBB(MainMBB)
      .addRegMask(TRI->getNoPreservedMask());

  BuildMI(*ThisMBB, MI, DL, TII->get(PPC::LI), RestoreDstReg).addImm(1);

  BuildMI(*ThisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)).addMBB(MainMBB);
  BuildMI(*ThisMBB, MI, DL, TII->get(PPC::B)).addMBB(SinkMBB);

  ThisMBB->addSuccessor(MainMBB, BranchProbability::getZero());
  ThisMBB->addSuccessor(SinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  buf[LabelOffset] = LR
  //  mainDstReg = 0
  BuildMI(MainMBB, DL, TII->get(IsPPC64 ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  BuildMI(MainMBB, DL, TII->get(IsPPC64 ? PPC::STD : PPC::STW))
      .addReg(LabelReg)
      .addImm(LabelOffset)
      .addReg(BufReg)
      .cloneMemRefs(MI);

  BuildMI(MainMBB, DL, TII->get(PPC::LI), MainDstReg).addImm(0);
  MainMBB->addSuccessor(SinkMBB);

  // sinkMBB:
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg)
      .addReg(MainDstReg)
      .addMBB(MainMBB)
      .addReg(RestoreDstReg)
      .addMBB(ThisMBB);

  MI.eraseFromParent();
  return SinkMBB;
}
12563

12564
/// Expands the EH longjmp pseudo \p MI (operand 0 = buffer address register)
/// in place, before \p MI: reloads the frame pointer, jump address, stack
/// pointer, base pointer and (64-bit SVR4 only) the TOC pointer from the
/// buffer, then branches to the jump address through CTR. \p MI is erased
/// and \p MBB is returned.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  const bool Is64BitPtr = PVT == MVT::i64;

  // Scratch register for the jump address.
  const TargetRegisterClass *PtrRC =
      Is64BitPtr ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register JumpAddrReg = MRI.createVirtualRegister(PtrRC);

  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  const unsigned FP = Is64BitPtr ? PPC::X31 : PPC::R31;
  const unsigned SP = Is64BitPtr ? PPC::X1 : PPC::R1;
  unsigned BP;
  if (Is64BitPtr)
    BP = PPC::X30;
  else if (Subtarget.isSVR4ABI() && isPositionIndependent())
    BP = PPC::R29;
  else
    BP = PPC::R30;

  // Buffer layout, in pointer-sized slots: 0 = FP, 1 = jump address, 2 = SP,
  // 3 = TOC, 4 = BP.
  const int64_t SlotSize = PVT.getStoreSize();
  const int64_t LabelOffset = 1 * SlotSize;
  const int64_t SPOffset = 2 * SlotSize;
  const int64_t TOCOffset = 3 * SlotSize;
  const int64_t BPOffset = 4 * SlotSize;

  Register BufReg = MI.getOperand(0).getReg();

  // Emits, before MI, a pointer-sized load of buf[Offset] into DstReg that
  // carries MI's memory operands.
  const unsigned LoadOpc = Is64BitPtr ? PPC::LD : PPC::LWZ;
  auto EmitReload = [&](Register DstReg, int64_t Offset) {
    BuildMI(*MBB, MI, DL, TII->get(LoadOpc), DstReg)
        .addImm(Offset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  };

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  EmitReload(FP, 0);

  // Reload IP
  EmitReload(JumpAddrReg, LabelOffset);

  // Reload SP
  EmitReload(SP, SPOffset);

  // Reload BP
  EmitReload(BP, BPOffset);

  // Reload TOC
  if (Is64BitPtr && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MF);
    BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
        .addImm(TOCOffset)
        .addReg(BufReg)
        .cloneMemRefs(MI);
  }

  // Jump
  BuildMI(*MBB, MI, DL, TII->get(Is64BitPtr ? PPC::MTCTR8 : PPC::MTCTR))
      .addReg(JumpAddrReg);
  BuildMI(*MBB, MI, DL, TII->get(Is64BitPtr ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}
12665

12666
bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12667
  // If the function specifically requests inline stack probes, emit them.
12668
  if (MF.getFunction().hasFnAttribute("probe-stack"))
12669
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12670
           "inline-asm";
12671
  return false;
12672
}
12673

12674
unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12675
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12676
  unsigned StackAlign = TFI->getStackAlignment();
12677
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12678
         "Unexpected stack alignment");
12679
  // The default stack probe size is 4096 if the function has no
12680
  // stack-probe-size attribute.
12681
  const Function &Fn = MF.getFunction();
12682
  unsigned StackProbeSize =
12683
      Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12684
  // Round down to the stack alignment.
12685
  StackProbeSize &= ~(StackAlign - 1);
12686
  return StackProbeSize ? StackProbeSize : StackAlign;
12687
}
12688

12689
// Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
12690
// into three phases. In the first phase, it uses pseudo instruction
12691
// PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
12692
// FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12693
// At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
12694
// MaxCallFrameSize so that it can calculate correct data area pointer.
12695
MachineBasicBlock *
12696
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12697
                                    MachineBasicBlock *MBB) const {
12698
  const bool isPPC64 = Subtarget.isPPC64();
12699
  MachineFunction *MF = MBB->getParent();
12700
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12701
  DebugLoc DL = MI.getDebugLoc();
12702
  const unsigned ProbeSize = getStackProbeSize(*MF);
12703
  const BasicBlock *ProbedBB = MBB->getBasicBlock();
12704
  MachineRegisterInfo &MRI = MF->getRegInfo();
12705
  // The CFG of probing stack looks as
12706
  //         +-----+
12707
  //         | MBB |
12708
  //         +--+--+
12709
  //            |
12710
  //       +----v----+
12711
  //  +--->+ TestMBB +---+
12712
  //  |    +----+----+   |
12713
  //  |         |        |
12714
  //  |   +-----v----+   |
12715
  //  +---+ BlockMBB |   |
12716
  //      +----------+   |
12717
  //                     |
12718
  //       +---------+   |
12719
  //       | TailMBB +<--+
12720
  //       +---------+
12721
  // In MBB, calculate previous frame pointer and final stack pointer.
12722
  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12723
  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12724
  // TailMBB is spliced via \p MI.
12725
  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12726
  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12727
  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12728

12729
  MachineFunction::iterator MBBIter = ++MBB->getIterator();
12730
  MF->insert(MBBIter, TestMBB);
12731
  MF->insert(MBBIter, BlockMBB);
12732
  MF->insert(MBBIter, TailMBB);
12733

12734
  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12735
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12736

12737
  Register DstReg = MI.getOperand(0).getReg();
12738
  Register NegSizeReg = MI.getOperand(1).getReg();
12739
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12740
  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12741
  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12742
  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12743

12744
  // Since value of NegSizeReg might be realigned in prologepilog, insert a
12745
  // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
12746
  // NegSize.
12747
  unsigned ProbeOpc;
12748
  if (!MRI.hasOneNonDBGUse(NegSizeReg))
12749
    ProbeOpc =
12750
        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12751
  else
12752
    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12753
    // and NegSizeReg will be allocated in the same phyreg to avoid
12754
    // redundant copy when NegSizeReg has only one use which is current MI and
12755
    // will be replaced by PREPARE_PROBED_ALLOCA then.
12756
    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12757
                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12758
  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12759
      .addDef(ActualNegSizeReg)
12760
      .addReg(NegSizeReg)
12761
      .add(MI.getOperand(2))
12762
      .add(MI.getOperand(3));
12763

12764
  // Calculate final stack pointer, which equals to SP + ActualNegSize.
12765
  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12766
          FinalStackPtr)
12767
      .addReg(SPReg)
12768
      .addReg(ActualNegSizeReg);
12769

12770
  // Materialize a scratch register for update.
12771
  int64_t NegProbeSize = -(int64_t)ProbeSize;
12772
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12773
  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12774
  if (!isInt<16>(NegProbeSize)) {
12775
    Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12776
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12777
        .addImm(NegProbeSize >> 16);
12778
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12779
            ScratchReg)
12780
        .addReg(TempReg)
12781
        .addImm(NegProbeSize & 0xFFFF);
12782
  } else
12783
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12784
        .addImm(NegProbeSize);
12785

12786
  {
12787
    // Probing leading residual part.
12788
    Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12789
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12790
        .addReg(ActualNegSizeReg)
12791
        .addReg(ScratchReg);
12792
    Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12793
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12794
        .addReg(Div)
12795
        .addReg(ScratchReg);
12796
    Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12797
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12798
        .addReg(Mul)
12799
        .addReg(ActualNegSizeReg);
12800
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12801
        .addReg(FramePointer)
12802
        .addReg(SPReg)
12803
        .addReg(NegMod);
12804
  }
12805

12806
  {
12807
    // Remaining part should be multiple of ProbeSize.
12808
    Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12809
    BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12810
        .addReg(SPReg)
12811
        .addReg(FinalStackPtr);
12812
    BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12813
        .addImm(PPC::PRED_EQ)
12814
        .addReg(CmpResult)
12815
        .addMBB(TailMBB);
12816
    TestMBB->addSuccessor(BlockMBB);
12817
    TestMBB->addSuccessor(TailMBB);
12818
  }
12819

12820
  {
12821
    // Touch the block.
12822
    // |P...|P...|P...
12823
    BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12824
        .addReg(FramePointer)
12825
        .addReg(SPReg)
12826
        .addReg(ScratchReg);
12827
    BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12828
    BlockMBB->addSuccessor(TestMBB);
12829
  }
12830

12831
  // Calculation of MaxCallFrameSize is deferred to prologepilog, use
12832
  // DYNAREAOFFSET pseudo instruction to get the future result.
12833
  Register MaxCallFrameSizeReg =
12834
      MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12835
  BuildMI(TailMBB, DL,
12836
          TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12837
          MaxCallFrameSizeReg)
12838
      .add(MI.getOperand(2))
12839
      .add(MI.getOperand(3));
12840
  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12841
      .addReg(SPReg)
12842
      .addReg(MaxCallFrameSizeReg);
12843

12844
  // Splice instructions after MI to TailMBB.
12845
  TailMBB->splice(TailMBB->end(), MBB,
12846
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12847
  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12848
  MBB->addSuccessor(TestMBB);
12849

12850
  // Delete the pseudo instruction.
12851
  MI.eraseFromParent();
12852

12853
  ++NumDynamicAllocaProbed;
12854
  return TailMBB;
12855
}
12856

12857
// Returns true if \p MI is one of the SELECT_CC_* pseudo instructions listed
// below, false for any other opcode.
static bool IsSelectCC(MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return false;
  // Integer variants.
  case PPC::SELECT_CC_I4:
  case PPC::SELECT_CC_I8:
  // Floating-point variants.
  case PPC::SELECT_CC_F4:
  case PPC::SELECT_CC_F8:
  case PPC::SELECT_CC_F16:
  // Vector / VSX register-class variants.
  case PPC::SELECT_CC_VRRC:
  case PPC::SELECT_CC_VSFRC:
  case PPC::SELECT_CC_VSSRC:
  case PPC::SELECT_CC_VSRC:
  // SPE variants.
  case PPC::SELECT_CC_SPE4:
  case PPC::SELECT_CC_SPE:
    return true;
  }
}
12875

12876
// Returns true if \p MI is one of the SELECT_* pseudo instructions listed
// below (the forms without a condition-code predicate), false for any other
// opcode.
static bool IsSelect(MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return false;
  // Integer variants.
  case PPC::SELECT_I4:
  case PPC::SELECT_I8:
  // Floating-point variants.
  case PPC::SELECT_F4:
  case PPC::SELECT_F8:
  case PPC::SELECT_F16:
  // SPE variants.
  case PPC::SELECT_SPE:
  case PPC::SELECT_SPE4:
  // Vector / VSX register-class variants.
  case PPC::SELECT_VRRC:
  case PPC::SELECT_VSFRC:
  case PPC::SELECT_VSSRC:
  case PPC::SELECT_VSRC:
    return true;
  }
}
12894

12895
MachineBasicBlock *
12896
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12897
                                               MachineBasicBlock *BB) const {
12898
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12899
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12900
    if (Subtarget.is64BitELFABI() &&
12901
        MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12902
        !Subtarget.isUsingPCRelativeCalls()) {
12903
      // Call lowering should have added an r2 operand to indicate a dependence
12904
      // on the TOC base pointer value. It can't however, because there is no
12905
      // way to mark the dependence as implicit there, and so the stackmap code
12906
      // will confuse it with a regular operand. Instead, add the dependence
12907
      // here.
12908
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12909
    }
12910

12911
    return emitPatchPoint(MI, BB);
12912
  }
12913

12914
  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12915
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12916
    return emitEHSjLjSetJmp(MI, BB);
12917
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12918
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12919
    return emitEHSjLjLongJmp(MI, BB);
12920
  }
12921

12922
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12923

12924
  // To "insert" these instructions we actually have to insert their
12925
  // control-flow patterns.
12926
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
12927
  MachineFunction::iterator It = ++BB->getIterator();
12928

12929
  MachineFunction *F = BB->getParent();
12930
  MachineRegisterInfo &MRI = F->getRegInfo();
12931

12932
  if (Subtarget.hasISEL() &&
12933
      (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12934
       MI.getOpcode() == PPC::SELECT_CC_I8 ||
12935
       MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12936
    SmallVector<MachineOperand, 2> Cond;
12937
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12938
        MI.getOpcode() == PPC::SELECT_CC_I8)
12939
      Cond.push_back(MI.getOperand(4));
12940
    else
12941
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12942
    Cond.push_back(MI.getOperand(1));
12943

12944
    DebugLoc dl = MI.getDebugLoc();
12945
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12946
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12947
  } else if (IsSelectCC(MI) || IsSelect(MI)) {
12948
    // The incoming instruction knows the destination vreg to set, the
12949
    // condition code register to branch on, the true/false values to
12950
    // select between, and a branch opcode to use.
12951

12952
    //  thisMBB:
12953
    //  ...
12954
    //   TrueVal = ...
12955
    //   cmpTY ccX, r1, r2
12956
    //   bCC sinkMBB
12957
    //   fallthrough --> copy0MBB
12958
    MachineBasicBlock *thisMBB = BB;
12959
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12960
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12961
    DebugLoc dl = MI.getDebugLoc();
12962
    F->insert(It, copy0MBB);
12963
    F->insert(It, sinkMBB);
12964

12965
    // Set the call frame size on entry to the new basic blocks.
12966
    // See https://reviews.llvm.org/D156113.
12967
    unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12968
    copy0MBB->setCallFrameSize(CallFrameSize);
12969
    sinkMBB->setCallFrameSize(CallFrameSize);
12970

12971
    // Transfer the remainder of BB and its successor edges to sinkMBB.
12972
    sinkMBB->splice(sinkMBB->begin(), BB,
12973
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
12974
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12975

12976
    // Next, add the true and fallthrough blocks as its successors.
12977
    BB->addSuccessor(copy0MBB);
12978
    BB->addSuccessor(sinkMBB);
12979

12980
    if (IsSelect(MI)) {
12981
      BuildMI(BB, dl, TII->get(PPC::BC))
12982
          .addReg(MI.getOperand(1).getReg())
12983
          .addMBB(sinkMBB);
12984
    } else {
12985
      unsigned SelectPred = MI.getOperand(4).getImm();
12986
      BuildMI(BB, dl, TII->get(PPC::BCC))
12987
          .addImm(SelectPred)
12988
          .addReg(MI.getOperand(1).getReg())
12989
          .addMBB(sinkMBB);
12990
    }
12991

12992
    //  copy0MBB:
12993
    //   %FalseValue = ...
12994
    //   # fallthrough to sinkMBB
12995
    BB = copy0MBB;
12996

12997
    // Update machine-CFG edges
12998
    BB->addSuccessor(sinkMBB);
12999

13000
    //  sinkMBB:
13001
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13002
    //  ...
13003
    BB = sinkMBB;
13004
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13005
        .addReg(MI.getOperand(3).getReg())
13006
        .addMBB(copy0MBB)
13007
        .addReg(MI.getOperand(2).getReg())
13008
        .addMBB(thisMBB);
13009
  } else if (MI.getOpcode() == PPC::ReadTB) {
13010
    // To read the 64-bit time-base register on a 32-bit target, we read the
13011
    // two halves. Should the counter have wrapped while it was being read, we
13012
    // need to try again.
13013
    // ...
13014
    // readLoop:
13015
    // mfspr Rx,TBU # load from TBU
13016
    // mfspr Ry,TB  # load from TB
13017
    // mfspr Rz,TBU # load from TBU
13018
    // cmpw crX,Rx,Rz # check if 'old'='new'
13019
    // bne readLoop   # branch if they're not equal
13020
    // ...
13021

13022
    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13023
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13024
    DebugLoc dl = MI.getDebugLoc();
13025
    F->insert(It, readMBB);
13026
    F->insert(It, sinkMBB);
13027

13028
    // Transfer the remainder of BB and its successor edges to sinkMBB.
13029
    sinkMBB->splice(sinkMBB->begin(), BB,
13030
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
13031
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13032

13033
    BB->addSuccessor(readMBB);
13034
    BB = readMBB;
13035

13036
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13037
    Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13038
    Register LoReg = MI.getOperand(0).getReg();
13039
    Register HiReg = MI.getOperand(1).getReg();
13040

13041
    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13042
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13043
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13044

13045
    Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13046

13047
    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13048
        .addReg(HiReg)
13049
        .addReg(ReadAgainReg);
13050
    BuildMI(BB, dl, TII->get(PPC::BCC))
13051
        .addImm(PPC::PRED_NE)
13052
        .addReg(CmpReg)
13053
        .addMBB(readMBB);
13054

13055
    BB->addSuccessor(readMBB);
13056
    BB->addSuccessor(sinkMBB);
13057
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13058
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13059
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13060
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13061
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13062
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13063
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13064
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13065

13066
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13067
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13068
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13069
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13070
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13071
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13072
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13073
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13074

13075
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13076
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13077
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13078
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13079
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13080
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13081
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13082
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13083

13084
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13085
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13086
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13087
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13088
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13089
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13090
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13091
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13092

13093
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13094
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13095
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13096
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13097
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13098
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13099
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13100
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13101

13102
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13103
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13104
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13105
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13106
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13107
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13108
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13109
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13110

13111
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13112
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13113
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13114
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13115
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13116
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13117
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13118
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13119

13120
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13121
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13122
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13123
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13124
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13125
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13126
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13127
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13128

13129
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13130
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13131
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13132
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13133
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13134
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13135
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13136
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13137

13138
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13139
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13140
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13141
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13142
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13143
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13144
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13145
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13146

13147
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13148
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13149
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13150
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13151
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13152
    BB = EmitAtomicBinary(MI, BB, 4, 0);
13153
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13154
    BB = EmitAtomicBinary(MI, BB, 8, 0);
13155
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13156
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13157
           (Subtarget.hasPartwordAtomics() &&
13158
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13159
           (Subtarget.hasPartwordAtomics() &&
13160
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13161
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13162

13163
    auto LoadMnemonic = PPC::LDARX;
13164
    auto StoreMnemonic = PPC::STDCX;
13165
    switch (MI.getOpcode()) {
13166
    default:
13167
      llvm_unreachable("Compare and swap of unknown size");
13168
    case PPC::ATOMIC_CMP_SWAP_I8:
13169
      LoadMnemonic = PPC::LBARX;
13170
      StoreMnemonic = PPC::STBCX;
13171
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13172
      break;
13173
    case PPC::ATOMIC_CMP_SWAP_I16:
13174
      LoadMnemonic = PPC::LHARX;
13175
      StoreMnemonic = PPC::STHCX;
13176
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13177
      break;
13178
    case PPC::ATOMIC_CMP_SWAP_I32:
13179
      LoadMnemonic = PPC::LWARX;
13180
      StoreMnemonic = PPC::STWCX;
13181
      break;
13182
    case PPC::ATOMIC_CMP_SWAP_I64:
13183
      LoadMnemonic = PPC::LDARX;
13184
      StoreMnemonic = PPC::STDCX;
13185
      break;
13186
    }
13187
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13188
    Register dest = MI.getOperand(0).getReg();
13189
    Register ptrA = MI.getOperand(1).getReg();
13190
    Register ptrB = MI.getOperand(2).getReg();
13191
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13192
    Register oldval = MI.getOperand(3).getReg();
13193
    Register newval = MI.getOperand(4).getReg();
13194
    DebugLoc dl = MI.getDebugLoc();
13195

13196
    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13197
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13198
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13199
    F->insert(It, loop1MBB);
13200
    F->insert(It, loop2MBB);
13201
    F->insert(It, exitMBB);
13202
    exitMBB->splice(exitMBB->begin(), BB,
13203
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
13204
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13205

13206
    //  thisMBB:
13207
    //   ...
13208
    //   fallthrough --> loopMBB
13209
    BB->addSuccessor(loop1MBB);
13210

13211
    // loop1MBB:
13212
    //   l[bhwd]arx dest, ptr
13213
    //   cmp[wd] dest, oldval
13214
    //   bne- exitBB
13215
    // loop2MBB:
13216
    //   st[bhwd]cx. newval, ptr
13217
    //   bne- loopMBB
13218
    //   b exitBB
13219
    // exitBB:
13220
    BB = loop1MBB;
13221
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13222
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13223
        .addReg(dest)
13224
        .addReg(oldval);
13225
    BuildMI(BB, dl, TII->get(PPC::BCC))
13226
        .addImm(PPC::PRED_NE)
13227
        .addReg(CrReg)
13228
        .addMBB(exitMBB);
13229
    BB->addSuccessor(loop2MBB);
13230
    BB->addSuccessor(exitMBB);
13231

13232
    BB = loop2MBB;
13233
    BuildMI(BB, dl, TII->get(StoreMnemonic))
13234
        .addReg(newval)
13235
        .addReg(ptrA)
13236
        .addReg(ptrB);
13237
    BuildMI(BB, dl, TII->get(PPC::BCC))
13238
        .addImm(PPC::PRED_NE)
13239
        .addReg(PPC::CR0)
13240
        .addMBB(loop1MBB);
13241
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13242
    BB->addSuccessor(loop1MBB);
13243
    BB->addSuccessor(exitMBB);
13244

13245
    //  exitMBB:
13246
    //   ...
13247
    BB = exitMBB;
13248
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13249
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13250
    // We must use 64-bit registers for addresses when targeting 64-bit,
13251
    // since we're actually doing arithmetic on them.  Other registers
13252
    // can be 32-bit.
13253
    bool is64bit = Subtarget.isPPC64();
13254
    bool isLittleEndian = Subtarget.isLittleEndian();
13255
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13256

13257
    Register dest = MI.getOperand(0).getReg();
13258
    Register ptrA = MI.getOperand(1).getReg();
13259
    Register ptrB = MI.getOperand(2).getReg();
13260
    Register oldval = MI.getOperand(3).getReg();
13261
    Register newval = MI.getOperand(4).getReg();
13262
    DebugLoc dl = MI.getDebugLoc();
13263

13264
    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13265
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13266
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13267
    F->insert(It, loop1MBB);
13268
    F->insert(It, loop2MBB);
13269
    F->insert(It, exitMBB);
13270
    exitMBB->splice(exitMBB->begin(), BB,
13271
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
13272
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13273

13274
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13275
    const TargetRegisterClass *RC =
13276
        is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13277
    const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13278

13279
    Register PtrReg = RegInfo.createVirtualRegister(RC);
13280
    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13281
    Register ShiftReg =
13282
        isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13283
    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13284
    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13285
    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13286
    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13287
    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13288
    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13289
    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13290
    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13291
    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13292
    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13293
    Register Ptr1Reg;
13294
    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13295
    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13296
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13297
    //  thisMBB:
13298
    //   ...
13299
    //   fallthrough --> loopMBB
13300
    BB->addSuccessor(loop1MBB);
13301

13302
    // The 4-byte load must be aligned, while a char or short may be
13303
    // anywhere in the word.  Hence all this nasty bookkeeping code.
13304
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
13305
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13306
    //   xori shift, shift1, 24 [16]
13307
    //   rlwinm ptr, ptr1, 0, 0, 29
13308
    //   slw newval2, newval, shift
13309
    //   slw oldval2, oldval,shift
13310
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13311
    //   slw mask, mask2, shift
13312
    //   and newval3, newval2, mask
13313
    //   and oldval3, oldval2, mask
13314
    // loop1MBB:
13315
    //   lwarx tmpDest, ptr
13316
    //   and tmp, tmpDest, mask
13317
    //   cmpw tmp, oldval3
13318
    //   bne- exitBB
13319
    // loop2MBB:
13320
    //   andc tmp2, tmpDest, mask
13321
    //   or tmp4, tmp2, newval3
13322
    //   stwcx. tmp4, ptr
13323
    //   bne- loop1MBB
13324
    //   b exitBB
13325
    // exitBB:
13326
    //   srw dest, tmpDest, shift
13327
    if (ptrA != ZeroReg) {
13328
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
13329
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13330
          .addReg(ptrA)
13331
          .addReg(ptrB);
13332
    } else {
13333
      Ptr1Reg = ptrB;
13334
    }
13335

13336
    // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13337
    // mode.
13338
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13339
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13340
        .addImm(3)
13341
        .addImm(27)
13342
        .addImm(is8bit ? 28 : 27);
13343
    if (!isLittleEndian)
13344
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13345
          .addReg(Shift1Reg)
13346
          .addImm(is8bit ? 24 : 16);
13347
    if (is64bit)
13348
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13349
          .addReg(Ptr1Reg)
13350
          .addImm(0)
13351
          .addImm(61);
13352
    else
13353
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13354
          .addReg(Ptr1Reg)
13355
          .addImm(0)
13356
          .addImm(0)
13357
          .addImm(29);
13358
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13359
        .addReg(newval)
13360
        .addReg(ShiftReg);
13361
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13362
        .addReg(oldval)
13363
        .addReg(ShiftReg);
13364
    if (is8bit)
13365
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13366
    else {
13367
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13368
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13369
          .addReg(Mask3Reg)
13370
          .addImm(65535);
13371
    }
13372
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13373
        .addReg(Mask2Reg)
13374
        .addReg(ShiftReg);
13375
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13376
        .addReg(NewVal2Reg)
13377
        .addReg(MaskReg);
13378
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13379
        .addReg(OldVal2Reg)
13380
        .addReg(MaskReg);
13381

13382
    BB = loop1MBB;
13383
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13384
        .addReg(ZeroReg)
13385
        .addReg(PtrReg);
13386
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13387
        .addReg(TmpDestReg)
13388
        .addReg(MaskReg);
13389
    BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13390
        .addReg(TmpReg)
13391
        .addReg(OldVal3Reg);
13392
    BuildMI(BB, dl, TII->get(PPC::BCC))
13393
        .addImm(PPC::PRED_NE)
13394
        .addReg(CrReg)
13395
        .addMBB(exitMBB);
13396
    BB->addSuccessor(loop2MBB);
13397
    BB->addSuccessor(exitMBB);
13398

13399
    BB = loop2MBB;
13400
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13401
        .addReg(TmpDestReg)
13402
        .addReg(MaskReg);
13403
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13404
        .addReg(Tmp2Reg)
13405
        .addReg(NewVal3Reg);
13406
    BuildMI(BB, dl, TII->get(PPC::STWCX))
13407
        .addReg(Tmp4Reg)
13408
        .addReg(ZeroReg)
13409
        .addReg(PtrReg);
13410
    BuildMI(BB, dl, TII->get(PPC::BCC))
13411
        .addImm(PPC::PRED_NE)
13412
        .addReg(PPC::CR0)
13413
        .addMBB(loop1MBB);
13414
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13415
    BB->addSuccessor(loop1MBB);
13416
    BB->addSuccessor(exitMBB);
13417

13418
    //  exitMBB:
13419
    //   ...
13420
    BB = exitMBB;
13421
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13422
        .addReg(TmpReg)
13423
        .addReg(ShiftReg);
13424
  } else if (MI.getOpcode() == PPC::FADDrtz) {
13425
    // This pseudo performs an FADD with rounding mode temporarily forced
13426
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
13427
    // is not modeled at the SelectionDAG level.
13428
    Register Dest = MI.getOperand(0).getReg();
13429
    Register Src1 = MI.getOperand(1).getReg();
13430
    Register Src2 = MI.getOperand(2).getReg();
13431
    DebugLoc dl = MI.getDebugLoc();
13432

13433
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13434
    Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13435

13436
    // Save FPSCR value.
13437
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13438

13439
    // Set rounding mode to round-to-zero.
13440
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13441
        .addImm(31)
13442
        .addReg(PPC::RM, RegState::ImplicitDefine);
13443

13444
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13445
        .addImm(30)
13446
        .addReg(PPC::RM, RegState::ImplicitDefine);
13447

13448
    // Perform addition.
13449
    auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13450
                   .addReg(Src1)
13451
                   .addReg(Src2);
13452
    if (MI.getFlag(MachineInstr::NoFPExcept))
13453
      MIB.setMIFlag(MachineInstr::NoFPExcept);
13454

13455
    // Restore FPSCR value.
13456
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13457
  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13458
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13459
             MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13460
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13461
    unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13462
                       MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13463
                          ? PPC::ANDI8_rec
13464
                          : PPC::ANDI_rec;
13465
    bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13466
                 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13467

13468
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13469
    Register Dest = RegInfo.createVirtualRegister(
13470
        Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13471

13472
    DebugLoc Dl = MI.getDebugLoc();
13473
    BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13474
        .addReg(MI.getOperand(1).getReg())
13475
        .addImm(1);
13476
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13477
            MI.getOperand(0).getReg())
13478
        .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13479
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13480
    DebugLoc Dl = MI.getDebugLoc();
13481
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13482
    Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13483
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13484
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13485
            MI.getOperand(0).getReg())
13486
        .addReg(CRReg);
13487
  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13488
    DebugLoc Dl = MI.getDebugLoc();
13489
    unsigned Imm = MI.getOperand(1).getImm();
13490
    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13491
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13492
            MI.getOperand(0).getReg())
13493
        .addReg(PPC::CR0EQ);
13494
  } else if (MI.getOpcode() == PPC::SETRNDi) {
13495
    DebugLoc dl = MI.getDebugLoc();
13496
    Register OldFPSCRReg = MI.getOperand(0).getReg();
13497

13498
    // Save FPSCR value.
13499
    if (MRI.use_empty(OldFPSCRReg))
13500
      BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13501
    else
13502
      BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13503

13504
    // The floating point rounding mode is in the bits 62:63 of FPSCR, and has
13505
    // the following settings:
13506
    //   00 Round to nearest
13507
    //   01 Round to 0
13508
    //   10 Round to +inf
13509
    //   11 Round to -inf
13510

13511
    // When the operand is immediate, using the two least significant bits of
13512
    // the immediate to set the bits 62:63 of FPSCR.
13513
    unsigned Mode = MI.getOperand(1).getImm();
13514
    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13515
        .addImm(31)
13516
        .addReg(PPC::RM, RegState::ImplicitDefine);
13517

13518
    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13519
        .addImm(30)
13520
        .addReg(PPC::RM, RegState::ImplicitDefine);
13521
  } else if (MI.getOpcode() == PPC::SETRND) {
13522
    DebugLoc dl = MI.getDebugLoc();
13523

13524
    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13525
    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13526
    // If the target doesn't have DirectMove, we should use stack to do the
13527
    // conversion, because the target doesn't have the instructions like mtvsrd
13528
    // or mfvsrd to do this conversion directly.
13529
    auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13530
      if (Subtarget.hasDirectMove()) {
13531
        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13532
          .addReg(SrcReg);
13533
      } else {
13534
        // Use stack to do the register copy.
13535
        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13536
        MachineRegisterInfo &RegInfo = F->getRegInfo();
13537
        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13538
        if (RC == &PPC::F8RCRegClass) {
13539
          // Copy register from F8RCRegClass to G8RCRegclass.
13540
          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13541
                 "Unsupported RegClass.");
13542

13543
          StoreOp = PPC::STFD;
13544
          LoadOp = PPC::LD;
13545
        } else {
13546
          // Copy register from G8RCRegClass to F8RCRegclass.
13547
          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13548
                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13549
                 "Unsupported RegClass.");
13550
        }
13551

13552
        MachineFrameInfo &MFI = F->getFrameInfo();
13553
        int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13554

13555
        MachineMemOperand *MMOStore = F->getMachineMemOperand(
13556
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13557
            MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13558
            MFI.getObjectAlign(FrameIdx));
13559

13560
        // Store the SrcReg into the stack.
13561
        BuildMI(*BB, MI, dl, TII->get(StoreOp))
13562
          .addReg(SrcReg)
13563
          .addImm(0)
13564
          .addFrameIndex(FrameIdx)
13565
          .addMemOperand(MMOStore);
13566

13567
        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13568
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13569
            MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13570
            MFI.getObjectAlign(FrameIdx));
13571

13572
        // Load from the stack where SrcReg is stored, and save to DestReg,
13573
        // so we have done the RegClass conversion from RegClass::SrcReg to
13574
        // RegClass::DestReg.
13575
        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13576
          .addImm(0)
13577
          .addFrameIndex(FrameIdx)
13578
          .addMemOperand(MMOLoad);
13579
      }
13580
    };
13581

13582
    Register OldFPSCRReg = MI.getOperand(0).getReg();
13583

13584
    // Save FPSCR value.
13585
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13586

13587
    // When the operand is gprc register, use two least significant bits of the
13588
    // register and mtfsf instruction to set the bits 62:63 of FPSCR.
13589
    //
13590
    // copy OldFPSCRTmpReg, OldFPSCRReg
13591
    // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13592
    // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13593
    // copy NewFPSCRReg, NewFPSCRTmpReg
13594
    // mtfsf 255, NewFPSCRReg
13595
    MachineOperand SrcOp = MI.getOperand(1);
13596
    MachineRegisterInfo &RegInfo = F->getRegInfo();
13597
    Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13598

13599
    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13600

13601
    Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13602
    Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13603

13604
    // The first operand of INSERT_SUBREG should be a register which has
13605
    // subregisters, we only care about its RegClass, so we should use an
13606
    // IMPLICIT_DEF register.
13607
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13608
    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13609
      .addReg(ImDefReg)
13610
      .add(SrcOp)
13611
      .addImm(1);
13612

13613
    Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13614
    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13615
      .addReg(OldFPSCRTmpReg)
13616
      .addReg(ExtSrcReg)
13617
      .addImm(0)
13618
      .addImm(62);
13619

13620
    Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13621
    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13622

13623
    // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
13624
    // bits of FPSCR.
13625
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13626
      .addImm(255)
13627
      .addReg(NewFPSCRReg)
13628
      .addImm(0)
13629
      .addImm(0);
13630
  } else if (MI.getOpcode() == PPC::SETFLM) {
13631
    DebugLoc Dl = MI.getDebugLoc();
13632

13633
    // Result of setflm is previous FPSCR content, so we need to save it first.
13634
    Register OldFPSCRReg = MI.getOperand(0).getReg();
13635
    if (MRI.use_empty(OldFPSCRReg))
13636
      BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13637
    else
13638
      BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13639

13640
    // Put bits in 32:63 to FPSCR.
13641
    Register NewFPSCRReg = MI.getOperand(1).getReg();
13642
    BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13643
        .addImm(255)
13644
        .addReg(NewFPSCRReg)
13645
        .addImm(0)
13646
        .addImm(0);
13647
  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13648
             MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13649
    return emitProbedAlloca(MI, BB);
13650
  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13651
    DebugLoc DL = MI.getDebugLoc();
13652
    Register Src = MI.getOperand(2).getReg();
13653
    Register Lo = MI.getOperand(0).getReg();
13654
    Register Hi = MI.getOperand(1).getReg();
13655
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13656
        .addDef(Lo)
13657
        .addUse(Src, 0, PPC::sub_gp8_x1);
13658
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13659
        .addDef(Hi)
13660
        .addUse(Src, 0, PPC::sub_gp8_x0);
13661
  } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13662
             MI.getOpcode() == PPC::STQX_PSEUDO) {
13663
    DebugLoc DL = MI.getDebugLoc();
13664
    // Ptr is used as the ptr_rc_no_r0 part
13665
    // of LQ/STQ's memory operand and adding result of RA and RB,
13666
    // so it has to be g8rc_and_g8rc_nox0.
13667
    Register Ptr =
13668
        F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13669
    Register Val = MI.getOperand(0).getReg();
13670
    Register RA = MI.getOperand(1).getReg();
13671
    Register RB = MI.getOperand(2).getReg();
13672
    BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13673
    BuildMI(*BB, MI, DL,
13674
            MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13675
                                              : TII->get(PPC::STQ))
13676
        .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13677
        .addImm(0)
13678
        .addReg(Ptr);
13679
  } else {
13680
    llvm_unreachable("Unexpected instr type to insert");
13681
  }
13682

13683
  MI.eraseFromParent(); // The pseudo instruction is gone now.
13684
  return BB;
13685
}
13686

13687
//===----------------------------------------------------------------------===//
13688
// Target Optimization Hooks
13689
//===----------------------------------------------------------------------===//
13690

13691
/// Choose the default number of refinement iterations applied to a hardware
/// reciprocal / reciprocal-square-root estimate of type \p VT.
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // Convergence of the estimates is quadratic, i.e. each iteration roughly
  // doubles the number of correct digits. The minimum architected relative
  // accuracy of both FRE and FRSQRTE is 2^-5, or 2^-14 when hasRecipPrec().
  // IEEE float has 23 digits and double has 52 digits, hence one additional
  // step for f64 (scalar or vector element).
  const bool IsDouble = VT.getScalarType() == MVT::f64;
  const int BaseSteps = Subtarget.hasRecipPrec() ? 1 : 3;
  return IsDouble ? BaseSteps + 1 : BaseSteps;
}
13701

13702
SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                            const DenormalMode &Mode) const {
  // The FTSQRT-based test is only used when i1 is a legal type and the operand
  // is either f64, or a v2f64/v4f32 vector on a subtarget with VSX (we only
  // have the VSX vector test for software square root). Everything else is
  // delegated to the generic implementation.
  const EVT VT = Op.getValueType();
  const bool IsVSXVector =
      (VT == MVT::v2f64 || VT == MVT::v4f32) && Subtarget.hasVSX();
  const bool UseFTSQRT =
      isTypeLegal(MVT::i1) && (VT == MVT::f64 || IsVSXVector);
  if (!UseFTSQRT)
    return TargetLowering::getSqrtInputTest(Op, DAG, Mode);

  SDLoc DL(Op);
  // FTSQRT writes its result to a CR field.
  SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);

  // ftsqrt BF,FRB
  //   Let e_b be the unbiased exponent of the double-precision floating-point
  //   operand in register FRB. fe_flag is set to 1 if either
  //     - the operand is a zero, a NaN, an infinity, or a negative value, or
  //     - e_b is less than or equal to -970;
  //   otherwise fe_flag is set to 0.
  // Both the VSX and non-VSX versions set the EQ bit of the CR field when the
  // number is not eligible for iteration, so the EQ sub-register is the i1
  // value handed back to the caller.
  SDValue EQSubRegIdx = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
  SDNode *EQBit = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
                                     FTSQRT, EQSubRegIdx);
  return SDValue(EQBit, 0);
}
13730

13731
SDValue
PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                               SelectionDAG &DAG) const {
  // A PPCISD::FSQRT node is emitted for f64, and for v2f64/v4f32 when VSX is
  // available (we only have the VSX vector square root). All other types use
  // the generic implementation.
  const EVT VT = Op.getValueType();
  const bool IsVSXVector =
      (VT == MVT::v2f64 || VT == MVT::v4f32) && Subtarget.hasVSX();
  if (VT == MVT::f64 || IsVSXVector)
    return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);

  return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
}
13742

13743
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  // Returns a PPCISD::FRSQRTE node when the subtarget has a reciprocal
  // square-root estimate for the operand type, or an empty SDValue otherwise.
  // The out-parameters are only written in the former case.
  const EVT VT = Operand.getValueType();
  const bool HasEstimate = (VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
                           (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
                           (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
                           (VT == MVT::v2f64 && Subtarget.hasVSX());
  if (!HasEstimate)
    return SDValue();

  // Only supply a step count when the caller did not specify one.
  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

  // The Newton-Raphson computation with a single constant does not provide
  // enough accuracy on some CPUs.
  UseOneConstNR = !Subtarget.needsTwoConstNR();
  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
13762

13763
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  // Returns a PPCISD::FRE node when the subtarget has a reciprocal estimate
  // for the operand type, or an empty SDValue otherwise. RefinementSteps is
  // only written in the former case, and only if the caller left it
  // unspecified.
  const EVT VT = Operand.getValueType();
  const bool HasEstimate = (VT == MVT::f32 && Subtarget.hasFRES()) ||
                           (VT == MVT::f64 && Subtarget.hasFRE()) ||
                           (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
                           (VT == MVT::v2f64 && Subtarget.hasVSX());
  if (!HasEstimate)
    return SDValue();

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
13777

13778
unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13779
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
13780
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13781
  // enabled for division), this functionality is redundant with the default
13782
  // combiner logic (once the division -> reciprocal/multiply transformation
13783
  // has taken place). As a result, this matters more for older cores than for
13784
  // newer ones.
13785

13786
  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13787
  // reciprocal if there are two or more FDIVs (for embedded cores with only
13788
  // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13789
  switch (Subtarget.getCPUDirective()) {
13790
  default:
13791
    return 3;
13792
  case PPC::DIR_440:
13793
  case PPC::DIR_A2:
13794
  case PPC::DIR_E500:
13795
  case PPC::DIR_E500mc:
13796
  case PPC::DIR_E5500:
13797
    return 2;
13798
  }
13799
}
13800

13801
// isConsecutiveLSLoc needs to work even if all adds have not yet been
13802
// collapsed, and so we need to look through chains of them.
13803
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13804
                                     int64_t& Offset, SelectionDAG &DAG) {
13805
  if (DAG.isBaseWithConstantOffset(Loc)) {
13806
    Base = Loc.getOperand(0);
13807
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13808

13809
    // The base might itself be a base plus an offset, and if so, accumulate
13810
    // that as well.
13811
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13812
  }
13813
}
13814

13815
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13816
                            unsigned Bytes, int Dist,
13817
                            SelectionDAG &DAG) {
13818
  if (VT.getSizeInBits() / 8 != Bytes)
13819
    return false;
13820

13821
  SDValue BaseLoc = Base->getBasePtr();
13822
  if (Loc.getOpcode() == ISD::FrameIndex) {
13823
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
13824
      return false;
13825
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13826
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
13827
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13828
    int FS  = MFI.getObjectSize(FI);
13829
    int BFS = MFI.getObjectSize(BFI);
13830
    if (FS != BFS || FS != (int)Bytes) return false;
13831
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13832
  }
13833

13834
  SDValue Base1 = Loc, Base2 = BaseLoc;
13835
  int64_t Offset1 = 0, Offset2 = 0;
13836
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13837
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13838
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13839
    return true;
13840

13841
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13842
  const GlobalValue *GV1 = nullptr;
13843
  const GlobalValue *GV2 = nullptr;
13844
  Offset1 = 0;
13845
  Offset2 = 0;
13846
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13847
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13848
  if (isGA1 && isGA2 && GV1 == GV2)
13849
    return Offset1 == (Offset2 + Dist*Bytes);
13850
  return false;
13851
}
13852

13853
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13854
// not enforce equality of the chain operands.
13855
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13856
                            unsigned Bytes, int Dist,
13857
                            SelectionDAG &DAG) {
13858
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13859
    EVT VT = LS->getMemoryVT();
13860
    SDValue Loc = LS->getBasePtr();
13861
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13862
  }
13863

13864
  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13865
    EVT VT;
13866
    switch (N->getConstantOperandVal(1)) {
13867
    default: return false;
13868
    case Intrinsic::ppc_altivec_lvx:
13869
    case Intrinsic::ppc_altivec_lvxl:
13870
    case Intrinsic::ppc_vsx_lxvw4x:
13871
    case Intrinsic::ppc_vsx_lxvw4x_be:
13872
      VT = MVT::v4i32;
13873
      break;
13874
    case Intrinsic::ppc_vsx_lxvd2x:
13875
    case Intrinsic::ppc_vsx_lxvd2x_be:
13876
      VT = MVT::v2f64;
13877
      break;
13878
    case Intrinsic::ppc_altivec_lvebx:
13879
      VT = MVT::i8;
13880
      break;
13881
    case Intrinsic::ppc_altivec_lvehx:
13882
      VT = MVT::i16;
13883
      break;
13884
    case Intrinsic::ppc_altivec_lvewx:
13885
      VT = MVT::i32;
13886
      break;
13887
    }
13888

13889
    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13890
  }
13891

13892
  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13893
    EVT VT;
13894
    switch (N->getConstantOperandVal(1)) {
13895
    default: return false;
13896
    case Intrinsic::ppc_altivec_stvx:
13897
    case Intrinsic::ppc_altivec_stvxl:
13898
    case Intrinsic::ppc_vsx_stxvw4x:
13899
      VT = MVT::v4i32;
13900
      break;
13901
    case Intrinsic::ppc_vsx_stxvd2x:
13902
      VT = MVT::v2f64;
13903
      break;
13904
    case Intrinsic::ppc_vsx_stxvw4x_be:
13905
      VT = MVT::v4i32;
13906
      break;
13907
    case Intrinsic::ppc_vsx_stxvd2x_be:
13908
      VT = MVT::v2f64;
13909
      break;
13910
    case Intrinsic::ppc_altivec_stvebx:
13911
      VT = MVT::i8;
13912
      break;
13913
    case Intrinsic::ppc_altivec_stvehx:
13914
      VT = MVT::i16;
13915
      break;
13916
    case Intrinsic::ppc_altivec_stvewx:
13917
      VT = MVT::i32;
13918
      break;
13919
    }
13920

13921
    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13922
  }
13923

13924
  return false;
13925
}
13926

13927
// Return true is there is a nearyby consecutive load to the one provided
13928
// (regardless of alignment). We search up and down the chain, looking though
13929
// token factors and other loads (but nothing else). As a result, a true result
13930
// indicates that it is safe to create a new consecutive load adjacent to the
13931
// load provided.
13932
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13933
  SDValue Chain = LD->getChain();
13934
  EVT VT = LD->getMemoryVT();
13935

13936
  SmallSet<SDNode *, 16> LoadRoots;
13937
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13938
  SmallSet<SDNode *, 16> Visited;
13939

13940
  // First, search up the chain, branching to follow all token-factor operands.
13941
  // If we find a consecutive load, then we're done, otherwise, record all
13942
  // nodes just above the top-level loads and token factors.
13943
  while (!Queue.empty()) {
13944
    SDNode *ChainNext = Queue.pop_back_val();
13945
    if (!Visited.insert(ChainNext).second)
13946
      continue;
13947

13948
    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13949
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13950
        return true;
13951

13952
      if (!Visited.count(ChainLD->getChain().getNode()))
13953
        Queue.push_back(ChainLD->getChain().getNode());
13954
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13955
      for (const SDUse &O : ChainNext->ops())
13956
        if (!Visited.count(O.getNode()))
13957
          Queue.push_back(O.getNode());
13958
    } else
13959
      LoadRoots.insert(ChainNext);
13960
  }
13961

13962
  // Second, search down the chain, starting from the top-level nodes recorded
13963
  // in the first phase. These top-level nodes are the nodes just above all
13964
  // loads and token factors. Starting with their uses, recursively look though
13965
  // all loads (just the chain uses) and token factors to find a consecutive
13966
  // load.
13967
  Visited.clear();
13968
  Queue.clear();
13969

13970
  for (SDNode *I : LoadRoots) {
13971
    Queue.push_back(I);
13972

13973
    while (!Queue.empty()) {
13974
      SDNode *LoadRoot = Queue.pop_back_val();
13975
      if (!Visited.insert(LoadRoot).second)
13976
        continue;
13977

13978
      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13979
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13980
          return true;
13981

13982
      for (SDNode *U : LoadRoot->uses())
13983
        if (((isa<MemSDNode>(U) &&
13984
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13985
             U->getOpcode() == ISD::TokenFactor) &&
13986
            !Visited.count(U))
13987
          Queue.push_back(U);
13988
    }
13989
  }
13990

13991
  return false;
13992
}
13993

13994
/// This function is called when we have proved that a SETCC node can be replaced
13995
/// by subtraction (and other supporting instructions) so that the result of
13996
/// comparison is kept in a GPR instead of CR. This function is purely for
13997
/// codegen purposes and has some flags to guide the codegen process.
13998
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13999
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14000
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14001

14002
  // Zero extend the operands to the largest legal integer. Originally, they
14003
  // must be of a strictly smaller size.
14004
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14005
                         DAG.getConstant(Size, DL, MVT::i32));
14006
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14007
                         DAG.getConstant(Size, DL, MVT::i32));
14008

14009
  // Swap if needed. Depends on the condition code.
14010
  if (Swap)
14011
    std::swap(Op0, Op1);
14012

14013
  // Subtract extended integers.
14014
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14015

14016
  // Move the sign bit to the least significant position and zero out the rest.
14017
  // Now the least significant bit carries the result of original comparison.
14018
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14019
                             DAG.getConstant(Size - 1, DL, MVT::i32));
14020
  auto Final = Shifted;
14021

14022
  // Complement the result if needed. Based on the condition code.
14023
  if (Complement)
14024
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14025
                        DAG.getConstant(1, DL, MVT::i64));
14026

14027
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14028
}
14029

14030
/// Try to replace an unsigned-comparison SETCC by a subtraction sequence.
///
/// The rewrite is attempted only after DAG legalization, only when every user
/// of \p N is a ZERO_EXTEND, and only when the compared operands are narrower
/// than the largest legal integer type. Of the condition codes, SETULT,
/// SETULE, SETUGT and SETUGE are handled; anything else is left alone.
///
/// \returns The replacement value, or an empty SDValue when no rewrite is done.
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // The width of the compared integers is central to the analysis below, so
  // only proceed once all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // The subtraction form is only used when every user of the SETCC extends
  // its value to a legal integer type.
  const bool HasOtherUser = llvm::any_of(N->uses(), [](const SDNode *U) {
    return U->getOpcode() != ISD::ZERO_EXTEND;
  });
  if (HasOtherUser)
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OperandBits = N->getOperand(0).getValueSizeInBits();
  unsigned LegalBits = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (!(OperandBits < LegalBits))
    return SDValue();

  // Pick the variant of the subtraction sequence for this condition code.
  bool Complement = false;
  bool Swap = false;
  switch (CC) {
  case ISD::SETULT:
    break;
  case ISD::SETULE:
    Complement = true;
    Swap = true;
    break;
  case ISD::SETUGT:
    Swap = true;
    break;
  case ISD::SETUGE:
    Complement = true;
    break;
  default:
    return SDValue();
  }

  return generateEquivalentSub(N, LegalBits, Complement, Swap, DL, DAG);
}
14069

14070
/// Keep a cluster of bit operations fed by i1 extensions in CR bits.
///
/// \p N is either an i1 TRUNCATE or a SETCC / SELECT_CC acting as an effective
/// truncation. When its operand(s) form a self-contained cluster of
/// and/or/xor/select/select_cc/trunc/ext nodes whose leaves are constants or
/// extensions from i1, the whole cluster is rebuilt with i1 result types.
///
/// \returns An empty SDValue when the combine does not apply; the (rewritten)
/// operand 0 when \p N is a TRUNCATE; otherwise \p N itself, whose compared
/// operands have changed type. For an unsigned SETCC whose high bits are not
/// known zero, the result of ConvertSETCCToSubtract is returned instead.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // When tracking CR bits we want to avoid
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // because they move values into GPRs that would be better kept in CR bits.

  const unsigned RootOpc = N->getOpcode();
  const bool RootIsTrunc = RootOpc == ISD::TRUNCATE;
  const bool RootIsCompare =
      RootOpc == ISD::SETCC || RootOpc == ISD::SELECT_CC;

  // True for the three integer extension opcodes.
  auto IsExtOpcode = [](unsigned Opc) {
    return Opc == ISD::SIGN_EXTEND || Opc == ISD::ZERO_EXTEND ||
           Opc == ISD::ANY_EXTEND;
  };
  // True for every opcode that may appear inside the to-be-promoted cluster.
  auto IsClusterOpcode = [&](unsigned Opc) {
    switch (Opc) {
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SELECT:
    case ISD::SELECT_CC:
    case ISD::TRUNCATE:
      return true;
    default:
      return IsExtOpcode(Opc);
    }
  };
  // True for a leaf of the cluster: an extension from i1, or a constant.
  auto IsBoolExtOrConstant = [&](SDValue V) {
    if (IsExtOpcode(V.getOpcode()) &&
        V.getOperand(0).getValueType() == MVT::i1)
      return true;
    return isa<ConstantSDNode>(V);
  };
  // True once a value is usable as an operand of a promoted (i1) node.
  auto IsI1OrConstant = [](SDValue V) {
    return isa<ConstantSDNode>(V) || V.getValueType() == MVT::i1;
  };

  // The "trunc" may be a real i1 trunc, or the effective truncation performed
  // by a setcc or select_cc.
  if (RootIsTrunc && N->getValueType(0) != MVT::i1)
    return SDValue();

  EVT SrcVT = N->getOperand(0).getValueType();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return SDValue();

  if (RootIsCompare) {
    // For a comparison, make sure that the high bits (everything but the
    // first) do not influence the result.
    const unsigned CCIdx = RootOpc == ISD::SETCC ? 2 : 4;
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(CCIdx))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();
    SDValue CmpLHS = N->getOperand(0);
    SDValue CmpRHS = N->getOperand(1);

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(CmpLHS) != OpBits ||
          DAG.ComputeNumSignBits(CmpRHS) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      const APInt HighMask = APInt::getHighBitsSet(OpBits, OpBits - 1);
      const bool HighBitsClear = DAG.MaskedValueIsZero(CmpLHS, HighMask) &&
                                 DAG.MaskedValueIsZero(CmpRHS, HighMask);
      if (!HighBitsClear)
        return RootOpc == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                     : SDValue();
    } else {
      // Neither signed nor unsigned: just require the high bits to be equal.
      KnownBits LHSKnown = DAG.computeKnownBits(CmpLHS);
      KnownBits RHSKnown = DAG.computeKnownBits(CmpRHS);

      // Whatever is known about bit 0 is irrelevant; pretend it is known zero
      // on both sides so the remaining bits can be compared as constants.
      auto IgnoreLowBit = [](KnownBits &Known) {
        Known.Zero.setBit(0);
        Known.One.clearBit(0);
      };
      IgnoreLowBit(LHSKnown);
      IgnoreLowBit(RHSKnown);

      const bool SameHighBits =
          LHSKnown.isConstant() && RHSKnown.isConstant() &&
          LHSKnown.getConstant() == RHSKnown.getConstant();
      if (!SameHighBits)
        return SDValue();
    }
  }

  // The higher-order bits are now known to be irrelevant. What remains is to
  // check that all intermediate operations are bit operations and that all
  // inputs are extensions.
  if (!IsClusterOpcode(N->getOperand(0).getOpcode()))
    return SDValue();

  if (RootIsCompare && !IsClusterOpcode(N->getOperand(1).getOpcode()))
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the traversal: a TRUNCATE contributes its single operand, a
  // comparison contributes both compared operands.
  const unsigned NumRootOps = RootIsTrunc ? 1 : 2;
  for (unsigned Idx = 0; Idx != NumRootOps; ++Idx) {
    SDValue RootOp = N->getOperand(Idx);
    if (IsBoolExtOrConstant(RootOp))
      Inputs.push_back(RootOp);
    else
      BinOps.push_back(RootOp);
  }

  // Visit all inputs, collecting every binary operation (and, or, xor and
  // select) that is fed purely by extensions.
  while (!BinOps.empty()) {
    SDValue Cur = BinOps.pop_back_val();

    if (!Visited.insert(Cur.getNode()).second)
      continue;

    PromOps.push_back(Cur);

    const unsigned CurOpc = Cur.getOpcode();
    for (unsigned Idx = 0, NumOps = Cur.getNumOperands(); Idx != NumOps;
         ++Idx) {
      // The condition of a select is not promoted; for select_cc only the
      // two value operands (2 and 3) are.
      if (CurOpc == ISD::SELECT && Idx == 0)
        continue;
      if (CurOpc == ISD::SELECT_CC && Idx != 2 && Idx != 3)
        continue;

      SDValue Operand = Cur.getOperand(Idx);
      if (IsBoolExtOrConstant(Operand)) {
        Inputs.push_back(Operand);
      } else if (IsClusterOpcode(Operand.getOpcode())) {
        BinOps.push_back(Operand);
      } else {
        // An input that is neither an extension nor another binary
        // operation: give up on this transformation.
        return SDValue();
      }
    }
  }

  // Returns true when some use of V prevents the promotion: either a user
  // outside the cluster, or V feeding a non-output-value operand of a SELECT
  // or SELECT_CC.
  // FIXME: Although we could sometimes handle this, and it does occur in
  // practice that one of the condition inputs to the select is also one of
  // the outputs, we currently can't deal with this.
  auto HasBlockingUse = [&](SDValue V) {
    for (const SDNode *User : V.getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return true;

      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == V)
          return true;
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == V || User->getOperand(1) == V)
          return true;
      }
    }
    return false;
  };

  // Make sure this is a self-contained cluster of operations (which is not
  // quite the same as requiring every node to have a single use).
  for (SDValue In : Inputs) {
    if (isa<ConstantSDNode>(In))
      continue;
    if (HasBlockingUse(In))
      return SDValue();
  }

  for (SDValue Op : PromOps)
    if (HasBlockingUse(Op))
      return SDValue();

  // Replace every non-constant input with the operand of its extension.
  // Constants may have users outside the cluster, so they are replaced
  // later, while the nodes that use them are promoted.
  for (SDValue In : Inputs) {
    if (isa<ConstantSDNode>(In))
      continue;
    DAG.ReplaceAllUsesOfValueWith(In, In.getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Recreate every operation with an i1 result type. DAG.getNode validates
  // that the operand types of a binary operator match, so the list is walked
  // from the back, where both operands have most likely been promoted
  // already. Intermediate truncations and extensions simply disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    const unsigned PromOpc = PromOp.getOpcode();

    if (PromOpc == ISD::TRUNCATE || IsExtOpcode(PromOpc)) {
      SDValue RepValue = PromOp.getOperand(0);
      if (!IsI1OrConstant(RepValue)) {
        // The operand has not been promoted yet; retry this node later.
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // Index of the first of the two value operands that get promoted.
    unsigned FirstValOp = 0;
    if (PromOpc == ISD::SELECT)
      FirstValOp = 1;
    else if (PromOpc == ISD::SELECT_CC)
      FirstValOp = 2;

    if (!IsI1OrConstant(PromOp.getOperand(FirstValOp)) ||
        !IsI1OrConstant(PromOp.getOperand(FirstValOp + 1))) {
      // The to-be-promoted operands of this node have not been promoted yet.
      // Walking the list backward makes this rare, but it can happen when an
      // operand has several users inside the cluster.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // Constant inputs were skipped above; narrow them now.
    for (unsigned Idx = FirstValOp; Idx != FirstValOp + 2; ++Idx)
      if (isa<ConstantSDNode>(Ops[Idx]))
        Ops[Idx] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[Idx]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
                                  DAG.getNode(PromOpc, dl, MVT::i1, Ops));
  }

  // Only the initial truncation itself is left.
  if (RootIsTrunc)
    return N->getOperand(0);

  // Otherwise this is a comparison: its compared operands have just changed
  // type (to i1), everything else is unchanged.
  return SDValue(N, 0);
}
14345

14346
/// Keep a cluster of bit operations fed by truncations in GPRs.
///
/// \p N is an extension to i32 or i64 whose operand is either i1 (when CR
/// bits are in use) or i32 (on PPC64). When that operand is the root of a
/// self-contained cluster of and/or/xor/select/select_cc nodes whose leaves
/// are constants or truncations, the cluster is rebuilt in the result type of
/// \p N, and any masking or shifting still required by the extension is
/// appended.
///
/// \returns An empty SDValue when the combine does not apply, otherwise the
/// value that replaces \p N.
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // When tracking CR bits we want to avoid
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // because they move values into CR bits that could stay in GPRs more
  // efficiently. If the high bits are not known to be set as the final
  // extension requires, some masking may still be needed for correctness.

  // The same functionality matters on PPC64 for 32-to-64-bit extensions,
  // which occur often when 32-bit values are used as function return values.
  // It is similar enough to be handled here as well.

  const unsigned ExtOpc = N->getOpcode();
  EVT ResVT = N->getValueType(0);

  if (ResVT != MVT::i32 && ResVT != MVT::i64)
    return SDValue();

  EVT SrcVT = N->getOperand(0).getValueType();
  const bool FromCRBit = SrcVT == MVT::i1 && Subtarget.useCRBits();
  const bool FromI32OnPPC64 = SrcVT == MVT::i32 && Subtarget.isPPC64();
  if (!FromCRBit && !FromI32OnPPC64)
    return SDValue();

  // True for the opcodes that may appear inside the to-be-promoted cluster.
  auto IsLogicOrSelect = [](unsigned Opc) {
    switch (Opc) {
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SELECT:
    case ISD::SELECT_CC:
      return true;
    default:
      return false;
    }
  };
  // True once a value is usable as an operand of a promoted node.
  auto IsPromotedOrConstant = [&](SDValue V) {
    return isa<ConstantSDNode>(V) || V.getValueType() == ResVT;
  };
  // Convert V to the result type using the flavor of extension N performs.
  auto ExtendToResult = [&](SDValue V) {
    if (ExtOpc == ISD::SIGN_EXTEND)
      return DAG.getSExtOrTrunc(V, dl, ResVT);
    if (ExtOpc == ISD::ZERO_EXTEND)
      return DAG.getZExtOrTrunc(V, dl, ResVT);
    return DAG.getAnyExtOrTrunc(V, dl, ResVT);
  };

  if (!IsLogicOrSelect(N->getOperand(0).getOpcode()))
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collecting every binary operation (and, or, xor and
  // select) that is fed purely by truncations.
  while (!BinOps.empty()) {
    SDValue Cur = BinOps.pop_back_val();

    if (!Visited.insert(Cur.getNode()).second)
      continue;

    PromOps.push_back(Cur);

    const unsigned CurOpc = Cur.getOpcode();
    for (unsigned Idx = 0, NumOps = Cur.getNumOperands(); Idx != NumOps;
         ++Idx) {
      // The condition of a select is not promoted; for select_cc only the
      // two value operands (2 and 3) are.
      if (CurOpc == ISD::SELECT && Idx == 0)
        continue;
      if (CurOpc == ISD::SELECT_CC && Idx != 2 && Idx != 3)
        continue;

      SDValue Operand = Cur.getOperand(Idx);
      if (Operand.getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(Operand)) {
        Inputs.push_back(Operand);
      } else if (IsLogicOrSelect(Operand.getOpcode())) {
        BinOps.push_back(Operand);
      } else {
        // An input that is neither a truncation nor another binary
        // operation: give up on this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Inspects every use of V. Returns false when a user lies outside the
  // cluster. Where V feeds a non-output-value operand of a SELECT or
  // SELECT_CC, that operand's current type is recorded so it can be
  // truncated back after promotion.
  auto UsesStayInCluster = [&](SDValue V) {
    for (SDNode *User : V.getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return false;

      const unsigned UserOpc = User->getOpcode();
      if (UserOpc != ISD::SELECT && UserOpc != ISD::SELECT_CC)
        continue;

      if (User->getOperand(0) == V)
        SelectTruncOp[0].insert(
            std::make_pair(User, User->getOperand(0).getValueType()));
      if (UserOpc == ISD::SELECT_CC && User->getOperand(1) == V)
        SelectTruncOp[1].insert(
            std::make_pair(User, User->getOperand(1).getValueType()));
    }
    return true;
  };

  // Make sure this is a self-contained cluster of operations (which is not
  // quite the same as requiring every node to have a single use).
  for (SDValue In : Inputs) {
    if (isa<ConstantSDNode>(In))
      continue;
    if (!UsesStayInCluster(In))
      return SDValue();
  }

  for (SDValue Op : PromOps)
    if (!UsesStayInCluster(Op))
      return SDValue();

  // Must be read before the cluster is rewritten: operand 0 of N changes
  // type once its nodes are promoted.
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (ExtOpc != ISD::ANY_EXTEND) {
    // Unless every input is already sign/zero extended, the extension still
    // has to be performed at the end.
    for (SDValue In : Inputs) {
      if (isa<ConstantSDNode>(In))
        continue;

      SDValue Wide = In.getOperand(0);
      unsigned OpBits = Wide.getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      bool HighBitsUnknown = false;
      if (ExtOpc == ISD::ZERO_EXTEND)
        HighBitsUnknown = !DAG.MaskedValueIsZero(
            Wide, APInt::getHighBitsSet(OpBits, OpBits - PromBits));
      else if (ExtOpc == ISD::SIGN_EXTEND)
        HighBitsUnknown =
            DAG.ComputeNumSignBits(Wide) < (OpBits - (PromBits - 1));

      if (HighBitsUnknown) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace every non-constant input, either with the operand of its
  // truncation, or with a truncation or extension of that operand to the
  // final output type. Constant inputs are handled together with the nodes
  // that use them, because they might have users outside the cluster.
  for (SDValue In : Inputs) {
    if (isa<ConstantSDNode>(In))
      continue;

    SDValue InSrc = In.getOperand(0);
    // NOTE(review): this compares the truncation's own result type, not the
    // type of InSrc, against the output type -- kept as in the existing
    // code; confirm which was intended.
    if (In.getValueType() == ResVT)
      DAG.ReplaceAllUsesOfValueWith(In, InSrc);
    else
      DAG.ReplaceAllUsesOfValueWith(In, ExtendToResult(InSrc));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Recreate every operation with the promoted result type. DAG.getNode
  // validates that the operand types of a binary operator match, so the list
  // is walked from the back, where both operands have most likely been
  // promoted already.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    const unsigned PromOpc = PromOp.getOpcode();
    const bool IsSelect = PromOpc == ISD::SELECT || PromOpc == ISD::SELECT_CC;

    // Index of the first of the two value operands that get promoted.
    unsigned FirstValOp = 0;
    if (PromOpc == ISD::SELECT)
      FirstValOp = 1;
    else if (PromOpc == ISD::SELECT_CC)
      FirstValOp = 2;

    if (!IsPromotedOrConstant(PromOp.getOperand(FirstValOp)) ||
        !IsPromotedOrConstant(PromOp.getOperand(FirstValOp + 1))) {
      // The to-be-promoted operands of this node have not been promoted yet.
      // Walking the list backward makes this rare, but it can happen when an
      // operand has several users inside the cluster.
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, apply the same check to any
    // to-be-promoted comparison inputs.
    if (IsSelect) {
      auto CompareInputPending = [&](unsigned Idx) {
        return SelectTruncOp[Idx].count(PromOp.getNode()) &&
               PromOp.getOperand(Idx).getValueType() != ResVT;
      };
      if (CompareInputPending(0) || CompareInputPending(1)) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // Constant inputs were skipped above; promote them now.
    for (unsigned Idx = FirstValOp; Idx != FirstValOp + 2; ++Idx) {
      if (!isa<ConstantSDNode>(Ops[Idx]))
        continue;
      if (Ops[Idx].getValueType() == ResVT)
        continue;

      Ops[Idx] = ExtendToResult(Ops[Idx]);
    }

    // Comparison inputs of a SELECT or SELECT_CC that were promoted along
    // the way are truncated back to their original value type.
    if (IsSelect) {
      for (unsigned Idx = 0; Idx != 2; ++Idx) {
        auto Recorded = SelectTruncOp[Idx].find(PromOp.getNode());
        if (Recorded != SelectTruncOp[Idx].end())
          Ops[Idx] =
              DAG.getNode(ISD::TRUNCATE, dl, Recorded->second, Ops[Idx]);
      }
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
                                  DAG.getNode(PromOpc, dl, ResVT, Ops));
  }

  // Only the initial extension itself is left.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, mask off everything above the promoted bits (for the i1
  // case, everything except the first bit).
  if (ExtOpc == ISD::ZERO_EXTEND) {
    SDValue Mask = DAG.getConstant(
        APInt::getLowBitsSet(N->getValueSizeInBits(0), PromBits), dl, ResVT);
    return DAG.getNode(ISD::AND, dl, ResVT, N->getOperand(0), Mask);
  }

  assert(ExtOpc == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  // To sign extend, shift the promoted bits to the top and arithmetically
  // shift them back down.
  EVT ShiftAmountTy = getShiftAmountTy(ResVT, DAG.getDataLayout());
  SDValue ShiftCst =
      DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  SDValue ShiftedUp =
      DAG.getNode(ISD::SHL, dl, ResVT, N->getOperand(0), ShiftCst);
  return DAG.getNode(ISD::SRA, dl, ResVT, ShiftedUp, ShiftCst);
}
14621

14622
/// DAG combine for ISD::SETCC nodes.
///
/// For equality and inequality comparisons against a single-use '0 - y',
/// the comparison is rewritten as a test of 'x + y' against zero. Every other
/// SETCC is handed to DAGCombineTruncBoolExt.
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC != ISD::SETNE && CC != ISD::SETEQ)
    return DAGCombineTruncBoolExt(N, DCI);

  // Matches a '0 - y' subtraction that has exactly one use.
  auto IsSingleUseNegation = [](SDValue V) {
    return V.getOpcode() == ISD::SUB && isNullConstant(V.getOperand(0)) &&
           V.hasOneUse();
  };

  SDValue Plain = N->getOperand(0);
  SDValue Negated = N->getOperand(1);

  // Canonicalize a '0 - y' pattern onto the right-hand side.
  if (IsSingleUseNegation(Plain))
    std::swap(Plain, Negated);

  if (!IsSingleUseNegation(Negated))
    return DAGCombineTruncBoolExt(N, DCI);

  // x == 0-y --> x+y == 0
  // x != 0-y --> x+y != 0
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  EVT ResultVT = N->getValueType(0);
  EVT OperandVT = Plain.getValueType();
  SDValue Sum =
      DAG.getNode(ISD::ADD, DL, OperandVT, Plain, Negated.getOperand(1));
  SDValue Zero = DAG.getConstant(0, DL, OperandVT);
  return DAG.getSetCC(DL, ResultVT, Sum, Zero, CC);
}
14652

14653
// Is this an extending load from an f32 to an f64?
14654
static bool isFPExtLoad(SDValue Op) {
14655
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14656
    return LD->getExtensionType() == ISD::EXTLOAD &&
14657
      Op.getValueType() == MVT::f64;
14658
  return false;
14659
}
14660

14661
/// Reduces the number of fp-to-int conversion when building a vector.
14662
///
14663
/// If this vector is built out of floating to integer conversions,
14664
/// transform it to a vector built out of floating point values followed by a
14665
/// single floating to integer conversion of the vector.
14666
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
14667
/// becomes (fptosi (build_vector ($A, $B, ...)))
14668
SDValue PPCTargetLowering::
14669
combineElementTruncationToVectorTruncation(SDNode *N,
14670
                                           DAGCombinerInfo &DCI) const {
14671
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14672
         "Should be called with a BUILD_VECTOR node");
14673

14674
  SelectionDAG &DAG = DCI.DAG;
14675
  SDLoc dl(N);
14676

14677
  SDValue FirstInput = N->getOperand(0);
14678
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14679
         "The input operand must be an fp-to-int conversion.");
14680

14681
  // This combine happens after legalization so the fp_to_[su]i nodes are
14682
  // already converted to PPCSISD nodes.
14683
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14684
  if (FirstConversion == PPCISD::FCTIDZ ||
14685
      FirstConversion == PPCISD::FCTIDUZ ||
14686
      FirstConversion == PPCISD::FCTIWZ ||
14687
      FirstConversion == PPCISD::FCTIWUZ) {
14688
    bool IsSplat = true;
14689
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14690
      FirstConversion == PPCISD::FCTIWUZ;
14691
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
14692
    SmallVector<SDValue, 4> Ops;
14693
    EVT TargetVT = N->getValueType(0);
14694
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14695
      SDValue NextOp = N->getOperand(i);
14696
      if (NextOp.getOpcode() != PPCISD::MFVSR)
14697
        return SDValue();
14698
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14699
      if (NextConversion != FirstConversion)
14700
        return SDValue();
14701
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14702
      // This is not valid if the input was originally double precision. It is
14703
      // also not profitable to do unless this is an extending load in which
14704
      // case doing this combine will allow us to combine consecutive loads.
14705
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14706
        return SDValue();
14707
      if (N->getOperand(i) != FirstInput)
14708
        IsSplat = false;
14709
    }
14710

14711
    // If this is a splat, we leave it as-is since there will be only a single
14712
    // fp-to-int conversion followed by a splat of the integer. This is better
14713
    // for 32-bit and smaller ints and neutral for 64-bit ints.
14714
    if (IsSplat)
14715
      return SDValue();
14716

14717
    // Now that we know we have the right type of node, get its operands
14718
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14719
      SDValue In = N->getOperand(i).getOperand(0);
14720
      if (Is32Bit) {
14721
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
14722
        // here, we know that all inputs are extending loads so this is safe).
14723
        if (In.isUndef())
14724
          Ops.push_back(DAG.getUNDEF(SrcVT));
14725
        else {
14726
          SDValue Trunc =
14727
              DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14728
                          DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14729
          Ops.push_back(Trunc);
14730
        }
14731
      } else
14732
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14733
    }
14734

14735
    unsigned Opcode;
14736
    if (FirstConversion == PPCISD::FCTIDZ ||
14737
        FirstConversion == PPCISD::FCTIWZ)
14738
      Opcode = ISD::FP_TO_SINT;
14739
    else
14740
      Opcode = ISD::FP_TO_UINT;
14741

14742
    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14743
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14744
    return DAG.getNode(Opcode, dl, TargetVT, BV);
14745
  }
14746
  return SDValue();
14747
}
14748

14749
/// Reduce the number of loads when building a vector.
14750
///
14751
/// Building a vector out of multiple loads can be converted to a load
14752
/// of the vector type if the loads are consecutive. If the loads are
14753
/// consecutive but in descending order, a shuffle is added at the end
14754
/// to reorder the vector.
14755
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14756
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14757
         "Should be called with a BUILD_VECTOR node");
14758

14759
  SDLoc dl(N);
14760

14761
  // Return early for non byte-sized type, as they can't be consecutive.
14762
  if (!N->getValueType(0).getVectorElementType().isByteSized())
14763
    return SDValue();
14764

14765
  bool InputsAreConsecutiveLoads = true;
14766
  bool InputsAreReverseConsecutive = true;
14767
  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14768
  SDValue FirstInput = N->getOperand(0);
14769
  bool IsRoundOfExtLoad = false;
14770
  LoadSDNode *FirstLoad = nullptr;
14771

14772
  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14773
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14774
    FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14775
    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14776
  }
14777
  // Not a build vector of (possibly fp_rounded) loads.
14778
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14779
      N->getNumOperands() == 1)
14780
    return SDValue();
14781

14782
  if (!IsRoundOfExtLoad)
14783
    FirstLoad = cast<LoadSDNode>(FirstInput);
14784

14785
  SmallVector<LoadSDNode *, 4> InputLoads;
14786
  InputLoads.push_back(FirstLoad);
14787
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14788
    // If any inputs are fp_round(extload), they all must be.
14789
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14790
      return SDValue();
14791

14792
    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14793
      N->getOperand(i);
14794
    if (NextInput.getOpcode() != ISD::LOAD)
14795
      return SDValue();
14796

14797
    SDValue PreviousInput =
14798
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14799
    LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14800
    LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14801

14802
    // If any inputs are fp_round(extload), they all must be.
14803
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14804
      return SDValue();
14805

14806
    // We only care about regular loads. The PPC-specific load intrinsics
14807
    // will not lead to a merge opportunity.
14808
    if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14809
      InputsAreConsecutiveLoads = false;
14810
    if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14811
      InputsAreReverseConsecutive = false;
14812

14813
    // Exit early if the loads are neither consecutive nor reverse consecutive.
14814
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14815
      return SDValue();
14816
    InputLoads.push_back(LD2);
14817
  }
14818

14819
  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14820
         "The loads cannot be both consecutive and reverse consecutive.");
14821

14822
  SDValue WideLoad;
14823
  SDValue ReturnSDVal;
14824
  if (InputsAreConsecutiveLoads) {
14825
    assert(FirstLoad && "Input needs to be a LoadSDNode.");
14826
    WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14827
                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14828
                           FirstLoad->getAlign());
14829
    ReturnSDVal = WideLoad;
14830
  } else if (InputsAreReverseConsecutive) {
14831
    LoadSDNode *LastLoad = InputLoads.back();
14832
    assert(LastLoad && "Input needs to be a LoadSDNode.");
14833
    WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14834
                           LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14835
                           LastLoad->getAlign());
14836
    SmallVector<int, 16> Ops;
14837
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
14838
      Ops.push_back(i);
14839

14840
    ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14841
                                       DAG.getUNDEF(N->getValueType(0)), Ops);
14842
  } else
14843
    return SDValue();
14844

14845
  for (auto *LD : InputLoads)
14846
    DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14847
  return ReturnSDVal;
14848
}
14849

14850
// This function adds the required vector_shuffle needed to get
14851
// the elements of the vector extract in the correct position
14852
// as specified by the CorrectElems encoding.
14853
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14854
                                      SDValue Input, uint64_t Elems,
14855
                                      uint64_t CorrectElems) {
14856
  SDLoc dl(N);
14857

14858
  unsigned NumElems = Input.getValueType().getVectorNumElements();
14859
  SmallVector<int, 16> ShuffleMask(NumElems, -1);
14860

14861
  // Knowing the element indices being extracted from the original
14862
  // vector and the order in which they're being inserted, just put
14863
  // them at element indices required for the instruction.
14864
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14865
    if (DAG.getDataLayout().isLittleEndian())
14866
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14867
    else
14868
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14869
    CorrectElems = CorrectElems >> 8;
14870
    Elems = Elems >> 8;
14871
  }
14872

14873
  SDValue Shuffle =
14874
      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14875
                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14876

14877
  EVT VT = N->getValueType(0);
14878
  SDValue Conv = DAG.getBitcast(VT, Shuffle);
14879

14880
  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14881
                               Input.getValueType().getVectorElementType(),
14882
                               VT.getVectorNumElements());
14883
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14884
                     DAG.getValueType(ExtVT));
14885
}
14886

14887
// Look for build vector patterns where input operands come from sign
14888
// extended vector_extract elements of specific indices. If the correct indices
14889
// aren't used, add a vector shuffle to fix up the indices and create
14890
// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14891
// during instruction selection.
14892
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14893
  // This array encodes the indices that the vector sign extend instructions
14894
  // extract from when extending from one type to another for both BE and LE.
14895
  // The right nibble of each byte corresponds to the LE incides.
14896
  // and the left nibble of each byte corresponds to the BE incides.
14897
  // For example: 0x3074B8FC  byte->word
14898
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14899
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14900
  // For example: 0x000070F8  byte->double word
14901
  // For LE: the allowed indices are: 0x0,0x8
14902
  // For BE: the allowed indices are: 0x7,0xF
14903
  uint64_t TargetElems[] = {
14904
      0x3074B8FC, // b->w
14905
      0x000070F8, // b->d
14906
      0x10325476, // h->w
14907
      0x00003074, // h->d
14908
      0x00001032, // w->d
14909
  };
14910

14911
  uint64_t Elems = 0;
14912
  int Index;
14913
  SDValue Input;
14914

14915
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14916
    if (!Op)
14917
      return false;
14918
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14919
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14920
      return false;
14921

14922
    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14923
    // of the right width.
14924
    SDValue Extract = Op.getOperand(0);
14925
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
14926
      Extract = Extract.getOperand(0);
14927
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14928
      return false;
14929

14930
    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14931
    if (!ExtOp)
14932
      return false;
14933

14934
    Index = ExtOp->getZExtValue();
14935
    if (Input && Input != Extract.getOperand(0))
14936
      return false;
14937

14938
    if (!Input)
14939
      Input = Extract.getOperand(0);
14940

14941
    Elems = Elems << 8;
14942
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14943
    Elems |= Index;
14944

14945
    return true;
14946
  };
14947

14948
  // If the build vector operands aren't sign extended vector extracts,
14949
  // of the same input vector, then return.
14950
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14951
    if (!isSExtOfVecExtract(N->getOperand(i))) {
14952
      return SDValue();
14953
    }
14954
  }
14955

14956
  // If the vector extract indices are not correct, add the appropriate
14957
  // vector_shuffle.
14958
  int TgtElemArrayIdx;
14959
  int InputSize = Input.getValueType().getScalarSizeInBits();
14960
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
14961
  if (InputSize + OutputSize == 40)
14962
    TgtElemArrayIdx = 0;
14963
  else if (InputSize + OutputSize == 72)
14964
    TgtElemArrayIdx = 1;
14965
  else if (InputSize + OutputSize == 48)
14966
    TgtElemArrayIdx = 2;
14967
  else if (InputSize + OutputSize == 80)
14968
    TgtElemArrayIdx = 3;
14969
  else if (InputSize + OutputSize == 96)
14970
    TgtElemArrayIdx = 4;
14971
  else
14972
    return SDValue();
14973

14974
  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14975
  CorrectElems = DAG.getDataLayout().isLittleEndian()
14976
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14977
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14978
  if (Elems != CorrectElems) {
14979
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14980
  }
14981

14982
  // Regular lowering will catch cases where a shuffle is not needed.
14983
  return SDValue();
14984
}
14985

14986
// Look for the pattern of a load from a narrow width to i128, feeding
14987
// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14988
// (LXVRZX). This node represents a zero extending load that will be matched
14989
// to the Load VSX Vector Rightmost instructions.
14990
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14991
  SDLoc DL(N);
14992

14993
  // This combine is only eligible for a BUILD_VECTOR of v1i128.
14994
  if (N->getValueType(0) != MVT::v1i128)
14995
    return SDValue();
14996

14997
  SDValue Operand = N->getOperand(0);
14998
  // Proceed with the transformation if the operand to the BUILD_VECTOR
14999
  // is a load instruction.
15000
  if (Operand.getOpcode() != ISD::LOAD)
15001
    return SDValue();
15002

15003
  auto *LD = cast<LoadSDNode>(Operand);
15004
  EVT MemoryType = LD->getMemoryVT();
15005

15006
  // This transformation is only valid if the we are loading either a byte,
15007
  // halfword, word, or doubleword.
15008
  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15009
                     MemoryType == MVT::i32 || MemoryType == MVT::i64;
15010

15011
  // Ensure that the load from the narrow width is being zero extended to i128.
15012
  if (!ValidLDType ||
15013
      (LD->getExtensionType() != ISD::ZEXTLOAD &&
15014
       LD->getExtensionType() != ISD::EXTLOAD))
15015
    return SDValue();
15016

15017
  SDValue LoadOps[] = {
15018
      LD->getChain(), LD->getBasePtr(),
15019
      DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15020

15021
  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15022
                                 DAG.getVTList(MVT::v1i128, MVT::Other),
15023
                                 LoadOps, MemoryType, LD->getMemOperand());
15024
}
15025

15026
/// DAG combine entry point for BUILD_VECTOR nodes.
///
/// Requires VSX. Tries, in order: folding per-element fp-to-int conversions
/// into one vector conversion, merging consecutive loads into a vector load,
/// the P9 vector sign-extend pattern (after legalization only), the ISA 3.1
/// zero-extending load pattern, and finally a v2f64 build of two
/// [su]int_to_fp of adjacent v4i32 lanes. Returns an empty SDValue when none
/// applies.
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR)
    if (SDValue VecConv = combineElementTruncationToVectorTruncation(N, DCI))
      return VecConv;

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  if (SDValue WideLoad = combineBVOfConsecutiveLoads(N, DAG))
    return WideLoad;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize())
    if (SDValue VecSExt = combineBVOfVecSExt(N, DAG))
      return VecSExt;

  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
  // is a load from <valid narrow width> to i128.
  if (Subtarget.isISA3_1())
    if (SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG))
      return BVOfZLoad;

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  // Both operands must use the same one of the two conversion opcodes.
  SDValue SecondInput = N->getOperand(1);
  const unsigned ConvOpc = FirstInput.getOpcode();
  if (ConvOpc != ISD::SINT_TO_FP && ConvOpc != ISD::UINT_TO_FP)
    return SDValue();
  if (SecondInput.getOpcode() != ConvOpc)
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = SecondInput.getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  // Both extracts need constant indices into the same v4i32 vector.
  auto *Ext1Idx = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  auto *Ext2Idx = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Idx || !Ext2Idx)
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  if (SrcVec.getValueType() != MVT::v4i32 || SrcVec != Ext2.getOperand(0))
    return SDValue();

  // Only lane pairs (0, 1) and (2, 3) are handled; which subvector index
  // they correspond to depends on endianness.
  int FirstElem = Ext1Idx->getZExtValue();
  int SecondElem = Ext2Idx->getZExtValue();
  const bool IsLE = Subtarget.isLittleEndian();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = IsLE ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = IsLE ? 0 : 1;
  else
    return SDValue();

  const unsigned NodeType = SecondInput.getOpcode() == ISD::SINT_TO_FP
                                ? PPCISD::SINT_VEC_TO_FP
                                : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec,
                     DAG.getIntPtrConstant(SubvecIdx, dl));
}
15116

15117
/// DAG combine for [SU]INT_TO_FP nodes producing f32 or f64.
///
/// Two patterns are handled:
///  * With P9 vector and P9 Altivec support, a conversion of an i8/i16 load
///    becomes an LXSIZX load (plus VEXTS when signed) feeding an FCFID*
///    conversion.
///  * An int-to-fp whose input is itself an fp-to-int becomes an FCTID*Z /
///    FCFID* pair with no intermediate store/load.
/// Returns an empty SDValue otherwise.
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  EVT DstVT = Op.getValueType();
  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  EVT IntVT = FirstOperand.getValueType();
  if (!IntVT.isSimple())
    return SDValue();
  if (IntVT.getSimpleVT() <= MVT(MVT::i1) ||
      IntVT.getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  const bool IsSignedConv = N->getOpcode() == ISD::SINT_TO_FP;
  const bool DstIsDouble = DstVT == MVT::f64;

  const bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
                           (IntVT == MVT::i8 || IntVT == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    unsigned ConvOp;
    if (IsSignedConv)
      ConvOp = DstIsDouble ? PPCISD::FCFID : PPCISD::FCFIDS;
    else
      ConvOp = DstIsDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS;

    // Width of the loaded integer in bytes.
    SDValue WidthConst =
        DAG.getIntPtrConstant(IntVT == MVT::i8 ? 1 : 2, dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = {LDN->getChain(), LDN->getBasePtr(), WidthConst};
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());
    DAG.makeEquivalentMemoryOrdering(LDN, Ld);

    SDValue ConvInput = Ld;
    // For signed conversion, we need to sign-extend the value in the VSR
    if (IsSignedConv) {
      SDValue ExtOps[] = {Ld, WidthConst};
      ConvInput = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
    }
    return DAG.getNode(ConvOp, dl, DstIsDouble ? MVT::f64 : MVT::f32,
                       ConvInput);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (IntVT == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  const bool ConvertDirectlyToSingle =
      Subtarget.hasFPCVT() && DstVT == MVT::f32;
  unsigned FCFOp;
  if (ConvertDirectlyToSingle)
    FCFOp = IsSignedConv ? PPCISD::FCFIDS : PPCISD::FCFIDUS;
  else
    FCFOp = IsSignedConv ? PPCISD::FCFID : PPCISD::FCFIDU;
  MVT FCFTy = ConvertDirectlyToSingle ? MVT::f32 : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  const unsigned InnerOpc = FirstOperand.getOpcode();
  const bool InnerIsFPToSInt = InnerOpc == ISD::FP_TO_SINT;
  const bool InnerIsFPToUInt =
      InnerOpc == ISD::FP_TO_UINT && Subtarget.hasFPCVT();
  if (!InnerIsFPToSInt && !InnerIsFPToUInt)
    return SDValue();

  SDValue Src = FirstOperand.getOperand(0);
  if (Src.getValueType() == MVT::f32) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
    DCI.AddToWorklist(Src.getNode());
  } else if (Src.getValueType() != MVT::f64) {
    // Make sure that we don't pick up a ppc_fp128 source value.
    return SDValue();
  }

  const unsigned FCTOp = InnerIsFPToSInt ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

  // Without FPCVT the conversion above produced an f64; round it to f32.
  if (DstVT == MVT::f32 && !Subtarget.hasFPCVT()) {
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    DCI.AddToWorklist(FP.getNode());
  }

  return FP;
}
15223

15224
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15225
// builtins) into loads with swaps.
15226
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15227
                                              DAGCombinerInfo &DCI) const {
15228
  // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15229
  // load combines.
15230
  if (DCI.isBeforeLegalizeOps())
15231
    return SDValue();
15232

15233
  SelectionDAG &DAG = DCI.DAG;
15234
  SDLoc dl(N);
15235
  SDValue Chain;
15236
  SDValue Base;
15237
  MachineMemOperand *MMO;
15238

15239
  switch (N->getOpcode()) {
15240
  default:
15241
    llvm_unreachable("Unexpected opcode for little endian VSX load");
15242
  case ISD::LOAD: {
15243
    LoadSDNode *LD = cast<LoadSDNode>(N);
15244
    Chain = LD->getChain();
15245
    Base = LD->getBasePtr();
15246
    MMO = LD->getMemOperand();
15247
    // If the MMO suggests this isn't a load of a full vector, leave
15248
    // things alone.  For a built-in, we have to make the change for
15249
    // correctness, so if there is a size problem that will be a bug.
15250
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15251
      return SDValue();
15252
    break;
15253
  }
15254
  case ISD::INTRINSIC_W_CHAIN: {
15255
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15256
    Chain = Intrin->getChain();
15257
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15258
    // us what we want. Get operand 2 instead.
15259
    Base = Intrin->getOperand(2);
15260
    MMO = Intrin->getMemOperand();
15261
    break;
15262
  }
15263
  }
15264

15265
  MVT VecTy = N->getValueType(0).getSimpleVT();
15266

15267
  SDValue LoadOps[] = { Chain, Base };
15268
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15269
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
15270
                                         LoadOps, MVT::v2f64, MMO);
15271

15272
  DCI.AddToWorklist(Load.getNode());
15273
  Chain = Load.getValue(1);
15274
  SDValue Swap = DAG.getNode(
15275
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15276
  DCI.AddToWorklist(Swap.getNode());
15277

15278
  // Add a bitcast if the resulting load type doesn't match v2f64.
15279
  if (VecTy != MVT::v2f64) {
15280
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15281
    DCI.AddToWorklist(N.getNode());
15282
    // Package {bitcast value, swap's chain} to match Load's shape.
15283
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15284
                       N, Swap.getValue(1));
15285
  }
15286

15287
  return Swap;
15288
}
15289

15290
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15291
// builtins) into stores with swaps.
15292
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15293
                                               DAGCombinerInfo &DCI) const {
15294
  // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15295
  // store combines.
15296
  if (DCI.isBeforeLegalizeOps())
15297
    return SDValue();
15298

15299
  SelectionDAG &DAG = DCI.DAG;
15300
  SDLoc dl(N);
15301
  SDValue Chain;
15302
  SDValue Base;
15303
  unsigned SrcOpnd;
15304
  MachineMemOperand *MMO;
15305

15306
  switch (N->getOpcode()) {
15307
  default:
15308
    llvm_unreachable("Unexpected opcode for little endian VSX store");
15309
  case ISD::STORE: {
15310
    StoreSDNode *ST = cast<StoreSDNode>(N);
15311
    Chain = ST->getChain();
15312
    Base = ST->getBasePtr();
15313
    MMO = ST->getMemOperand();
15314
    SrcOpnd = 1;
15315
    // If the MMO suggests this isn't a store of a full vector, leave
15316
    // things alone.  For a built-in, we have to make the change for
15317
    // correctness, so if there is a size problem that will be a bug.
15318
    if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15319
      return SDValue();
15320
    break;
15321
  }
15322
  case ISD::INTRINSIC_VOID: {
15323
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15324
    Chain = Intrin->getChain();
15325
    // Intrin->getBasePtr() oddly does not get what we want.
15326
    Base = Intrin->getOperand(3);
15327
    MMO = Intrin->getMemOperand();
15328
    SrcOpnd = 2;
15329
    break;
15330
  }
15331
  }
15332

15333
  SDValue Src = N->getOperand(SrcOpnd);
15334
  MVT VecTy = Src.getValueType().getSimpleVT();
15335

15336
  // All stores are done as v2f64 and possible bit cast.
15337
  if (VecTy != MVT::v2f64) {
15338
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15339
    DCI.AddToWorklist(Src.getNode());
15340
  }
15341

15342
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15343
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15344
  DCI.AddToWorklist(Swap.getNode());
15345
  Chain = Swap.getValue(1);
15346
  SDValue StoreOps[] = { Chain, Swap, Base };
15347
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15348
                                          DAG.getVTList(MVT::Other),
15349
                                          StoreOps, VecTy, MMO);
15350
  DCI.AddToWorklist(Store.getNode());
15351
  return Store;
15352
}
15353

15354
// Handle DAG combine for STORE (FP_TO_INT F).
15355
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15356
                                               DAGCombinerInfo &DCI) const {
15357
  SelectionDAG &DAG = DCI.DAG;
15358
  SDLoc dl(N);
15359
  unsigned Opcode = N->getOperand(1).getOpcode();
15360
  (void)Opcode;
15361
  bool Strict = N->getOperand(1)->isStrictFPOpcode();
15362

15363
  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15364
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15365
         && "Not a FP_TO_INT Instruction!");
15366

15367
  SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15368
  EVT Op1VT = N->getOperand(1).getValueType();
15369
  EVT ResVT = Val.getValueType();
15370

15371
  if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15372
    return SDValue();
15373

15374
  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15375
  bool ValidTypeForStoreFltAsInt =
15376
        (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15377
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15378

15379
  // TODO: Lower conversion from f128 on all VSX targets
15380
  if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15381
    return SDValue();
15382

15383
  if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15384
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15385
    return SDValue();
15386

15387
  Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15388

15389
  // Set number of bytes being converted.
15390
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15391
  SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15392
                   DAG.getIntPtrConstant(ByteSize, dl, false),
15393
                   DAG.getValueType(Op1VT)};
15394

15395
  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15396
          DAG.getVTList(MVT::Other), Ops,
15397
          cast<StoreSDNode>(N)->getMemoryVT(),
15398
          cast<StoreSDNode>(N)->getMemOperand());
15399

15400
  return Val;
15401
}
15402

15403
static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15404
  // Check that the source of the element keeps flipping
15405
  // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15406
  bool PrevElemFromFirstVec = Mask[0] < NumElts;
15407
  for (int i = 1, e = Mask.size(); i < e; i++) {
15408
    if (PrevElemFromFirstVec && Mask[i] < NumElts)
15409
      return false;
15410
    if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15411
      return false;
15412
    PrevElemFromFirstVec = !PrevElemFromFirstVec;
15413
  }
15414
  return true;
15415
}
15416

15417
/// Return true if \p Op is a BUILD_VECTOR whose operands are all either undef
/// or equal to one common value. A BUILD_VECTOR made up solely of undef
/// operands is accepted as well.
static bool isSplatBV(SDValue Op) {
  if (Op.getOpcode() != ISD::BUILD_VECTOR)
    return false;

  const int NumOps = Op.getNumOperands();

  // Pick the reference operand: the first one that is not undef. If every
  // operand is undef, the scan ends holding the last operand.
  SDValue Reference;
  for (int Idx = 0; Idx != NumOps; ++Idx) {
    Reference = Op.getOperand(Idx);
    if (!Reference.isUndef())
      break;
  }

  // Every remaining operand has to be undef or match the reference.
  for (int Idx = 1; Idx < NumOps; ++Idx) {
    SDValue Elt = Op.getOperand(Idx);
    if (Elt.isUndef() || Elt == Reference)
      continue;
    return false;
  }
  return true;
}
15435

15436
/// If \p Op is a SCALAR_TO_VECTOR, or a BITCAST whose operand is a
/// SCALAR_TO_VECTOR, return that SCALAR_TO_VECTOR node. Otherwise return an
/// empty SDValue. At most one BITCAST is looked through.
static SDValue isScalarToVec(SDValue Op) {
  SDValue Candidate = Op.getOpcode() == ISD::BITCAST ? Op.getOperand(0) : Op;
  return Candidate.getOpcode() == ISD::SCALAR_TO_VECTOR ? Candidate
                                                        : SDValue();
}
15446

15447
// Fix up the shuffle mask to account for the fact that the result of
15448
// scalar_to_vector is not in lane zero. This just takes all values in
15449
// the ranges specified by the min/max indices and adds the number of
15450
// elements required to ensure each element comes from the respective
15451
// position in the valid lane.
15452
// On little endian, that's just the corresponding element in the other
15453
// half of the vector. On big endian, it is in the same half but right
15454
// justified rather than left justified in that half.
15455
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15456
                                            int LHSMaxIdx, int RHSMinIdx,
15457
                                            int RHSMaxIdx, int HalfVec,
15458
                                            unsigned ValidLaneWidth,
15459
                                            const PPCSubtarget &Subtarget) {
15460
  for (int i = 0, e = ShuffV.size(); i < e; i++) {
15461
    int Idx = ShuffV[i];
15462
    if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15463
      ShuffV[i] +=
15464
          Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15465
  }
15466
}
15467

15468
// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15469
// the original is:
15470
// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15471
// In such a case, just change the shuffle mask to extract the element
15472
// from the permuted index.
15473
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15474
                               const PPCSubtarget &Subtarget) {
15475
  SDLoc dl(OrigSToV);
15476
  EVT VT = OrigSToV.getValueType();
15477
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15478
         "Expecting a SCALAR_TO_VECTOR here");
15479
  SDValue Input = OrigSToV.getOperand(0);
15480

15481
  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15482
    ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15483
    SDValue OrigVector = Input.getOperand(0);
15484

15485
    // Can't handle non-const element indices or different vector types
15486
    // for the input to the extract and the output of the scalar_to_vector.
15487
    if (Idx && VT == OrigVector.getValueType()) {
15488
      unsigned NumElts = VT.getVectorNumElements();
15489
      assert(
15490
          NumElts > 1 &&
15491
          "Cannot produce a permuted scalar_to_vector for one element vector");
15492
      SmallVector<int, 16> NewMask(NumElts, -1);
15493
      unsigned ResultInElt = NumElts / 2;
15494
      ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15495
      NewMask[ResultInElt] = Idx->getZExtValue();
15496
      return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15497
    }
15498
  }
15499
  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15500
                     OrigSToV.getOperand(0));
15501
}
15502

15503
// On little endian subtargets, combine shuffles such as:
15504
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15505
// into:
15506
// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15507
// because the latter can be matched to a single instruction merge.
15508
// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15509
// to put the value into element zero. Adjust the shuffle mask so that the
15510
// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15511
// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15512
// nodes with elements smaller than doubleword because all the ways
15513
// of getting scalar data into a vector register put the value in the
15514
// rightmost element of the left half of the vector.
15515
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15516
                                                SelectionDAG &DAG) const {
15517
  SDValue LHS = SVN->getOperand(0);
15518
  SDValue RHS = SVN->getOperand(1);
15519
  auto Mask = SVN->getMask();
15520
  int NumElts = LHS.getValueType().getVectorNumElements();
15521
  SDValue Res(SVN, 0);
15522
  SDLoc dl(SVN);
15523
  bool IsLittleEndian = Subtarget.isLittleEndian();
15524

15525
  // On big endian targets this is only useful for subtargets with direct moves.
15526
  // On little endian targets it would be useful for all subtargets with VSX.
15527
  // However adding special handling for LE subtargets without direct moves
15528
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15529
  // which includes direct moves.
15530
  if (!Subtarget.hasDirectMove())
15531
    return Res;
15532

15533
  // If this is not a shuffle of a shuffle and the first element comes from
15534
  // the second vector, canonicalize to the commuted form. This will make it
15535
  // more likely to match one of the single instruction patterns.
15536
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15537
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15538
    std::swap(LHS, RHS);
15539
    Res = DAG.getCommutedVectorShuffle(*SVN);
15540
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15541
  }
15542

15543
  // Adjust the shuffle mask if either input vector comes from a
15544
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15545
  // form (to prevent the need for a swap).
15546
  SmallVector<int, 16> ShuffV(Mask);
15547
  SDValue SToVLHS = isScalarToVec(LHS);
15548
  SDValue SToVRHS = isScalarToVec(RHS);
15549
  if (SToVLHS || SToVRHS) {
15550
    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15551
    // same type and have differing element sizes, then do not perform
15552
    // the following transformation. The current transformation for
15553
    // SCALAR_TO_VECTOR assumes that both input vectors have the same
15554
    // element size. This will be updated in the future to account for
15555
    // differing sizes of the LHS and RHS.
15556
    if (SToVLHS && SToVRHS &&
15557
        (SToVLHS.getValueType().getScalarSizeInBits() !=
15558
         SToVRHS.getValueType().getScalarSizeInBits()))
15559
      return Res;
15560

15561
    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15562
                            : SToVRHS.getValueType().getVectorNumElements();
15563
    int NumEltsOut = ShuffV.size();
15564
    // The width of the "valid lane" (i.e. the lane that contains the value that
15565
    // is vectorized) needs to be expressed in terms of the number of elements
15566
    // of the shuffle. It is thereby the ratio of the values before and after
15567
    // any bitcast.
15568
    unsigned ValidLaneWidth =
15569
        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15570
                      LHS.getValueType().getScalarSizeInBits()
15571
                : SToVRHS.getValueType().getScalarSizeInBits() /
15572
                      RHS.getValueType().getScalarSizeInBits();
15573

15574
    // Initially assume that neither input is permuted. These will be adjusted
15575
    // accordingly if either input is.
15576
    int LHSMaxIdx = -1;
15577
    int RHSMinIdx = -1;
15578
    int RHSMaxIdx = -1;
15579
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15580

15581
    // Get the permuted scalar to vector nodes for the source(s) that come from
15582
    // ISD::SCALAR_TO_VECTOR.
15583
    // On big endian systems, this only makes sense for element sizes smaller
15584
    // than 64 bits since for 64-bit elements, all instructions already put
15585
    // the value into element zero. Since scalar size of LHS and RHS may differ
15586
    // after isScalarToVec, this should be checked using their own sizes.
15587
    if (SToVLHS) {
15588
      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15589
        return Res;
15590
      // Set up the values for the shuffle vector fixup.
15591
      LHSMaxIdx = NumEltsOut / NumEltsIn;
15592
      SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15593
      if (SToVLHS.getValueType() != LHS.getValueType())
15594
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15595
      LHS = SToVLHS;
15596
    }
15597
    if (SToVRHS) {
15598
      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15599
        return Res;
15600
      RHSMinIdx = NumEltsOut;
15601
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15602
      SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15603
      if (SToVRHS.getValueType() != RHS.getValueType())
15604
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15605
      RHS = SToVRHS;
15606
    }
15607

15608
    // Fix up the shuffle mask to reflect where the desired element actually is.
15609
    // The minimum and maximum indices that correspond to element zero for both
15610
    // the LHS and RHS are computed and will control which shuffle mask entries
15611
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
15612
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15613
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15614
                                    HalfVec, ValidLaneWidth, Subtarget);
15615
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15616

15617
    // We may have simplified away the shuffle. We won't be able to do anything
15618
    // further with it here.
15619
    if (!isa<ShuffleVectorSDNode>(Res))
15620
      return Res;
15621
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15622
  }
15623

15624
  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15625
  // The common case after we commuted the shuffle is that the RHS is a splat
15626
  // and we have elements coming in from the splat at indices that are not
15627
  // conducive to using a merge.
15628
  // Example:
15629
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15630
  if (!isSplatBV(TheSplat))
15631
    return Res;
15632

15633
  // We are looking for a mask such that all even elements are from
15634
  // one vector and all odd elements from the other.
15635
  if (!isAlternatingShuffMask(Mask, NumElts))
15636
    return Res;
15637

15638
  // Adjust the mask so we are pulling in the same index from the splat
15639
  // as the index from the interesting vector in consecutive elements.
15640
  if (IsLittleEndian) {
15641
    // Example (even elements from first vector):
15642
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15643
    if (Mask[0] < NumElts)
15644
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
15645
        if (ShuffV[i] < 0)
15646
          continue;
15647
        // If element from non-splat is undef, pick first element from splat.
15648
        ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15649
      }
15650
    // Example (odd elements from first vector):
15651
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15652
    else
15653
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
15654
        if (ShuffV[i] < 0)
15655
          continue;
15656
        // If element from non-splat is undef, pick first element from splat.
15657
        ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15658
      }
15659
  } else {
15660
    // Example (even elements from first vector):
15661
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15662
    if (Mask[0] < NumElts)
15663
      for (int i = 0, e = Mask.size(); i < e; i += 2) {
15664
        if (ShuffV[i] < 0)
15665
          continue;
15666
        // If element from non-splat is undef, pick first element from splat.
15667
        ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15668
      }
15669
    // Example (odd elements from first vector):
15670
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15671
    else
15672
      for (int i = 1, e = Mask.size(); i < e; i += 2) {
15673
        if (ShuffV[i] < 0)
15674
          continue;
15675
        // If element from non-splat is undef, pick first element from splat.
15676
        ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15677
      }
15678
  }
15679

15680
  // If the RHS has undefs, we need to remove them since we may have created
15681
  // a shuffle that adds those instead of the splat value.
15682
  SDValue SplatVal =
15683
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15684
  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15685

15686
  if (IsLittleEndian)
15687
    RHS = TheSplat;
15688
  else
15689
    LHS = TheSplat;
15690
  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15691
}
15692

15693
// Fold an element-reversing shuffle into the vector load that feeds it or the
// vector store that consumes it, producing a PPCISD::LOAD_VEC_BE or
// PPCISD::STORE_VEC_BE memory intrinsic node instead.
//
// \p SVN is the shuffle and \p LSBase the normal load/store it is paired
// with. Returns an empty SDValue when the combine does not apply.
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
        "Not a reverse memop pattern!");

  // True when the shuffle mask is exactly <N-1, N-2, ..., 1, 0>.
  auto IsElementReverse = [](const ShuffleVectorSDNode *Shuffle) -> bool {
    auto ShuffleMask = Shuffle->getMask();
    const int LastPos = static_cast<int>(ShuffleMask.size()) - 1;
    for (int Expected = 0; Expected <= LastPos; ++Expected)
      if (ShuffleMask[LastPos - Expected] != Expected)
        return false;
    return true;
  };

  EVT VT = SVN->getValueType(0);

  // Only done for legal types on little endian VSX subtargets with P9 vector
  // support. Before P9, we have PPCVSXSwapRemoval pass to hack the element
  // order (see comment in PPCVSXSwapRemoval.cpp); this combine conflicts with
  // that optimization, so it is not performed there.
  const bool TargetQualifies = isTypeLegal(VT) && Subtarget.isLittleEndian() &&
                               Subtarget.hasVSX() && Subtarget.hasP9Vector();
  if (!TargetQualifies || !IsElementReverse(SVN))
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(LSBase);

  switch (LSBase->getOpcode()) {
  case ISD::LOAD: {
    // If the loaded value (result 0) has any user that is not a
    // shufflevector, it is not profitable to replace the shufflevector with
    // a reverse load.
    for (SDNode::use_iterator UseIt = LSBase->use_begin(),
                              UseEnd = LSBase->use_end();
         UseIt != UseEnd; ++UseIt) {
      const bool UsesLoadedValue = UseIt.getUse().getResNo() == 0;
      if (UsesLoadedValue && UseIt->getOpcode() != ISD::VECTOR_SHUFFLE)
        return SDValue();
    }

    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }
  case ISD::STORE: {
    // If there are other uses of the shuffle, the swap cannot be avoided.
    // Forcing the use of an X-Form (since swapped stores only have
    // X-Forms) without removing the swap is unprofitable.
    if (!SVN->hasOneUse())
      return SDValue();

    // Store the shuffle's un-reversed input directly.
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }
  default:
    break;
  }

  llvm_unreachable("Expected a load or store node here");
}
15761

15762
/// Check whether the intrinsic ID held in operand 1 of \p Intrin is one of
/// the PPC store-conditional intrinsics (stdcx, stwcx, sthcx, stbcx).
///
/// On a match, \p StoreWidth is set to 8, 4, 2 or 1 respectively (presumably
/// the access width in bytes, matching the doubleword/word/halfword/byte
/// mnemonics) and true is returned. Otherwise \p StoreWidth is left untouched
/// and false is returned.
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
  unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
  switch (IntrinsicID) {
  case Intrinsic::ppc_stdcx:
    StoreWidth = 8;
    return true;
  case Intrinsic::ppc_stwcx:
    StoreWidth = 4;
    return true;
  case Intrinsic::ppc_sthcx:
    StoreWidth = 2;
    return true;
  case Intrinsic::ppc_stbcx:
    StoreWidth = 1;
    return true;
  default:
    return false;
  }
}
15776

15777
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15778
                                             DAGCombinerInfo &DCI) const {
15779
  SelectionDAG &DAG = DCI.DAG;
15780
  SDLoc dl(N);
15781
  switch (N->getOpcode()) {
15782
  default: break;
15783
  case ISD::ADD:
15784
    return combineADD(N, DCI);
15785
  case ISD::AND: {
15786
    // We don't want (and (zext (shift...)), C) if C fits in the width of the
15787
    // original input as that will prevent us from selecting optimal rotates.
15788
    // This only matters if the input to the extend is i32 widened to i64.
15789
    SDValue Op1 = N->getOperand(0);
15790
    SDValue Op2 = N->getOperand(1);
15791
    if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15792
         Op1.getOpcode() != ISD::ANY_EXTEND) ||
15793
        !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15794
        Op1.getOperand(0).getValueType() != MVT::i32)
15795
      break;
15796
    SDValue NarrowOp = Op1.getOperand(0);
15797
    if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15798
        NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15799
      break;
15800

15801
    uint64_t Imm = Op2->getAsZExtVal();
15802
    // Make sure that the constant is narrow enough to fit in the narrow type.
15803
    if (!isUInt<32>(Imm))
15804
      break;
15805
    SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15806
    SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15807
    return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15808
  }
15809
  case ISD::SHL:
15810
    return combineSHL(N, DCI);
15811
  case ISD::SRA:
15812
    return combineSRA(N, DCI);
15813
  case ISD::SRL:
15814
    return combineSRL(N, DCI);
15815
  case ISD::MUL:
15816
    return combineMUL(N, DCI);
15817
  case ISD::FMA:
15818
  case PPCISD::FNMSUB:
15819
    return combineFMALike(N, DCI);
15820
  case PPCISD::SHL:
15821
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15822
        return N->getOperand(0);
15823
    break;
15824
  case PPCISD::SRL:
15825
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15826
        return N->getOperand(0);
15827
    break;
15828
  case PPCISD::SRA:
15829
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15830
      if (C->isZero() ||  //  0 >>s V -> 0.
15831
          C->isAllOnes()) // -1 >>s V -> -1.
15832
        return N->getOperand(0);
15833
    }
15834
    break;
15835
  case ISD::SIGN_EXTEND:
15836
  case ISD::ZERO_EXTEND:
15837
  case ISD::ANY_EXTEND:
15838
    return DAGCombineExtBoolTrunc(N, DCI);
15839
  case ISD::TRUNCATE:
15840
    return combineTRUNCATE(N, DCI);
15841
  case ISD::SETCC:
15842
    if (SDValue CSCC = combineSetCC(N, DCI))
15843
      return CSCC;
15844
    [[fallthrough]];
15845
  case ISD::SELECT_CC:
15846
    return DAGCombineTruncBoolExt(N, DCI);
15847
  case ISD::SINT_TO_FP:
15848
  case ISD::UINT_TO_FP:
15849
    return combineFPToIntToFP(N, DCI);
15850
  case ISD::VECTOR_SHUFFLE:
15851
    if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15852
      LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15853
      return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15854
    }
15855
    return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15856
  case ISD::STORE: {
15857

15858
    EVT Op1VT = N->getOperand(1).getValueType();
15859
    unsigned Opcode = N->getOperand(1).getOpcode();
15860

15861
    if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15862
        Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15863
      SDValue Val = combineStoreFPToInt(N, DCI);
15864
      if (Val)
15865
        return Val;
15866
    }
15867

15868
    if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15869
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15870
      SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15871
      if (Val)
15872
        return Val;
15873
    }
15874

15875
    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15876
    if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15877
        N->getOperand(1).getNode()->hasOneUse() &&
15878
        (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15879
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15880

15881
      // STBRX can only handle simple types and it makes no sense to store less
15882
      // two bytes in byte-reversed order.
15883
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15884
      if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15885
        break;
15886

15887
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
15888
      // Do an any-extend to 32-bits if this is a half-word input.
15889
      if (BSwapOp.getValueType() == MVT::i16)
15890
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15891

15892
      // If the type of BSWAP operand is wider than stored memory width
15893
      // it need to be shifted to the right side before STBRX.
15894
      if (Op1VT.bitsGT(mVT)) {
15895
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15896
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15897
                              DAG.getConstant(Shift, dl, MVT::i32));
15898
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
15899
        if (Op1VT == MVT::i64)
15900
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15901
      }
15902

15903
      SDValue Ops[] = {
15904
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15905
      };
15906
      return
15907
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15908
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15909
                                cast<StoreSDNode>(N)->getMemOperand());
15910
    }
15911

15912
    // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
15913
    // So it can increase the chance of CSE constant construction.
15914
    if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15915
        isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15916
      // Need to sign-extended to 64-bits to handle negative values.
15917
      EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15918
      uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15919
                                    MemVT.getSizeInBits());
15920
      SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15921

15922
      // DAG.getTruncStore() can't be used here because it doesn't accept
15923
      // the general (base + offset) addressing mode.
15924
      // So we use UpdateNodeOperands and setTruncatingStore instead.
15925
      DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15926
                             N->getOperand(3));
15927
      cast<StoreSDNode>(N)->setTruncatingStore(true);
15928
      return SDValue(N, 0);
15929
    }
15930

15931
    // For little endian, VSX stores require generating xxswapd/lxvd2x.
15932
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15933
    if (Op1VT.isSimple()) {
15934
      MVT StoreVT = Op1VT.getSimpleVT();
15935
      if (Subtarget.needsSwapsForVSXMemOps() &&
15936
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15937
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15938
        return expandVSXStoreForLE(N, DCI);
15939
    }
15940
    break;
15941
  }
15942
  case ISD::LOAD: {
15943
    LoadSDNode *LD = cast<LoadSDNode>(N);
15944
    EVT VT = LD->getValueType(0);
15945

15946
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
15947
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15948
    if (VT.isSimple()) {
15949
      MVT LoadVT = VT.getSimpleVT();
15950
      if (Subtarget.needsSwapsForVSXMemOps() &&
15951
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15952
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15953
        return expandVSXLoadForLE(N, DCI);
15954
    }
15955

15956
    // We sometimes end up with a 64-bit integer load, from which we extract
15957
    // two single-precision floating-point numbers. This happens with
15958
    // std::complex<float>, and other similar structures, because of the way we
15959
    // canonicalize structure copies. However, if we lack direct moves,
15960
    // then the final bitcasts from the extracted integer values to the
15961
    // floating-point numbers turn into store/load pairs. Even with direct moves,
15962
    // just loading the two floating-point numbers is likely better.
15963
    auto ReplaceTwoFloatLoad = [&]() {
15964
      if (VT != MVT::i64)
15965
        return false;
15966

15967
      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15968
          LD->isVolatile())
15969
        return false;
15970

15971
      //  We're looking for a sequence like this:
15972
      //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15973
      //      t16: i64 = srl t13, Constant:i32<32>
15974
      //    t17: i32 = truncate t16
15975
      //  t18: f32 = bitcast t17
15976
      //    t19: i32 = truncate t13
15977
      //  t20: f32 = bitcast t19
15978

15979
      if (!LD->hasNUsesOfValue(2, 0))
15980
        return false;
15981

15982
      auto UI = LD->use_begin();
15983
      while (UI.getUse().getResNo() != 0) ++UI;
15984
      SDNode *Trunc = *UI++;
15985
      while (UI.getUse().getResNo() != 0) ++UI;
15986
      SDNode *RightShift = *UI;
15987
      if (Trunc->getOpcode() != ISD::TRUNCATE)
15988
        std::swap(Trunc, RightShift);
15989

15990
      if (Trunc->getOpcode() != ISD::TRUNCATE ||
15991
          Trunc->getValueType(0) != MVT::i32 ||
15992
          !Trunc->hasOneUse())
15993
        return false;
15994
      if (RightShift->getOpcode() != ISD::SRL ||
15995
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15996
          RightShift->getConstantOperandVal(1) != 32 ||
15997
          !RightShift->hasOneUse())
15998
        return false;
15999

16000
      SDNode *Trunc2 = *RightShift->use_begin();
16001
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16002
          Trunc2->getValueType(0) != MVT::i32 ||
16003
          !Trunc2->hasOneUse())
16004
        return false;
16005

16006
      SDNode *Bitcast = *Trunc->use_begin();
16007
      SDNode *Bitcast2 = *Trunc2->use_begin();
16008

16009
      if (Bitcast->getOpcode() != ISD::BITCAST ||
16010
          Bitcast->getValueType(0) != MVT::f32)
16011
        return false;
16012
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
16013
          Bitcast2->getValueType(0) != MVT::f32)
16014
        return false;
16015

16016
      if (Subtarget.isLittleEndian())
16017
        std::swap(Bitcast, Bitcast2);
16018

16019
      // Bitcast has the second float (in memory-layout order) and Bitcast2
16020
      // has the first one.
16021

16022
      SDValue BasePtr = LD->getBasePtr();
16023
      if (LD->isIndexed()) {
16024
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
16025
               "Non-pre-inc AM on PPC?");
16026
        BasePtr =
16027
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16028
                      LD->getOffset());
16029
      }
16030

16031
      auto MMOFlags =
16032
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16033
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16034
                                      LD->getPointerInfo(), LD->getAlign(),
16035
                                      MMOFlags, LD->getAAInfo());
16036
      SDValue AddPtr =
16037
        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16038
                    BasePtr, DAG.getIntPtrConstant(4, dl));
16039
      SDValue FloatLoad2 = DAG.getLoad(
16040
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16041
          LD->getPointerInfo().getWithOffset(4),
16042
          commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16043

16044
      if (LD->isIndexed()) {
16045
        // Note that DAGCombine should re-form any pre-increment load(s) from
16046
        // what is produced here if that makes sense.
16047
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16048
      }
16049

16050
      DCI.CombineTo(Bitcast2, FloatLoad);
16051
      DCI.CombineTo(Bitcast, FloatLoad2);
16052

16053
      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16054
                                    SDValue(FloatLoad2.getNode(), 1));
16055
      return true;
16056
    };
16057

16058
    if (ReplaceTwoFloatLoad())
16059
      return SDValue(N, 0);
16060

16061
    EVT MemVT = LD->getMemoryVT();
16062
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16063
    Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16064
    if (LD->isUnindexed() && VT.isVector() &&
16065
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16066
          // P8 and later hardware should just use LOAD.
16067
          !Subtarget.hasP8Vector() &&
16068
          (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16069
           VT == MVT::v4f32))) &&
16070
        LD->getAlign() < ABIAlignment) {
16071
      // This is a type-legal unaligned Altivec load.
16072
      SDValue Chain = LD->getChain();
16073
      SDValue Ptr = LD->getBasePtr();
16074
      bool isLittleEndian = Subtarget.isLittleEndian();
16075

16076
      // This implements the loading of unaligned vectors as described in
16077
      // the venerable Apple Velocity Engine overview. Specifically:
16078
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16079
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16080
      //
16081
      // The general idea is to expand a sequence of one or more unaligned
16082
      // loads into an alignment-based permutation-control instruction (lvsl
16083
      // or lvsr), a series of regular vector loads (which always truncate
16084
      // their input address to an aligned address), and a series of
16085
      // permutations.  The results of these permutations are the requested
16086
      // loaded values.  The trick is that the last "extra" load is not taken
16087
      // from the address you might suspect (sizeof(vector) bytes after the
16088
      // last requested load), but rather sizeof(vector) - 1 bytes after the
16089
      // last requested vector. The point of this is to avoid a page fault if
16090
      // the base address happened to be aligned. This works because if the
16091
      // base address is aligned, then adding less than a full vector length
16092
      // will cause the last vector in the sequence to be (re)loaded.
16093
      // Otherwise, the next vector will be fetched as you might suspect was
16094
      // necessary.
16095

16096
      // We might be able to reuse the permutation generation from
16097
      // a different base address offset from this one by an aligned amount.
16098
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16099
      // optimization later.
16100
      Intrinsic::ID Intr, IntrLD, IntrPerm;
16101
      MVT PermCntlTy, PermTy, LDTy;
16102
      Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16103
                            : Intrinsic::ppc_altivec_lvsl;
16104
      IntrLD = Intrinsic::ppc_altivec_lvx;
16105
      IntrPerm = Intrinsic::ppc_altivec_vperm;
16106
      PermCntlTy = MVT::v16i8;
16107
      PermTy = MVT::v4i32;
16108
      LDTy = MVT::v4i32;
16109

16110
      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16111

16112
      // Create the new MMO for the new base load. It is like the original MMO,
16113
      // but represents an area in memory almost twice the vector size centered
16114
      // on the original address. If the address is unaligned, we might start
16115
      // reading up to (sizeof(vector)-1) bytes below the address of the
16116
      // original unaligned load.
16117
      MachineFunction &MF = DAG.getMachineFunction();
16118
      MachineMemOperand *BaseMMO =
16119
        MF.getMachineMemOperand(LD->getMemOperand(),
16120
                                -(int64_t)MemVT.getStoreSize()+1,
16121
                                2*MemVT.getStoreSize()-1);
16122

16123
      // Create the new base load.
16124
      SDValue LDXIntID =
16125
          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16126
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16127
      SDValue BaseLoad =
16128
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16129
                                DAG.getVTList(PermTy, MVT::Other),
16130
                                BaseLoadOps, LDTy, BaseMMO);
16131

16132
      // Note that the value of IncOffset (which is provided to the next
16133
      // load's pointer info offset value, and thus used to calculate the
16134
      // alignment), and the value of IncValue (which is actually used to
16135
      // increment the pointer value) are different! This is because we
16136
      // require the next load to appear to be aligned, even though it
16137
      // is actually offset from the base pointer by a lesser amount.
16138
      int IncOffset = VT.getSizeInBits() / 8;
16139
      int IncValue = IncOffset;
16140

16141
      // Walk (both up and down) the chain looking for another load at the real
16142
      // (aligned) offset (the alignment of the other load does not matter in
16143
      // this case). If found, then do not use the offset reduction trick, as
16144
      // that will prevent the loads from being later combined (as they would
16145
      // otherwise be duplicates).
16146
      if (!findConsecutiveLoad(LD, DAG))
16147
        --IncValue;
16148

16149
      SDValue Increment =
16150
          DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16151
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16152

16153
      MachineMemOperand *ExtraMMO =
16154
        MF.getMachineMemOperand(LD->getMemOperand(),
16155
                                1, 2*MemVT.getStoreSize()-1);
16156
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16157
      SDValue ExtraLoad =
16158
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16159
                                DAG.getVTList(PermTy, MVT::Other),
16160
                                ExtraLoadOps, LDTy, ExtraMMO);
16161

16162
      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16163
        BaseLoad.getValue(1), ExtraLoad.getValue(1));
16164

16165
      // Because vperm has a big-endian bias, we must reverse the order
16166
      // of the input vectors and complement the permute control vector
16167
      // when generating little endian code.  We have already handled the
16168
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16169
      // and ExtraLoad here.
16170
      SDValue Perm;
16171
      if (isLittleEndian)
16172
        Perm = BuildIntrinsicOp(IntrPerm,
16173
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16174
      else
16175
        Perm = BuildIntrinsicOp(IntrPerm,
16176
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16177

16178
      if (VT != PermTy)
16179
        Perm = Subtarget.hasAltivec()
16180
                   ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16181
                   : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16182
                                 DAG.getTargetConstant(1, dl, MVT::i64));
16183
                               // second argument is 1 because this rounding
16184
                               // is always exact.
16185

16186
      // The output of the permutation is our loaded result, the TokenFactor is
16187
      // our new chain.
16188
      DCI.CombineTo(N, Perm, TF);
16189
      return SDValue(N, 0);
16190
    }
16191
    }
16192
    break;
16193
    case ISD::INTRINSIC_WO_CHAIN: {
16194
      bool isLittleEndian = Subtarget.isLittleEndian();
16195
      unsigned IID = N->getConstantOperandVal(0);
16196
      Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16197
                                           : Intrinsic::ppc_altivec_lvsl);
16198
      if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16199
        SDValue Add = N->getOperand(1);
16200

16201
        int Bits = 4 /* 16 byte alignment */;
16202

16203
        if (DAG.MaskedValueIsZero(Add->getOperand(1),
16204
                                  APInt::getAllOnes(Bits /* alignment */)
16205
                                      .zext(Add.getScalarValueSizeInBits()))) {
16206
          SDNode *BasePtr = Add->getOperand(0).getNode();
16207
          for (SDNode *U : BasePtr->uses()) {
16208
          if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16209
              U->getConstantOperandVal(0) == IID) {
16210
            // We've found another LVSL/LVSR, and this address is an aligned
16211
            // multiple of that one. The results will be the same, so use the
16212
            // one we've just found instead.
16213

16214
            return SDValue(U, 0);
16215
          }
16216
          }
16217
        }
16218

16219
        if (isa<ConstantSDNode>(Add->getOperand(1))) {
16220
          SDNode *BasePtr = Add->getOperand(0).getNode();
16221
          for (SDNode *U : BasePtr->uses()) {
16222
          if (U->getOpcode() == ISD::ADD &&
16223
              isa<ConstantSDNode>(U->getOperand(1)) &&
16224
              (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16225
                      (1ULL << Bits) ==
16226
                  0) {
16227
            SDNode *OtherAdd = U;
16228
            for (SDNode *V : OtherAdd->uses()) {
16229
              if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16230
                  V->getConstantOperandVal(0) == IID) {
16231
                return SDValue(V, 0);
16232
              }
16233
            }
16234
          }
16235
          }
16236
        }
16237
      }
16238

16239
      // Combine vmaxsw/h/b(a, a's negation) to abs(a)
16240
      // Expose the vabsduw/h/b opportunity for down stream
16241
      if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16242
          (IID == Intrinsic::ppc_altivec_vmaxsw ||
16243
           IID == Intrinsic::ppc_altivec_vmaxsh ||
16244
           IID == Intrinsic::ppc_altivec_vmaxsb)) {
16245
        SDValue V1 = N->getOperand(1);
16246
        SDValue V2 = N->getOperand(2);
16247
        if ((V1.getSimpleValueType() == MVT::v4i32 ||
16248
             V1.getSimpleValueType() == MVT::v8i16 ||
16249
             V1.getSimpleValueType() == MVT::v16i8) &&
16250
            V1.getSimpleValueType() == V2.getSimpleValueType()) {
16251
          // (0-a, a)
16252
          if (V1.getOpcode() == ISD::SUB &&
16253
              ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16254
              V1.getOperand(1) == V2) {
16255
            return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16256
          }
16257
          // (a, 0-a)
16258
          if (V2.getOpcode() == ISD::SUB &&
16259
              ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16260
              V2.getOperand(1) == V1) {
16261
            return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16262
          }
16263
          // (x-y, y-x)
16264
          if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16265
              V1.getOperand(0) == V2.getOperand(1) &&
16266
              V1.getOperand(1) == V2.getOperand(0)) {
16267
            return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16268
          }
16269
        }
16270
      }
16271
    }
16272

16273
    break;
16274
  case ISD::INTRINSIC_W_CHAIN:
16275
      switch (N->getConstantOperandVal(1)) {
16276
      default:
16277
        break;
16278
      case Intrinsic::ppc_altivec_vsum4sbs:
16279
      case Intrinsic::ppc_altivec_vsum4shs:
16280
      case Intrinsic::ppc_altivec_vsum4ubs: {
16281
        // These sum-across intrinsics only have a chain due to the side effect
16282
        // that they may set the SAT bit. If we know the SAT bit will not be set
16283
        // for some inputs, we can replace any uses of their chain with the
16284
        // input chain.
16285
        if (BuildVectorSDNode *BVN =
16286
                dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16287
          APInt APSplatBits, APSplatUndef;
16288
          unsigned SplatBitSize;
16289
          bool HasAnyUndefs;
16290
          bool BVNIsConstantSplat = BVN->isConstantSplat(
16291
              APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16292
              !Subtarget.isLittleEndian());
16293
          // If the constant splat vector is 0, the SAT bit will not be set.
16294
          if (BVNIsConstantSplat && APSplatBits == 0)
16295
            DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16296
        }
16297
        return SDValue();
16298
      }
16299
    case Intrinsic::ppc_vsx_lxvw4x:
16300
    case Intrinsic::ppc_vsx_lxvd2x:
16301
      // For little endian, VSX loads require generating lxvd2x/xxswapd.
16302
      // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16303
      if (Subtarget.needsSwapsForVSXMemOps())
16304
        return expandVSXLoadForLE(N, DCI);
16305
      break;
16306
    }
16307
    break;
16308
  case ISD::INTRINSIC_VOID:
16309
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
16310
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16311
    if (Subtarget.needsSwapsForVSXMemOps()) {
16312
      switch (N->getConstantOperandVal(1)) {
16313
      default:
16314
        break;
16315
      case Intrinsic::ppc_vsx_stxvw4x:
16316
      case Intrinsic::ppc_vsx_stxvd2x:
16317
        return expandVSXStoreForLE(N, DCI);
16318
      }
16319
    }
16320
    break;
16321
  case ISD::BSWAP: {
16322
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16323
    // For subtargets without LDBRX, we can still do better than the default
16324
    // expansion even for 64-bit BSWAP (LOAD).
16325
    bool Is64BitBswapOn64BitTgt =
16326
        Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16327
    bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16328
                               N->getOperand(0).hasOneUse();
16329
    if (IsSingleUseNormalLd &&
16330
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16331
         (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16332
      SDValue Load = N->getOperand(0);
16333
      LoadSDNode *LD = cast<LoadSDNode>(Load);
16334
      // Create the byte-swapping load.
16335
      SDValue Ops[] = {
16336
        LD->getChain(),    // Chain
16337
        LD->getBasePtr(),  // Ptr
16338
        DAG.getValueType(N->getValueType(0)) // VT
16339
      };
16340
      SDValue BSLoad =
16341
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16342
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16343
                                              MVT::i64 : MVT::i32, MVT::Other),
16344
                                Ops, LD->getMemoryVT(), LD->getMemOperand());
16345

16346
      // If this is an i16 load, insert the truncate.
16347
      SDValue ResVal = BSLoad;
16348
      if (N->getValueType(0) == MVT::i16)
16349
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16350

16351
      // First, combine the bswap away.  This makes the value produced by the
16352
      // load dead.
16353
      DCI.CombineTo(N, ResVal);
16354

16355
      // Next, combine the load away, we give it a bogus result value but a real
16356
      // chain result.  The result value is dead because the bswap is dead.
16357
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16358

16359
      // Return N so it doesn't get rechecked!
16360
      return SDValue(N, 0);
16361
    }
16362
    // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16363
    // before legalization so that the BUILD_PAIR is handled correctly.
16364
    if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16365
        !IsSingleUseNormalLd)
16366
      return SDValue();
16367
    LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16368

16369
    // Can't split volatile or atomic loads.
16370
    if (!LD->isSimple())
16371
      return SDValue();
16372
    SDValue BasePtr = LD->getBasePtr();
16373
    SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16374
                             LD->getPointerInfo(), LD->getAlign());
16375
    Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16376
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16377
                          DAG.getIntPtrConstant(4, dl));
16378
    MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16379
        LD->getMemOperand(), 4, 4);
16380
    SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16381
    Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16382
    SDValue Res;
16383
    if (Subtarget.isLittleEndian())
16384
      Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16385
    else
16386
      Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16387
    SDValue TF =
16388
        DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16389
                    Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16390
    DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16391
    return Res;
16392
  }
16393
  case PPCISD::VCMP:
16394
    // If a VCMP_rec node already exists with exactly the same operands as this
16395
    // node, use its result instead of this node (VCMP_rec computes both a CR6
16396
    // and a normal output).
16397
    //
16398
    if (!N->getOperand(0).hasOneUse() &&
16399
        !N->getOperand(1).hasOneUse() &&
16400
        !N->getOperand(2).hasOneUse()) {
16401

16402
      // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16403
      SDNode *VCMPrecNode = nullptr;
16404

16405
      SDNode *LHSN = N->getOperand(0).getNode();
16406
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16407
           UI != E; ++UI)
16408
        if (UI->getOpcode() == PPCISD::VCMP_rec &&
16409
            UI->getOperand(1) == N->getOperand(1) &&
16410
            UI->getOperand(2) == N->getOperand(2) &&
16411
            UI->getOperand(0) == N->getOperand(0)) {
16412
          VCMPrecNode = *UI;
16413
          break;
16414
        }
16415

16416
      // If there is no VCMP_rec node, or if the flag value has a single use,
16417
      // don't transform this.
16418
      if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16419
        break;
16420

16421
      // Look at the (necessarily single) use of the flag value.  If it has a
16422
      // chain, this transformation is more complex.  Note that multiple things
16423
      // could use the value result, which we should ignore.
16424
      SDNode *FlagUser = nullptr;
16425
      for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16426
           FlagUser == nullptr; ++UI) {
16427
        assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16428
        SDNode *User = *UI;
16429
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16430
          if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16431
            FlagUser = User;
16432
            break;
16433
          }
16434
        }
16435
      }
16436

16437
      // If the user is a MFOCRF instruction, we know this is safe.
16438
      // Otherwise we give up for right now.
16439
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16440
        return SDValue(VCMPrecNode, 0);
16441
    }
16442
    break;
16443
  case ISD::BR_CC: {
16444
    // If this is a branch on an altivec predicate comparison, lower this so
16445
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
16446
    // lowering is done pre-legalize, because the legalizer lowers the predicate
16447
    // compare down to code that is difficult to reassemble.
16448
    // This code also handles branches that depend on the result of a store
16449
    // conditional.
16450
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16451
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16452

16453
    int CompareOpc;
16454
    bool isDot;
16455

16456
    if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16457
      break;
16458

16459
    // Since we are doing this pre-legalize, the RHS can be a constant of
16460
    // arbitrary bitwidth which may cause issues when trying to get the value
16461
    // from the underlying APInt.
16462
    auto RHSAPInt = RHS->getAsAPIntVal();
16463
    if (!RHSAPInt.isIntN(64))
16464
      break;
16465

16466
    unsigned Val = RHSAPInt.getZExtValue();
16467
    auto isImpossibleCompare = [&]() {
16468
      // If this is a comparison against something other than 0/1, then we know
16469
      // that the condition is never/always true.
16470
      if (Val != 0 && Val != 1) {
16471
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
16472
          return N->getOperand(0);
16473
        // Always !=, turn it into an unconditional branch.
16474
        return DAG.getNode(ISD::BR, dl, MVT::Other,
16475
                           N->getOperand(0), N->getOperand(4));
16476
      }
16477
      return SDValue();
16478
    };
16479
    // Combine branches fed by store conditional instructions (st[bhwd]cx).
16480
    unsigned StoreWidth = 0;
16481
    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16482
        isStoreConditional(LHS, StoreWidth)) {
16483
      if (SDValue Impossible = isImpossibleCompare())
16484
        return Impossible;
16485
      PPC::Predicate CompOpc;
16486
      // eq 0 => ne
16487
      // ne 0 => eq
16488
      // eq 1 => eq
16489
      // ne 1 => ne
16490
      if (Val == 0)
16491
        CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16492
      else
16493
        CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16494

16495
      SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16496
                       DAG.getConstant(StoreWidth, dl, MVT::i32)};
16497
      auto *MemNode = cast<MemSDNode>(LHS);
16498
      SDValue ConstSt = DAG.getMemIntrinsicNode(
16499
          PPCISD::STORE_COND, dl,
16500
          DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16501
          MemNode->getMemoryVT(), MemNode->getMemOperand());
16502

16503
      SDValue InChain;
16504
      // Unchain the branch from the original store conditional.
16505
      if (N->getOperand(0) == LHS.getValue(1))
16506
        InChain = LHS.getOperand(0);
16507
      else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16508
        SmallVector<SDValue, 4> InChains;
16509
        SDValue InTF = N->getOperand(0);
16510
        for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16511
          if (InTF.getOperand(i) != LHS.getValue(1))
16512
            InChains.push_back(InTF.getOperand(i));
16513
        InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16514
      }
16515

16516
      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16517
                         DAG.getConstant(CompOpc, dl, MVT::i32),
16518
                         DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16519
                         ConstSt.getValue(2));
16520
    }
16521

16522
    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16523
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16524
      assert(isDot && "Can't compare against a vector result!");
16525

16526
      if (SDValue Impossible = isImpossibleCompare())
16527
        return Impossible;
16528

16529
      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16530
      // Create the PPCISD altivec 'dot' comparison node.
16531
      SDValue Ops[] = {
16532
        LHS.getOperand(2),  // LHS of compare
16533
        LHS.getOperand(3),  // RHS of compare
16534
        DAG.getConstant(CompareOpc, dl, MVT::i32)
16535
      };
16536
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16537
      SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16538

16539
      // Unpack the result based on how the target uses it.
16540
      PPC::Predicate CompOpc;
16541
      switch (LHS.getConstantOperandVal(1)) {
16542
      default:  // Can't happen, don't crash on invalid number though.
16543
      case 0:   // Branch on the value of the EQ bit of CR6.
16544
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16545
        break;
16546
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
16547
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16548
        break;
16549
      case 2:   // Branch on the value of the LT bit of CR6.
16550
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16551
        break;
16552
      case 3:   // Branch on the inverted value of the LT bit of CR6.
16553
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16554
        break;
16555
      }
16556

16557
      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16558
                         DAG.getConstant(CompOpc, dl, MVT::i32),
16559
                         DAG.getRegister(PPC::CR6, MVT::i32),
16560
                         N->getOperand(4), CompNode.getValue(1));
16561
    }
16562
    break;
16563
  }
16564
  case ISD::BUILD_VECTOR:
16565
    return DAGCombineBuildVector(N, DCI);
16566
  }
16567

16568
  return SDValue();
16569
}
16570

16571
/// Build the target-specific expansion of a signed division of N's first
/// operand by \p Divisor, for divisors that are a power of two or the
/// negation of one.
///
/// Returns an empty SDValue (no custom expansion) when the result type is
/// neither i32 nor i64, when it is i64 on a subtarget that is not PPC64, or
/// when \p Divisor is not a (negated) power of two. Every node created for
/// the expansion is appended to \p Created.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  const EVT ResultVT = N->getValueType(0);

  // Only i32 is handled everywhere; i64 additionally requires a 64-bit
  // subtarget.
  const bool IsI32 = ResultVT == MVT::i32;
  const bool IsI64OnPPC64 = ResultVT == MVT::i64 && Subtarget.isPPC64();
  if (!IsI32 && !IsI64OnPPC64)
    return SDValue();

  // fold (sdiv X, pow2) and (sdiv X, -pow2); anything else is not ours.
  const bool DivisorIsNegated = Divisor.isNegatedPowerOf2();
  if (!DivisorIsNegated && !Divisor.isPowerOf2())
    return SDValue();

  // The shift amount is log2 of the divisor's magnitude.
  const APInt Magnitude = DivisorIsNegated ? -Divisor : Divisor;
  const unsigned ShiftBits = Magnitude.countr_zero();

  SDLoc DL(N);
  SDValue Dividend = N->getOperand(0);
  SDValue Result = DAG.getNode(PPCISD::SRA_ADDZE, DL, ResultVT, Dividend,
                               DAG.getConstant(ShiftBits, DL, ResultVT));
  Created.push_back(Result.getNode());

  if (!DivisorIsNegated)
    return Result;

  // Dividing by -2^k is the negation of dividing by 2^k: emit (0 - Result).
  SDValue Zero = DAG.getConstant(0, DL, ResultVT);
  Result = DAG.getNode(ISD::SUB, DL, ResultVT, Zero, Result);
  Created.push_back(Result.getNode());
  return Result;
}
16600

16601
//===----------------------------------------------------------------------===//
16602
// Inline Assembly Support
16603
//===----------------------------------------------------------------------===//
16604

16605
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16606
                                                      KnownBits &Known,
16607
                                                      const APInt &DemandedElts,
16608
                                                      const SelectionDAG &DAG,
16609
                                                      unsigned Depth) const {
16610
  Known.resetAll();
16611
  switch (Op.getOpcode()) {
16612
  default: break;
16613
  case PPCISD::LBRX: {
16614
    // lhbrx is known to have the top bits cleared out.
16615
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16616
      Known.Zero = 0xFFFF0000;
16617
    break;
16618
  }
16619
  case ISD::INTRINSIC_WO_CHAIN: {
16620
    switch (Op.getConstantOperandVal(0)) {
16621
    default: break;
16622
    case Intrinsic::ppc_altivec_vcmpbfp_p:
16623
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
16624
    case Intrinsic::ppc_altivec_vcmpequb_p:
16625
    case Intrinsic::ppc_altivec_vcmpequh_p:
16626
    case Intrinsic::ppc_altivec_vcmpequw_p:
16627
    case Intrinsic::ppc_altivec_vcmpequd_p:
16628
    case Intrinsic::ppc_altivec_vcmpequq_p:
16629
    case Intrinsic::ppc_altivec_vcmpgefp_p:
16630
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
16631
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
16632
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
16633
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
16634
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
16635
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
16636
    case Intrinsic::ppc_altivec_vcmpgtub_p:
16637
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
16638
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
16639
    case Intrinsic::ppc_altivec_vcmpgtud_p:
16640
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
16641
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
16642
      break;
16643
    }
16644
    break;
16645
  }
16646
  case ISD::INTRINSIC_W_CHAIN: {
16647
    switch (Op.getConstantOperandVal(1)) {
16648
    default:
16649
      break;
16650
    case Intrinsic::ppc_load2r:
16651
      // Top bits are cleared for load2r (which is the same as lhbrx).
16652
      Known.Zero = 0xFFFF0000;
16653
      break;
16654
    }
16655
    break;
16656
  }
16657
  }
16658
}
16659

16660
/// Return the preferred alignment for the loop \p ML.
///
/// For the CPU directives listed below (970 and POWER4 through
/// POWER11/future) a 32-byte alignment is requested when either
///   * \p ML is a nested innermost loop and DisableInnermostLoopAlign32 is
///     not set, or
///   * the loop body is more than 16 and at most 32 bytes of instructions.
/// In every other case (including a null \p ML) the decision is delegated to
/// TargetLowering::getPrefLoopAlignment.
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
  case PPC::DIR_PWR_FUTURE: {
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (more than 16 and at most 32 bytes, i.e. 5 to 8
    // instructions when each is 4 bytes), align to a 32-byte boundary so that
    // the entire loop fits in one instruction-cache line.
    //
    // Only whether the size exceeds 32 bytes matters, so the scan over the
    // loop's blocks stops as soon as that budget is exceeded instead of
    // visiting every remaining block.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end();
         I != IE && LoopSize <= 32; ++I)
      for (const MachineInstr &J : **I) {
        LoopSize += TII->getInstSizeInBytes(J);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}
16708

16709
/// getConstraintType - Given a constraint, return the type of
16710
/// constraint it is for this target.
16711
PPCTargetLowering::ConstraintType
16712
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16713
  if (Constraint.size() == 1) {
16714
    switch (Constraint[0]) {
16715
    default: break;
16716
    case 'b':
16717
    case 'r':
16718
    case 'f':
16719
    case 'd':
16720
    case 'v':
16721
    case 'y':
16722
      return C_RegisterClass;
16723
    case 'Z':
16724
      // FIXME: While Z does indicate a memory constraint, it specifically
16725
      // indicates an r+r address (used in conjunction with the 'y' modifier
16726
      // in the replacement string). Currently, we're forcing the base
16727
      // register to be r0 in the asm printer (which is interpreted as zero)
16728
      // and forming the complete address in the second register. This is
16729
      // suboptimal.
16730
      return C_Memory;
16731
    }
16732
  } else if (Constraint == "wc") { // individual CR bits.
16733
    return C_RegisterClass;
16734
  } else if (Constraint == "wa" || Constraint == "wd" ||
16735
             Constraint == "wf" || Constraint == "ws" ||
16736
             Constraint == "wi" || Constraint == "ww") {
16737
    return C_RegisterClass; // VSX registers.
16738
  }
16739
  return TargetLowering::getConstraintType(Constraint);
16740
}
16741

16742
/// Examine constraint type and operand type and determine a weight value.
16743
/// This object must already have been set up with the operand type
16744
/// and the current alternative constraint selected.
16745
TargetLowering::ConstraintWeight
16746
PPCTargetLowering::getSingleConstraintMatchWeight(
16747
    AsmOperandInfo &info, const char *constraint) const {
16748
  ConstraintWeight weight = CW_Invalid;
16749
  Value *CallOperandVal = info.CallOperandVal;
16750
    // If we don't have a value, we can't do a match,
16751
    // but allow it at the lowest weight.
16752
  if (!CallOperandVal)
16753
    return CW_Default;
16754
  Type *type = CallOperandVal->getType();
16755

16756
  // Look at the constraint type.
16757
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16758
    return CW_Register; // an individual CR bit.
16759
  else if ((StringRef(constraint) == "wa" ||
16760
            StringRef(constraint) == "wd" ||
16761
            StringRef(constraint) == "wf") &&
16762
           type->isVectorTy())
16763
    return CW_Register;
16764
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16765
    return CW_Register; // just hold 64-bit integers data.
16766
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16767
    return CW_Register;
16768
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
16769
    return CW_Register;
16770

16771
  switch (*constraint) {
16772
  default:
16773
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16774
    break;
16775
  case 'b':
16776
    if (type->isIntegerTy())
16777
      weight = CW_Register;
16778
    break;
16779
  case 'f':
16780
    if (type->isFloatTy())
16781
      weight = CW_Register;
16782
    break;
16783
  case 'd':
16784
    if (type->isDoubleTy())
16785
      weight = CW_Register;
16786
    break;
16787
  case 'v':
16788
    if (type->isVectorTy())
16789
      weight = CW_Register;
16790
    break;
16791
  case 'y':
16792
    weight = CW_Register;
16793
    break;
16794
  case 'Z':
16795
    weight = CW_Memory;
16796
    break;
16797
  }
16798
  return weight;
16799
}
16800

16801
std::pair<unsigned, const TargetRegisterClass *>
16802
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16803
                                                StringRef Constraint,
16804
                                                MVT VT) const {
16805
  if (Constraint.size() == 1) {
16806
    // GCC RS6000 Constraint Letters
16807
    switch (Constraint[0]) {
16808
    case 'b':   // R1-R31
16809
      if (VT == MVT::i64 && Subtarget.isPPC64())
16810
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16811
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16812
    case 'r':   // R0-R31
16813
      if (VT == MVT::i64 && Subtarget.isPPC64())
16814
        return std::make_pair(0U, &PPC::G8RCRegClass);
16815
      return std::make_pair(0U, &PPC::GPRCRegClass);
16816
    // 'd' and 'f' constraints are both defined to be "the floating point
16817
    // registers", where one is for 32-bit and the other for 64-bit. We don't
16818
    // really care overly much here so just give them all the same reg classes.
16819
    case 'd':
16820
    case 'f':
16821
      if (Subtarget.hasSPE()) {
16822
        if (VT == MVT::f32 || VT == MVT::i32)
16823
          return std::make_pair(0U, &PPC::GPRCRegClass);
16824
        if (VT == MVT::f64 || VT == MVT::i64)
16825
          return std::make_pair(0U, &PPC::SPERCRegClass);
16826
      } else {
16827
        if (VT == MVT::f32 || VT == MVT::i32)
16828
          return std::make_pair(0U, &PPC::F4RCRegClass);
16829
        if (VT == MVT::f64 || VT == MVT::i64)
16830
          return std::make_pair(0U, &PPC::F8RCRegClass);
16831
      }
16832
      break;
16833
    case 'v':
16834
      if (Subtarget.hasAltivec() && VT.isVector())
16835
        return std::make_pair(0U, &PPC::VRRCRegClass);
16836
      else if (Subtarget.hasVSX())
16837
        // Scalars in Altivec registers only make sense with VSX.
16838
        return std::make_pair(0U, &PPC::VFRCRegClass);
16839
      break;
16840
    case 'y':   // crrc
16841
      return std::make_pair(0U, &PPC::CRRCRegClass);
16842
    }
16843
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16844
    // An individual CR bit.
16845
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
16846
  } else if ((Constraint == "wa" || Constraint == "wd" ||
16847
             Constraint == "wf" || Constraint == "wi") &&
16848
             Subtarget.hasVSX()) {
16849
    // A VSX register for either a scalar (FP) or vector. There is no
16850
    // support for single precision scalars on subtargets prior to Power8.
16851
    if (VT.isVector())
16852
      return std::make_pair(0U, &PPC::VSRCRegClass);
16853
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
16854
      return std::make_pair(0U, &PPC::VSSRCRegClass);
16855
    return std::make_pair(0U, &PPC::VSFRCRegClass);
16856
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16857
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
16858
      return std::make_pair(0U, &PPC::VSSRCRegClass);
16859
    else
16860
      return std::make_pair(0U, &PPC::VSFRCRegClass);
16861
  } else if (Constraint == "lr") {
16862
    if (VT == MVT::i64)
16863
      return std::make_pair(0U, &PPC::LR8RCRegClass);
16864
    else
16865
      return std::make_pair(0U, &PPC::LRRCRegClass);
16866
  }
16867

16868
  // Handle special cases of physical registers that are not properly handled
16869
  // by the base class.
16870
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16871
    // If we name a VSX register, we can't defer to the base class because it
16872
    // will not recognize the correct register (their names will be VSL{0-31}
16873
    // and V{0-31} so they won't match). So we match them here.
16874
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16875
      int VSNum = atoi(Constraint.data() + 3);
16876
      assert(VSNum >= 0 && VSNum <= 63 &&
16877
             "Attempted to access a vsr out of range");
16878
      if (VSNum < 32)
16879
        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16880
      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16881
    }
16882

16883
    // For float registers, we can't defer to the base class as it will match
16884
    // the SPILLTOVSRRC class.
16885
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
16886
      int RegNum = atoi(Constraint.data() + 2);
16887
      if (RegNum > 31 || RegNum < 0)
16888
        report_fatal_error("Invalid floating point register number");
16889
      if (VT == MVT::f32 || VT == MVT::i32)
16890
        return Subtarget.hasSPE()
16891
                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16892
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16893
      if (VT == MVT::f64 || VT == MVT::i64)
16894
        return Subtarget.hasSPE()
16895
                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16896
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16897
    }
16898
  }
16899

16900
  std::pair<unsigned, const TargetRegisterClass *> R =
16901
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16902

16903
  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16904
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16905
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16906
  // register.
16907
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16908
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16909
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16910
      PPC::GPRCRegClass.contains(R.first))
16911
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
16912
                            PPC::sub_32, &PPC::G8RCRegClass),
16913
                          &PPC::G8RCRegClass);
16914

16915
  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16916
  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16917
    R.first = PPC::CR0;
16918
    R.second = &PPC::CRRCRegClass;
16919
  }
16920
  // FIXME: This warning should ideally be emitted in the front end.
16921
  const auto &TM = getTargetMachine();
16922
  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16923
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16924
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16925
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16926
      errs() << "warning: vector registers 20 to 32 are reserved in the "
16927
                "default AIX AltiVec ABI and cannot be used\n";
16928
  }
16929

16930
  return R;
16931
}
16932

16933
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16934
/// vector.  If it is invalid, don't add anything to Ops.
16935
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16936
                                                     StringRef Constraint,
16937
                                                     std::vector<SDValue> &Ops,
16938
                                                     SelectionDAG &DAG) const {
16939
  SDValue Result;
16940

16941
  // Only support length 1 constraints.
16942
  if (Constraint.size() > 1)
16943
    return;
16944

16945
  char Letter = Constraint[0];
16946
  switch (Letter) {
16947
  default: break;
16948
  case 'I':
16949
  case 'J':
16950
  case 'K':
16951
  case 'L':
16952
  case 'M':
16953
  case 'N':
16954
  case 'O':
16955
  case 'P': {
16956
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16957
    if (!CST) return; // Must be an immediate to match.
16958
    SDLoc dl(Op);
16959
    int64_t Value = CST->getSExtValue();
16960
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16961
                         // numbers are printed as such.
16962
    switch (Letter) {
16963
    default: llvm_unreachable("Unknown constraint letter!");
16964
    case 'I':  // "I" is a signed 16-bit constant.
16965
      if (isInt<16>(Value))
16966
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16967
      break;
16968
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
16969
      if (isShiftedUInt<16, 16>(Value))
16970
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16971
      break;
16972
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
16973
      if (isShiftedInt<16, 16>(Value))
16974
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16975
      break;
16976
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
16977
      if (isUInt<16>(Value))
16978
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16979
      break;
16980
    case 'M':  // "M" is a constant that is greater than 31.
16981
      if (Value > 31)
16982
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16983
      break;
16984
    case 'N':  // "N" is a positive constant that is an exact power of two.
16985
      if (Value > 0 && isPowerOf2_64(Value))
16986
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16987
      break;
16988
    case 'O':  // "O" is the constant zero.
16989
      if (Value == 0)
16990
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16991
      break;
16992
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
16993
      if (isInt<16>(-Value))
16994
        Result = DAG.getTargetConstant(Value, dl, TCVT);
16995
      break;
16996
    }
16997
    break;
16998
  }
16999
  }
17000

17001
  if (Result.getNode()) {
17002
    Ops.push_back(Result);
17003
    return;
17004
  }
17005

17006
  // Handle standard constraint letters.
17007
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17008
}
17009

17010
void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17011
                                              SmallVectorImpl<SDValue> &Ops,
17012
                                              SelectionDAG &DAG) const {
17013
  if (I.getNumOperands() <= 1)
17014
    return;
17015
  if (!isa<ConstantSDNode>(Ops[1].getNode()))
17016
    return;
17017
  auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17018
  if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17019
      IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17020
    return;
17021

17022
  if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17023
    Ops.push_back(DAG.getMDNode(MDN));
17024
}
17025

17026
// isLegalAddressingMode - Return true if the addressing mode represented
17027
// by AM is legal for this target, for a load/store of the specified type.
17028
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17029
                                              const AddrMode &AM, Type *Ty,
17030
                                              unsigned AS,
17031
                                              Instruction *I) const {
17032
  // Vector type r+i form is supported since power9 as DQ form. We don't check
17033
  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
17034
  // imm form is preferred and the offset can be adjusted to use imm form later
17035
  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
17036
  // max offset to check legal addressing mode, we should be a little aggressive
17037
  // to contain other offsets for that LSRUse.
17038
  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17039
    return false;
17040

17041
  // PPC allows a sign-extended 16-bit immediate field.
17042
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17043
    return false;
17044

17045
  // No global is ever allowed as a base.
17046
  if (AM.BaseGV)
17047
    return false;
17048

17049
  // PPC only support r+r,
17050
  switch (AM.Scale) {
17051
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
17052
    break;
17053
  case 1:
17054
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
17055
      return false;
17056
    // Otherwise we have r+r or r+i.
17057
    break;
17058
  case 2:
17059
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
17060
      return false;
17061
    // Allow 2*r as r+r.
17062
    break;
17063
  default:
17064
    // No other scales are supported.
17065
    return false;
17066
  }
17067

17068
  return true;
17069
}
17070

17071
/// Lower a return-address query for the frame that is Depth levels up, where
/// Depth is constant operand 0 of \p Op.
///
/// Marks the return address as taken and requires the LR store to be kept.
/// Returns an empty SDValue when verifyReturnAddressArgumentIsConstant reports
/// a problem with the argument.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  const unsigned Depth = Op.getConstantOperandVal(0);

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  MF.getInfo<PPCFunctionInfo>()->setLRStoreRequired();
  const auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth == 0) {
    // Just load the return address off the stack.
    SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                       MachinePointerInfo());
  }

  // The link register (return address) is saved in the caller's frame, not
  // the callee's. So fetch the caller's frame address first and then load the
  // return address found at the LR save offset from it.
  SDValue ThisFrame = LowerFRAMEADDR(Op, DAG);
  SDValue CallerFrame = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                                    ThisFrame, MachinePointerInfo());
  const EVT OffsetVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  SDValue SaveOffset = DAG.getConstant(
      Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, OffsetVT);
  SDValue SlotAddr =
      DAG.getNode(ISD::ADD, dl, PtrVT, CallerFrame, SaveOffset);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), SlotAddr,
                     MachinePointerInfo());
}
17110

17111
/// Lower a frame-address query for the frame that is Depth levels up, where
/// Depth is constant operand 0 of \p Op. Marks the frame address as taken.
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  const unsigned Depth = Op.getConstantOperandVal(0);

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  const bool Is64Bit = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  const bool IsNaked = MF.getFunction().hasFnAttribute(Attribute::Naked);
  unsigned FrameReg;
  if (Is64Bit)
    FrameReg = IsNaked ? PPC::X1 : PPC::FP8;
  else
    FrameReg = IsNaked ? PPC::R1 : PPC::FP;

  // Start from the frame register and follow one load per requested level.
  SDValue Addr =
      DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
  for (unsigned Level = 0; Level != Depth; ++Level)
    Addr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), Addr,
                       MachinePointerInfo());
  return Addr;
}
17138

17139
// FIXME? Maybe this could be a TableGen attribute on some registers and
17140
// this table could be generated automatically from RegInfo.
17141
Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17142
                                              const MachineFunction &MF) const {
17143
  bool isPPC64 = Subtarget.isPPC64();
17144

17145
  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17146
  if (!is64Bit && VT != LLT::scalar(32))
17147
    report_fatal_error("Invalid register global variable type");
17148

17149
  Register Reg = StringSwitch<Register>(RegName)
17150
                     .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17151
                     .Case("r2", isPPC64 ? Register() : PPC::R2)
17152
                     .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17153
                     .Default(Register());
17154

17155
  if (Reg)
17156
    return Reg;
17157
  report_fatal_error("Invalid register name global variable");
17158
}
17159

17160
/// Report whether the address node \p GA is accessed got-indirect.
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // 32-bit SVR4 ABI access everything as got-indirect. AIX accesses
  // everything indirectly through the TOC, which is similar to the GOT.
  if (Subtarget.is32BitELFABI() || Subtarget.isAIXABI())
    return true;

  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  const CodeModel::Model CM = getTargetMachine().getCodeModel();
  if (CM == CodeModel::Small || CM == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  // For a global address the subtarget decides; anything else is direct.
  GlobalAddressSDNode *Global = dyn_cast<GlobalAddressSDNode>(GA);
  if (!Global)
    return false;
  return Subtarget.isGVIndirectSymbol(Global->getGlobal());
}
17185

17186
bool
17187
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17188
  // The PowerPC target isn't yet aware of offsets.
17189
  return false;
17190
}
17191

17192
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17193
                                           const CallInst &I,
17194
                                           MachineFunction &MF,
17195
                                           unsigned Intrinsic) const {
17196
  switch (Intrinsic) {
17197
  case Intrinsic::ppc_atomicrmw_xchg_i128:
17198
  case Intrinsic::ppc_atomicrmw_add_i128:
17199
  case Intrinsic::ppc_atomicrmw_sub_i128:
17200
  case Intrinsic::ppc_atomicrmw_nand_i128:
17201
  case Intrinsic::ppc_atomicrmw_and_i128:
17202
  case Intrinsic::ppc_atomicrmw_or_i128:
17203
  case Intrinsic::ppc_atomicrmw_xor_i128:
17204
  case Intrinsic::ppc_cmpxchg_i128:
17205
    Info.opc = ISD::INTRINSIC_W_CHAIN;
17206
    Info.memVT = MVT::i128;
17207
    Info.ptrVal = I.getArgOperand(0);
17208
    Info.offset = 0;
17209
    Info.align = Align(16);
17210
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17211
                 MachineMemOperand::MOVolatile;
17212
    return true;
17213
  case Intrinsic::ppc_atomic_load_i128:
17214
    Info.opc = ISD::INTRINSIC_W_CHAIN;
17215
    Info.memVT = MVT::i128;
17216
    Info.ptrVal = I.getArgOperand(0);
17217
    Info.offset = 0;
17218
    Info.align = Align(16);
17219
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17220
    return true;
17221
  case Intrinsic::ppc_atomic_store_i128:
17222
    Info.opc = ISD::INTRINSIC_VOID;
17223
    Info.memVT = MVT::i128;
17224
    Info.ptrVal = I.getArgOperand(2);
17225
    Info.offset = 0;
17226
    Info.align = Align(16);
17227
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17228
    return true;
17229
  case Intrinsic::ppc_altivec_lvx:
17230
  case Intrinsic::ppc_altivec_lvxl:
17231
  case Intrinsic::ppc_altivec_lvebx:
17232
  case Intrinsic::ppc_altivec_lvehx:
17233
  case Intrinsic::ppc_altivec_lvewx:
17234
  case Intrinsic::ppc_vsx_lxvd2x:
17235
  case Intrinsic::ppc_vsx_lxvw4x:
17236
  case Intrinsic::ppc_vsx_lxvd2x_be:
17237
  case Intrinsic::ppc_vsx_lxvw4x_be:
17238
  case Intrinsic::ppc_vsx_lxvl:
17239
  case Intrinsic::ppc_vsx_lxvll: {
17240
    EVT VT;
17241
    switch (Intrinsic) {
17242
    case Intrinsic::ppc_altivec_lvebx:
17243
      VT = MVT::i8;
17244
      break;
17245
    case Intrinsic::ppc_altivec_lvehx:
17246
      VT = MVT::i16;
17247
      break;
17248
    case Intrinsic::ppc_altivec_lvewx:
17249
      VT = MVT::i32;
17250
      break;
17251
    case Intrinsic::ppc_vsx_lxvd2x:
17252
    case Intrinsic::ppc_vsx_lxvd2x_be:
17253
      VT = MVT::v2f64;
17254
      break;
17255
    default:
17256
      VT = MVT::v4i32;
17257
      break;
17258
    }
17259

17260
    Info.opc = ISD::INTRINSIC_W_CHAIN;
17261
    Info.memVT = VT;
17262
    Info.ptrVal = I.getArgOperand(0);
17263
    Info.offset = -VT.getStoreSize()+1;
17264
    Info.size = 2*VT.getStoreSize()-1;
17265
    Info.align = Align(1);
17266
    Info.flags = MachineMemOperand::MOLoad;
17267
    return true;
17268
  }
17269
  case Intrinsic::ppc_altivec_stvx:
17270
  case Intrinsic::ppc_altivec_stvxl:
17271
  case Intrinsic::ppc_altivec_stvebx:
17272
  case Intrinsic::ppc_altivec_stvehx:
17273
  case Intrinsic::ppc_altivec_stvewx:
17274
  case Intrinsic::ppc_vsx_stxvd2x:
17275
  case Intrinsic::ppc_vsx_stxvw4x:
17276
  case Intrinsic::ppc_vsx_stxvd2x_be:
17277
  case Intrinsic::ppc_vsx_stxvw4x_be:
17278
  case Intrinsic::ppc_vsx_stxvl:
17279
  case Intrinsic::ppc_vsx_stxvll: {
17280
    EVT VT;
17281
    switch (Intrinsic) {
17282
    case Intrinsic::ppc_altivec_stvebx:
17283
      VT = MVT::i8;
17284
      break;
17285
    case Intrinsic::ppc_altivec_stvehx:
17286
      VT = MVT::i16;
17287
      break;
17288
    case Intrinsic::ppc_altivec_stvewx:
17289
      VT = MVT::i32;
17290
      break;
17291
    case Intrinsic::ppc_vsx_stxvd2x:
17292
    case Intrinsic::ppc_vsx_stxvd2x_be:
17293
      VT = MVT::v2f64;
17294
      break;
17295
    default:
17296
      VT = MVT::v4i32;
17297
      break;
17298
    }
17299

17300
    Info.opc = ISD::INTRINSIC_VOID;
17301
    Info.memVT = VT;
17302
    Info.ptrVal = I.getArgOperand(1);
17303
    Info.offset = -VT.getStoreSize()+1;
17304
    Info.size = 2*VT.getStoreSize()-1;
17305
    Info.align = Align(1);
17306
    Info.flags = MachineMemOperand::MOStore;
17307
    return true;
17308
  }
17309
  case Intrinsic::ppc_stdcx:
17310
  case Intrinsic::ppc_stwcx:
17311
  case Intrinsic::ppc_sthcx:
17312
  case Intrinsic::ppc_stbcx: {
17313
    EVT VT;
17314
    auto Alignment = Align(8);
17315
    switch (Intrinsic) {
17316
    case Intrinsic::ppc_stdcx:
17317
      VT = MVT::i64;
17318
      break;
17319
    case Intrinsic::ppc_stwcx:
17320
      VT = MVT::i32;
17321
      Alignment = Align(4);
17322
      break;
17323
    case Intrinsic::ppc_sthcx:
17324
      VT = MVT::i16;
17325
      Alignment = Align(2);
17326
      break;
17327
    case Intrinsic::ppc_stbcx:
17328
      VT = MVT::i8;
17329
      Alignment = Align(1);
17330
      break;
17331
    }
17332
    Info.opc = ISD::INTRINSIC_W_CHAIN;
17333
    Info.memVT = VT;
17334
    Info.ptrVal = I.getArgOperand(0);
17335
    Info.offset = 0;
17336
    Info.align = Alignment;
17337
    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17338
    return true;
17339
  }
17340
  default:
17341
    break;
17342
  }
17343

17344
  return false;
17345
}
17346

17347
/// It returns EVT::Other if the type should be determined using generic
17348
/// target-independent logic.
17349
EVT PPCTargetLowering::getOptimalMemOpType(
17350
    const MemOp &Op, const AttributeList &FuncAttributes) const {
17351
  if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17352
    // We should use Altivec/VSX loads and stores when available. For unaligned
17353
    // addresses, unaligned VSX loads are only fast starting with the P8.
17354
    if (Subtarget.hasAltivec() && Op.size() >= 16) {
17355
      if (Op.isMemset() && Subtarget.hasVSX()) {
17356
        uint64_t TailSize = Op.size() % 16;
17357
        // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17358
        // element if vector element type matches tail store. For tail size
17359
        // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17360
        if (TailSize > 2 && TailSize <= 4) {
17361
          return MVT::v8i16;
17362
        }
17363
        return MVT::v4i32;
17364
      }
17365
      if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17366
        return MVT::v4i32;
17367
    }
17368
  }
17369

17370
  if (Subtarget.isPPC64()) {
17371
    return MVT::i64;
17372
  }
17373

17374
  return MVT::i32;
17375
}
17376

17377
/// Returns true if it is beneficial to convert a load of a constant
17378
/// to just the constant itself.
17379
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17380
                                                          Type *Ty) const {
17381
  assert(Ty->isIntegerTy());
17382

17383
  unsigned BitSize = Ty->getPrimitiveSizeInBits();
17384
  return !(BitSize == 0 || BitSize > 64);
17385
}
17386

17387
/// Truncation is reported as free only from a 64-bit integer type to a
/// 32-bit integer type.
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  const bool BothIntegers = Ty1->isIntegerTy() && Ty2->isIntegerTy();
  if (!BothIntegers)
    return false;
  const unsigned SrcBits = Ty1->getPrimitiveSizeInBits();
  const unsigned DstBits = Ty2->getPrimitiveSizeInBits();
  return SrcBits == 64 && DstBits == 32;
}
17394

17395
/// Truncation is reported as free only from a 64-bit integer value type to a
/// 32-bit integer value type.
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  const bool BothIntegers = VT1.isInteger() && VT2.isInteger();
  if (!BothIntegers)
    return false;
  const unsigned SrcBits = VT1.getSizeInBits();
  const unsigned DstBits = VT2.getSizeInBits();
  return SrcBits == 64 && DstBits == 32;
}
17402

17403
/// Report whether zero-extending \p Val to \p VT2 is free.
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations. Here that means a non-extending or
  // zero-extending load of i1/i8/i16 (and of i32 on PPC64).
  if (auto *Load = dyn_cast<LoadSDNode>(Val)) {
    const EVT MemVT = Load->getMemoryVT();
    const bool NarrowInteger =
        MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
        (Subtarget.isPPC64() && MemVT == MVT::i32);
    const auto ExtKind = Load->getExtensionType();
    const bool Foldable =
        ExtKind == ISD::NON_EXTLOAD || ExtKind == ISD::ZEXTLOAD;
    if (NarrowInteger && Foldable)
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}
17422

17423
/// Floating-point extension is reported as free unless the destination type
/// is f128.
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  return DestVT != MVT::f128;
}
17431

17432
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17433
  return isInt<16>(Imm) || isUInt<16>(Imm);
17434
}
17435

17436
bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17437
  return isInt<16>(Imm) || isUInt<16>(Imm);
17438
}
17439

17440
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17441
                                                       MachineMemOperand::Flags,
17442
                                                       unsigned *Fast) const {
17443
  if (DisablePPCUnaligned)
17444
    return false;
17445

17446
  // PowerPC supports unaligned memory access for simple non-vector types.
17447
  // Although accessing unaligned addresses is not as efficient as accessing
17448
  // aligned addresses, it is generally more efficient than manual expansion,
17449
  // and generally only traps for software emulation when crossing page
17450
  // boundaries.
17451

17452
  if (!VT.isSimple())
17453
    return false;
17454

17455
  if (VT.isFloatingPoint() && !VT.isVector() &&
17456
      !Subtarget.allowsUnalignedFPAccess())
17457
    return false;
17458

17459
  if (VT.getSimpleVT().isVector()) {
17460
    if (Subtarget.hasVSX()) {
17461
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17462
          VT != MVT::v4f32 && VT != MVT::v4i32)
17463
        return false;
17464
    } else {
17465
      return false;
17466
    }
17467
  }
17468

17469
  if (VT == MVT::ppcf128)
17470
    return false;
17471

17472
  if (Fast)
17473
    *Fast = 1;
17474

17475
  return true;
17476
}
17477

17478
bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17479
                                               SDValue C) const {
17480
  // Check integral scalar types.
17481
  if (!VT.isScalarInteger())
17482
    return false;
17483
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17484
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
17485
      return false;
17486
    // This transformation will generate >= 2 operations. But the following
17487
    // cases will generate <= 2 instructions during ISEL. So exclude them.
17488
    // 1. If the constant multiplier fits 16 bits, it can be handled by one
17489
    // HW instruction, ie. MULLI
17490
    // 2. If the multiplier after shifted fits 16 bits, an extra shift
17491
    // instruction is needed than case 1, ie. MULLI and RLDICR
17492
    int64_t Imm = ConstNode->getSExtValue();
17493
    unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17494
    Imm >>= Shift;
17495
    if (isInt<16>(Imm))
17496
      return false;
17497
    uint64_t UImm = static_cast<uint64_t>(Imm);
17498
    if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17499
        isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17500
      return true;
17501
  }
17502
  return false;
17503
}
17504

17505
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17506
                                                   EVT VT) const {
17507
  return isFMAFasterThanFMulAndFAdd(
17508
      MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17509
}
17510

17511
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17512
                                                   Type *Ty) const {
17513
  if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17514
    return false;
17515
  switch (Ty->getScalarType()->getTypeID()) {
17516
  case Type::FloatTyID:
17517
  case Type::DoubleTyID:
17518
    return true;
17519
  case Type::FP128TyID:
17520
    return Subtarget.hasP9Vector();
17521
  default:
17522
    return false;
17523
  }
17524
}
17525

17526
// FIXME: add more patterns which are not profitable to hoist.
17527
bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17528
  if (!I->hasOneUse())
17529
    return true;
17530

17531
  Instruction *User = I->user_back();
17532
  assert(User && "A single use instruction with no uses.");
17533

17534
  switch (I->getOpcode()) {
17535
  case Instruction::FMul: {
17536
    // Don't break FMA, PowerPC prefers FMA.
17537
    if (User->getOpcode() != Instruction::FSub &&
17538
        User->getOpcode() != Instruction::FAdd)
17539
      return true;
17540

17541
    const TargetOptions &Options = getTargetMachine().Options;
17542
    const Function *F = I->getFunction();
17543
    const DataLayout &DL = F->getDataLayout();
17544
    Type *Ty = User->getOperand(0)->getType();
17545

17546
    return !(
17547
        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17548
        isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17549
        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17550
  }
17551
  case Instruction::Load: {
17552
    // Don't break "store (load float*)" pattern, this pattern will be combined
17553
    // to "store (load int32)" in later InstCombine pass. See function
17554
    // combineLoadToOperationType. On PowerPC, loading a float point takes more
17555
    // cycles than loading a 32 bit integer.
17556
    LoadInst *LI = cast<LoadInst>(I);
17557
    // For the loads that combineLoadToOperationType does nothing, like
17558
    // ordered load, it should be profitable to hoist them.
17559
    // For swifterror load, it can only be used for pointer to pointer type, so
17560
    // later type check should get rid of this case.
17561
    if (!LI->isUnordered())
17562
      return true;
17563

17564
    if (User->getOpcode() != Instruction::Store)
17565
      return true;
17566

17567
    if (I->getType()->getTypeID() != Type::FloatTyID)
17568
      return true;
17569

17570
    return false;
17571
  }
17572
  default:
17573
    return true;
17574
  }
17575
  return true;
17576
}
17577

17578
/// Return the zero-terminated list of scratch registers (the calling
/// convention argument is ignored).
///
/// LR is a callee-save register, but it has to be treated as clobbered by any
/// call site, so it is listed here; the scratch registers are in turn added as
/// implicit-defs for stackmaps and patchpoints. CTR is listed for the same
/// reason, since any indirect call uses it.
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg CallScratchRegs[] = {PPC::X12, PPC::LR8, PPC::CTR8,
                                              0};
  return CallScratchRegs;
}
17590

17591
/// Register holding the exception pointer: X3 on 64-bit subtargets, R3
/// otherwise. \p PersonalityFn is not consulted.
Register PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  if (Subtarget.isPPC64())
    return PPC::X3;
  return PPC::R3;
}
17595

17596
/// Register holding the exception selector: X4 on 64-bit subtargets, R4
/// otherwise. \p PersonalityFn is not consulted.
Register PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  if (Subtarget.isPPC64())
    return PPC::X4;
  return PPC::R4;
}
17600

17601
bool
17602
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17603
                     EVT VT , unsigned DefinedValues) const {
17604
  if (VT == MVT::v2i64)
17605
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17606

17607
  if (Subtarget.hasVSX())
17608
    return true;
17609

17610
  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17611
}
17612

17613
/// Scheduling preference for \p N: ILP, unless the ILP preference has been
/// disabled or the subtarget enables the machine scheduler, in which case the
/// generic TargetLowering preference is returned.
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  const bool PreferILP =
      !DisableILPPref && !Subtarget.enableMachineScheduler();
  if (PreferILP)
    return Sched::ILP;

  return TargetLowering::getSchedulingPreference(N);
}
17619

17620
// Create a fast isel object.
17621
FastISel *
17622
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17623
                                  const TargetLibraryInfo *LibInfo) const {
17624
  return PPC::createFastISel(FuncInfo, LibInfo);
17625
}
17626

17627
// 'Inverted' means the FMA opcode after negating one multiplicand.
17628
// For example, (fma -a b c) = (fnmsub a b c)
17629
static unsigned invertFMAOpcode(unsigned Opc) {
17630
  switch (Opc) {
17631
  default:
17632
    llvm_unreachable("Invalid FMA opcode for PowerPC!");
17633
  case ISD::FMA:
17634
    return PPCISD::FNMSUB;
17635
  case PPCISD::FNMSUB:
17636
    return ISD::FMA;
17637
  }
17638
}
17639

17640
/// Build an expression equal to the negation of \p Op, with PowerPC-specific
/// handling for PPCISD::FNMSUB.
///
/// A single-use FNMSUB of a legal type whose third operand can be negated is
/// rewritten here; \p Cost is then set from the operand costs. Everything else
/// is delegated to the generic TargetLowering implementation. An empty SDValue
/// is returned when the recursion depth limit is exceeded or when the FNMSUB
/// addend cannot be negated.
SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOps, bool OptForSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();

  // Only a single-use FNMSUB of a legal type is handled here; anything else
  // goes straight to the generic implementation.
  if (Opc != PPCISD::FNMSUB || !Op.hasOneUse() || !isTypeLegal(VT))
    return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                                Cost, Depth);

  SDNodeFlags NodeFlags = Op.getNode()->getFlags();
  const TargetOptions &Options = getTargetMachine().Options;
  SDValue MulLHS = Op.getOperand(0);
  SDValue MulRHS = Op.getOperand(1);
  SDValue Addend = Op.getOperand(2);
  SDLoc Loc(Op);

  // Every rewrite below needs the negated addend.
  NegatibleCost AddendCost = NegatibleCost::Expensive;
  SDValue NegAddend = getNegatedExpression(Addend, DAG, LegalOps, OptForSize,
                                           AddendCost, Depth + 1);
  if (!NegAddend)
    return SDValue();

  // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
  // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
  // These transformations may change sign of zeroes. For example,
  // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
  const bool IgnoreSignedZeros =
      NodeFlags.hasNoSignedZeros() || Options.NoSignedZerosFPMath;
  if (IgnoreSignedZeros) {
    // Try and choose the cheaper multiplicand to negate.
    NegatibleCost LHSCost = NegatibleCost::Expensive;
    SDValue NegLHS = getNegatedExpression(MulLHS, DAG, LegalOps, OptForSize,
                                          LHSCost, Depth + 1);

    NegatibleCost RHSCost = NegatibleCost::Expensive;
    SDValue NegRHS = getNegatedExpression(MulRHS, DAG, LegalOps, OptForSize,
                                          RHSCost, Depth + 1);

    if (NegLHS && LHSCost <= RHSCost) {
      Cost = std::min(LHSCost, AddendCost);
      return DAG.getNode(Opc, Loc, VT, NegLHS, MulRHS, NegAddend, NodeFlags);
    }
    if (NegRHS) {
      Cost = std::min(RHSCost, AddendCost);
      return DAG.getNode(Opc, Loc, VT, MulLHS, NegRHS, NegAddend, NodeFlags);
    }
  }

  // (fneg (fnmsub a b c)) => (fma a b (fneg c))
  if (isOperationLegal(ISD::FMA, VT)) {
    Cost = AddendCost;
    return DAG.getNode(ISD::FMA, Loc, VT, MulLHS, MulRHS, NegAddend,
                       NodeFlags);
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
                                              Cost, Depth);
}
17704

17705
// Override to enable LOAD_STACK_GUARD lowering on Linux.
17706
bool PPCTargetLowering::useLoadStackGuardNode() const {
17707
  if (!Subtarget.isTargetLinux())
17708
    return TargetLowering::useLoadStackGuardNode();
17709
  return true;
17710
}
17711

17712
// Override to disable global variable loading on Linux and insert AIX canary
17713
// word declaration.
17714
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17715
  if (Subtarget.isAIXABI()) {
17716
    M.getOrInsertGlobal(AIXSSPCanaryWordName,
17717
                        PointerType::getUnqual(M.getContext()));
17718
    return;
17719
  }
17720
  if (!Subtarget.isTargetLinux())
17721
    return TargetLowering::insertSSPDeclarations(M);
17722
}
17723

17724
/// Return the stack guard value for \p M: on AIX this is the module's canary
/// word global (looked up by name), elsewhere the generic TargetLowering
/// result.
Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
  if (!Subtarget.isAIXABI())
    return TargetLowering::getSDagStackGuard(M);

  return M.getGlobalVariable(AIXSSPCanaryWordName);
}
17729

17730
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17731
                                     bool ForCodeSize) const {
17732
  if (!VT.isSimple() || !Subtarget.hasVSX())
17733
    return false;
17734

17735
  switch(VT.getSimpleVT().SimpleTy) {
17736
  default:
17737
    // For FP types that are currently not supported by PPC backend, return
17738
    // false. Examples: f16, f80.
17739
    return false;
17740
  case MVT::f32:
17741
  case MVT::f64: {
17742
    if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
17743
      // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
17744
      return true;
17745
    }
17746
    bool IsExact;
17747
    APSInt IntResult(16, false);
17748
    // The rounding mode doesn't really matter because we only care about floats
17749
    // that can be converted to integers exactly.
17750
    Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17751
    // For exact values in the range [-16, 15] we can materialize the float.
17752
    if (IsExact && IntResult <= 15 && IntResult >= -16)
17753
      return true;
17754
    return Imm.isZero();
17755
  }
17756
  case MVT::ppcf128:
17757
    return Imm.isPosZero();
17758
  }
17759
}
17760

17761
// For vector shift operation op, fold
17762
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17763
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17764
                                  SelectionDAG &DAG) {
17765
  SDValue N0 = N->getOperand(0);
17766
  SDValue N1 = N->getOperand(1);
17767
  EVT VT = N0.getValueType();
17768
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
17769
  unsigned Opcode = N->getOpcode();
17770
  unsigned TargetOpcode;
17771

17772
  switch (Opcode) {
17773
  default:
17774
    llvm_unreachable("Unexpected shift operation");
17775
  case ISD::SHL:
17776
    TargetOpcode = PPCISD::SHL;
17777
    break;
17778
  case ISD::SRL:
17779
    TargetOpcode = PPCISD::SRL;
17780
    break;
17781
  case ISD::SRA:
17782
    TargetOpcode = PPCISD::SRA;
17783
    break;
17784
  }
17785

17786
  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17787
      N1->getOpcode() == ISD::AND)
17788
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17789
      if (Mask->getZExtValue() == OpSizeInBits - 1)
17790
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17791

17792
  return SDValue();
17793
}
17794

17795
/// DAG combine for SHL.
///
/// First tries to strip a redundant modulo mask from a vector shift amount.
/// Otherwise, on 64-bit ISA 3.0 subtargets, folds
/// (shl (sign_extend i32:x), C) producing i64 into (EXTSWSLI x, C).
/// Returns an empty SDValue when neither applies.
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (SDValue Stripped = stripModuloOnShift(*this, N, DCI.DAG))
    return Stripped;

  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64())
    return SDValue();

  SDValue Ext = N->getOperand(0);
  auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));

  // Need a constant shift of an i32 -> i64 sign extension.
  if (Ext.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();
  if (Ext.getOperand(0).getValueType() != MVT::i32 || !ShAmtC ||
      N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue Narrow = Ext.getOperand(0);
  if (Narrow.getOpcode() == ISD::TRUNCATE &&
      Narrow.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(Ext);

  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64.
  SDValue Amount(ShAmtC, 0);
  if (Amount.getValueType() == MVT::i64)
    Amount = DAG.getConstant(ShAmtC->getZExtValue(), DL, MVT::i32);

  return DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, Narrow, Amount);
}
17824

17825
/// DAG combine for SRA: the only fold attempted is stripping a redundant
/// modulo mask from a vector shift amount. Yields an empty SDValue when that
/// fold does not apply.
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  return stripModuloOnShift(*this, N, DCI.DAG);
}
17831

17832
/// DAG combine for SRL: the only fold attempted is stripping a redundant
/// modulo mask from a vector shift amount. Yields an empty SDValue when that
/// fold does not apply.
SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  return stripModuloOnShift(*this, N, DCI.DAG);
}
17838

17839
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17840
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17841
// When C is zero, the equation (addi Z, -C) can be simplified to Z
17842
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17843
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17844
                                 const PPCSubtarget &Subtarget) {
17845
  if (!Subtarget.isPPC64())
17846
    return SDValue();
17847

17848
  SDValue LHS = N->getOperand(0);
17849
  SDValue RHS = N->getOperand(1);
17850

17851
  auto isZextOfCompareWithConstant = [](SDValue Op) {
17852
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17853
        Op.getValueType() != MVT::i64)
17854
      return false;
17855

17856
    SDValue Cmp = Op.getOperand(0);
17857
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17858
        Cmp.getOperand(0).getValueType() != MVT::i64)
17859
      return false;
17860

17861
    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17862
      int64_t NegConstant = 0 - Constant->getSExtValue();
17863
      // Due to the limitations of the addi instruction,
17864
      // -C is required to be [-32768, 32767].
17865
      return isInt<16>(NegConstant);
17866
    }
17867

17868
    return false;
17869
  };
17870

17871
  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17872
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17873

17874
  // If there is a pattern, canonicalize a zext operand to the RHS.
17875
  if (LHSHasPattern && !RHSHasPattern)
17876
    std::swap(LHS, RHS);
17877
  else if (!LHSHasPattern && !RHSHasPattern)
17878
    return SDValue();
17879

17880
  SDLoc DL(N);
17881
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17882
  SDValue Cmp = RHS.getOperand(0);
17883
  SDValue Z = Cmp.getOperand(0);
17884
  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17885
  int64_t NegConstant = 0 - Constant->getSExtValue();
17886

17887
  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17888
  default: break;
17889
  case ISD::SETNE: {
17890
    //                                 when C == 0
17891
    //                             --> addze X, (addic Z, -1).carry
17892
    //                            /
17893
    // add X, (zext(setne Z, C))--
17894
    //                            \    when -32768 <= -C <= 32767 && C != 0
17895
    //                             --> addze X, (addic (addi Z, -C), -1).carry
17896
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17897
                              DAG.getConstant(NegConstant, DL, MVT::i64));
17898
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17899
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17900
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17901
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17902
                       SDValue(Addc.getNode(), 1));
17903
    }
17904
  case ISD::SETEQ: {
17905
    //                                 when C == 0
17906
    //                             --> addze X, (subfic Z, 0).carry
17907
    //                            /
17908
    // add X, (zext(sete  Z, C))--
17909
    //                            \    when -32768 <= -C <= 32767 && C != 0
17910
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
17911
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17912
                              DAG.getConstant(NegConstant, DL, MVT::i64));
17913
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17914
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17915
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17916
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17917
                       SDValue(Subc.getNode(), 1));
17918
    }
17919
  }
17920

17921
  return SDValue();
17922
}
17923

17924
// Transform
17925
// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17926
// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17927
// In this case both C1 and C2 must be known constants.
17928
// C1+C2 must fit into a 34 bit signed integer.
17929
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17930
                                          const PPCSubtarget &Subtarget) {
17931
  if (!Subtarget.isUsingPCRelativeCalls())
17932
    return SDValue();
17933

17934
  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17935
  // If we find that node try to cast the Global Address and the Constant.
17936
  SDValue LHS = N->getOperand(0);
17937
  SDValue RHS = N->getOperand(1);
17938

17939
  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17940
    std::swap(LHS, RHS);
17941

17942
  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17943
    return SDValue();
17944

17945
  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17946
  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17947
  ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17948

17949
  // Check that both casts succeeded.
17950
  if (!GSDN || !ConstNode)
17951
    return SDValue();
17952

17953
  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17954
  SDLoc DL(GSDN);
17955

17956
  // The signed int offset needs to fit in 34 bits.
17957
  if (!isInt<34>(NewOffset))
17958
    return SDValue();
17959

17960
  // The new global address is a copy of the old global address except
17961
  // that it has the updated Offset.
17962
  SDValue GA =
17963
      DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17964
                                 NewOffset, GSDN->getTargetFlags());
17965
  SDValue MatPCRel =
17966
      DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17967
  return MatPCRel;
17968
}
17969

17970
/// DAG combine for ADD: tries the ADDZE fold first and the PC-relative
/// address fold second. The first one that produces a node wins; an empty
/// SDValue means neither applied.
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  SDValue Folded = combineADDToADDZE(N, DCI.DAG, Subtarget);
  if (!Folded)
    Folded = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget);
  return Folded;
}
17979

17980
// Detect TRUNCATE operations on bitcasts of float128 values.
17981
// What we are looking for here is the situtation where we extract a subset
17982
// of bits from a 128 bit float.
17983
// This can be of two forms:
17984
// 1) BITCAST of f128 feeding TRUNCATE
17985
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17986
// The reason this is required is because we do not have a legal i128 type
17987
// and so we want to prevent having to store the f128 and then reload part
17988
// of it.
17989
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17990
                                           DAGCombinerInfo &DCI) const {
17991
  // If we are using CRBits then try that first.
17992
  if (Subtarget.useCRBits()) {
17993
    // Check if CRBits did anything and return that if it did.
17994
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17995
      return CRTruncValue;
17996
  }
17997

17998
  SDLoc dl(N);
17999
  SDValue Op0 = N->getOperand(0);
18000

18001
  // Looking for a truncate of i128 to i64.
18002
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18003
    return SDValue();
18004

18005
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18006

18007
  // SRL feeding TRUNCATE.
18008
  if (Op0.getOpcode() == ISD::SRL) {
18009
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18010
    // The right shift has to be by 64 bits.
18011
    if (!ConstNode || ConstNode->getZExtValue() != 64)
18012
      return SDValue();
18013

18014
    // Switch the element number to extract.
18015
    EltToExtract = EltToExtract ? 0 : 1;
18016
    // Update Op0 past the SRL.
18017
    Op0 = Op0.getOperand(0);
18018
  }
18019

18020
  // BITCAST feeding a TRUNCATE possibly via SRL.
18021
  if (Op0.getOpcode() == ISD::BITCAST &&
18022
      Op0.getValueType() == MVT::i128 &&
18023
      Op0.getOperand(0).getValueType() == MVT::f128) {
18024
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18025
    return DCI.DAG.getNode(
18026
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18027
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18028
  }
18029
  return SDValue();
18030
}
18031

18032
/// DAG combine for MUL by a constant (or constant splat) of the form
/// +/-(2^N + 1) or +/-(2^N - 1): rewrite it as a shift plus an add/sub.
///
///   (mul x, 2^N + 1)    => (add (shl x, N), x)
///   (mul x, -(2^N + 1)) => -(add (shl x, N), x)
///   (mul x, 2^N - 1)    => (sub (shl x, N), x)
///   (mul x, -(2^N - 1)) => (sub x, (shl x, N))
///
/// Returns an empty SDValue when the multiplier has another form, when
/// optimizing for minimum size with a legal MUL, or when the rewrite is not
/// considered profitable for the CPU directive.
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *MulConst = isConstOrConstSplat(N->getOperand(1));
  if (!MulConst)
    return SDValue();

  EVT VT = N->getValueType(0);

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, VT))
    return SDValue();

  const APInt &MulAmt = MulConst->getAPIntValue();
  const bool IsNeg = MulAmt.isNegative();
  const APInt MulAmtAbs = MulAmt.abs();

  // Classify |multiplier|; the 2^N + 1 form takes precedence.
  const bool IsPow2PlusOne = (MulAmtAbs - 1).isPowerOf2();
  const bool IsPow2MinusOne = !IsPow2PlusOne && (MulAmtAbs + 1).isPowerOf2();
  if (!IsPow2PlusOne && !IsPow2MinusOne)
    return SDValue();

  bool Profitable;
  switch (Subtarget.getCPUDirective()) {
  case PPC::DIR_PWR8:
    //  type        mul     add    shl
    // scalar        4       1      1
    // vector        7       2      2
    Profitable = true;
    break;
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR11:
  case PPC::DIR_PWR_FUTURE:
    //  type        mul     add    shl
    // scalar        5       2      2
    // vector        7       2      2

    // The cycle RATIO of related operations are showed as a table above.
    // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
    // scalar and vector type. For 2 instrs patterns, add/sub + shl
    // are 4, it is always profitable; but for 3 instrs patterns
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
    // So we should only do it for vector type.
    Profitable = !(IsPow2PlusOne && IsNeg) || VT.isVector();
    break;
  default:
    // TODO: enhance the condition for subtarget before pwr8
    Profitable = false;
    break;
  }
  if (!Profitable)
    return SDValue();

  SDLoc DL(N);
  SDValue X = N->getOperand(0);

  // Both forms start from (shl x, N), with 2^N being the neighbouring power
  // of two of |multiplier|.
  const APInt Pow2 = IsPow2PlusOne ? MulAmtAbs - 1 : MulAmtAbs + 1;
  SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, X,
                                DAG.getConstant(Pow2.logBase2(), DL, VT));

  if (IsPow2PlusOne) {
    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shifted);
    if (!IsNeg)
      return Sum;
    // Negate the sum as (sub 0, sum).
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sum);
  }

  if (IsNeg)
    return DAG.getNode(ISD::SUB, DL, VT, X, Shifted);
  return DAG.getNode(ISD::SUB, DL, VT, Shifted, X);
}
18117

18118
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18119
// in combiner since we need to check SD flags and other subtarget features.
18120
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18121
                                          DAGCombinerInfo &DCI) const {
18122
  SDValue N0 = N->getOperand(0);
18123
  SDValue N1 = N->getOperand(1);
18124
  SDValue N2 = N->getOperand(2);
18125
  SDNodeFlags Flags = N->getFlags();
18126
  EVT VT = N->getValueType(0);
18127
  SelectionDAG &DAG = DCI.DAG;
18128
  const TargetOptions &Options = getTargetMachine().Options;
18129
  unsigned Opc = N->getOpcode();
18130
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18131
  bool LegalOps = !DCI.isBeforeLegalizeOps();
18132
  SDLoc Loc(N);
18133

18134
  if (!isOperationLegal(ISD::FMA, VT))
18135
    return SDValue();
18136

18137
  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18138
  // since (fnmsub a b c)=-0 while c-ab=+0.
18139
  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18140
    return SDValue();
18141

18142
  // (fma (fneg a) b c) => (fnmsub a b c)
18143
  // (fnmsub (fneg a) b c) => (fma a b c)
18144
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18145
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18146

18147
  // (fma a (fneg b) c) => (fnmsub a b c)
18148
  // (fnmsub a (fneg b) c) => (fma a b c)
18149
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18150
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18151

18152
  return SDValue();
18153
}
18154

18155
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18156
  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
18157
  if (!Subtarget.is64BitELFABI())
18158
    return false;
18159

18160
  // If not a tail call then no need to proceed.
18161
  if (!CI->isTailCall())
18162
    return false;
18163

18164
  // If sibling calls have been disabled and tail-calls aren't guaranteed
18165
  // there is no reason to duplicate.
18166
  auto &TM = getTargetMachine();
18167
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18168
    return false;
18169

18170
  // Can't tail call a function called indirectly, or if it has variadic args.
18171
  const Function *Callee = CI->getCalledFunction();
18172
  if (!Callee || Callee->isVarArg())
18173
    return false;
18174

18175
  // Make sure the callee and caller calling conventions are eligible for tco.
18176
  const Function *Caller = CI->getParent()->getParent();
18177
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18178
                                           CI->getCallingConv()))
18179
      return false;
18180

18181
  // If the function is local then we have a good chance at tail-calling it
18182
  return getTargetMachine().shouldAssumeDSOLocal(Callee);
18183
}
18184

18185
bool PPCTargetLowering::
18186
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18187
  const Value *Mask = AndI.getOperand(1);
18188
  // If the mask is suitable for andi. or andis. we should sink the and.
18189
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18190
    // Can't handle constants wider than 64-bits.
18191
    if (CI->getBitWidth() > 64)
18192
      return false;
18193
    int64_t ConstVal = CI->getZExtValue();
18194
    return isUInt<16>(ConstVal) ||
18195
      (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18196
  }
18197

18198
  // For non-constant masks, we can always use the record-form and.
18199
  return true;
18200
}
18201

18202
/// getAddrModeForFlags - Based on the set of address flags, select the most
18203
/// optimal instruction format to match by.
18204
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18205
  // This is not a node we should be handling here.
18206
  if (Flags == PPC::MOF_None)
18207
    return PPC::AM_None;
18208
  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18209
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18210
    if ((Flags & FlagSet) == FlagSet)
18211
      return PPC::AM_DForm;
18212
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18213
    if ((Flags & FlagSet) == FlagSet)
18214
      return PPC::AM_DSForm;
18215
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18216
    if ((Flags & FlagSet) == FlagSet)
18217
      return PPC::AM_DQForm;
18218
  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18219
    if ((Flags & FlagSet) == FlagSet)
18220
      return PPC::AM_PrefixDForm;
18221
  // If no other forms are selected, return an X-Form as it is the most
18222
  // general addressing mode.
18223
  return PPC::AM_XForm;
18224
}
18225

18226
/// Set alignment flags based on whether or not the Frame Index is aligned.
18227
/// Utilized when computing flags for address computation when selecting
18228
/// load and store instructions.
18229
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18230
                               SelectionDAG &DAG) {
18231
  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18232
  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18233
  if (!FI)
18234
    return;
18235
  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18236
  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18237
  // If this is (add $FI, $S16Imm), the alignment flags are already set
18238
  // based on the immediate. We just need to clear the alignment flags
18239
  // if the FI alignment is weaker.
18240
  if ((FrameIndexAlign % 4) != 0)
18241
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18242
  if ((FrameIndexAlign % 16) != 0)
18243
    FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18244
  // If the address is a plain FrameIndex, set alignment flags based on
18245
  // FI alignment.
18246
  if (!IsAdd) {
18247
    if ((FrameIndexAlign % 4) == 0)
18248
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18249
    if ((FrameIndexAlign % 16) == 0)
18250
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18251
  }
18252
}
18253

18254
/// Given a node, compute flags that are used for address computation when
18255
/// selecting load and store instructions. The flags computed are stored in
18256
/// FlagSet. This function takes into account whether the node is a constant,
18257
/// an ADD, OR, or a constant, and computes the address flags accordingly.
18258
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18259
                                              SelectionDAG &DAG) {
18260
  // Set the alignment flags for the node depending on if the node is
18261
  // 4-byte or 16-byte aligned.
18262
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18263
    if ((Imm & 0x3) == 0)
18264
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18265
    if ((Imm & 0xf) == 0)
18266
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18267
  };
18268

18269
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18270
    // All 32-bit constants can be computed as LIS + Disp.
18271
    const APInt &ConstImm = CN->getAPIntValue();
18272
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18273
      FlagSet |= PPC::MOF_AddrIsSImm32;
18274
      SetAlignFlagsForImm(ConstImm.getZExtValue());
18275
      setAlignFlagsForFI(N, FlagSet, DAG);
18276
    }
18277
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18278
      FlagSet |= PPC::MOF_RPlusSImm34;
18279
    else // Let constant materialization handle large constants.
18280
      FlagSet |= PPC::MOF_NotAddNorCst;
18281
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18282
    // This address can be represented as an addition of:
18283
    // - Register + Imm16 (possibly a multiple of 4/16)
18284
    // - Register + Imm34
18285
    // - Register + PPCISD::Lo
18286
    // - Register + Register
18287
    // In any case, we won't have to match this as Base + Zero.
18288
    SDValue RHS = N.getOperand(1);
18289
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18290
      const APInt &ConstImm = CN->getAPIntValue();
18291
      if (ConstImm.isSignedIntN(16)) {
18292
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18293
        SetAlignFlagsForImm(ConstImm.getZExtValue());
18294
        setAlignFlagsForFI(N, FlagSet, DAG);
18295
      }
18296
      if (ConstImm.isSignedIntN(34))
18297
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18298
      else
18299
        FlagSet |= PPC::MOF_RPlusR; // Register.
18300
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18301
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18302
    else
18303
      FlagSet |= PPC::MOF_RPlusR;
18304
  } else { // The address computation is not a constant or an addition.
18305
    setAlignFlagsForFI(N, FlagSet, DAG);
18306
    FlagSet |= PPC::MOF_NotAddNorCst;
18307
  }
18308
}
18309

18310
/// Return true if \p N is a PPCISD::MAT_PCREL_ADDR node or is accepted by
/// isValidPCRelNode for one of the constant-pool, global-address, jump-table
/// or block-address node kinds.
static bool isPCRelNode(SDValue N) {
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;
  if (isValidPCRelNode<ConstantPoolSDNode>(N))
    return true;
  if (isValidPCRelNode<GlobalAddressSDNode>(N))
    return true;
  if (isValidPCRelNode<JumpTableSDNode>(N))
    return true;
  return isValidPCRelNode<BlockAddressSDNode>(N);
}
18317

18318
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
18319
/// the address flags of the load/store instruction that is to be matched.
18320
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18321
                                           SelectionDAG &DAG) const {
18322
  unsigned FlagSet = PPC::MOF_None;
18323

18324
  // Compute subtarget flags.
18325
  if (!Subtarget.hasP9Vector())
18326
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
18327
  else
18328
    FlagSet |= PPC::MOF_SubtargetP9;
18329

18330
  if (Subtarget.hasPrefixInstrs())
18331
    FlagSet |= PPC::MOF_SubtargetP10;
18332

18333
  if (Subtarget.hasSPE())
18334
    FlagSet |= PPC::MOF_SubtargetSPE;
18335

18336
  // Check if we have a PCRel node and return early.
18337
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18338
    return FlagSet;
18339

18340
  // If the node is the paired load/store intrinsics, compute flags for
18341
  // address computation and return early.
18342
  unsigned ParentOp = Parent->getOpcode();
18343
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18344
                               (ParentOp == ISD::INTRINSIC_VOID))) {
18345
    unsigned ID = Parent->getConstantOperandVal(1);
18346
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18347
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18348
                             ? Parent->getOperand(2)
18349
                             : Parent->getOperand(3);
18350
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18351
      FlagSet |= PPC::MOF_Vector;
18352
      return FlagSet;
18353
    }
18354
  }
18355

18356
  // Mark this as something we don't want to handle here if it is atomic
18357
  // or pre-increment instruction.
18358
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18359
    if (LSB->isIndexed())
18360
      return PPC::MOF_None;
18361

18362
  // Compute in-memory type flags. This is based on if there are scalars,
18363
  // floats or vectors.
18364
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18365
  assert(MN && "Parent should be a MemSDNode!");
18366
  EVT MemVT = MN->getMemoryVT();
18367
  unsigned Size = MemVT.getSizeInBits();
18368
  if (MemVT.isScalarInteger()) {
18369
    assert(Size <= 128 &&
18370
           "Not expecting scalar integers larger than 16 bytes!");
18371
    if (Size < 32)
18372
      FlagSet |= PPC::MOF_SubWordInt;
18373
    else if (Size == 32)
18374
      FlagSet |= PPC::MOF_WordInt;
18375
    else
18376
      FlagSet |= PPC::MOF_DoubleWordInt;
18377
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18378
    if (Size == 128)
18379
      FlagSet |= PPC::MOF_Vector;
18380
    else if (Size == 256) {
18381
      assert(Subtarget.pairedVectorMemops() &&
18382
             "256-bit vectors are only available when paired vector memops is "
18383
             "enabled!");
18384
      FlagSet |= PPC::MOF_Vector;
18385
    } else
18386
      llvm_unreachable("Not expecting illegal vectors!");
18387
  } else { // Floating point type: can be scalar, f128 or vector types.
18388
    if (Size == 32 || Size == 64)
18389
      FlagSet |= PPC::MOF_ScalarFloat;
18390
    else if (MemVT == MVT::f128 || MemVT.isVector())
18391
      FlagSet |= PPC::MOF_Vector;
18392
    else
18393
      llvm_unreachable("Not expecting illegal scalar floats!");
18394
  }
18395

18396
  // Compute flags for address computation.
18397
  computeFlagsForAddressComputation(N, FlagSet, DAG);
18398

18399
  // Compute type extension flags.
18400
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18401
    switch (LN->getExtensionType()) {
18402
    case ISD::SEXTLOAD:
18403
      FlagSet |= PPC::MOF_SExt;
18404
      break;
18405
    case ISD::EXTLOAD:
18406
    case ISD::ZEXTLOAD:
18407
      FlagSet |= PPC::MOF_ZExt;
18408
      break;
18409
    case ISD::NON_EXTLOAD:
18410
      FlagSet |= PPC::MOF_NoExt;
18411
      break;
18412
    }
18413
  } else
18414
    FlagSet |= PPC::MOF_NoExt;
18415

18416
  // For integers, no extension is the same as zero extension.
18417
  // We set the extension mode to zero extension so we don't have
18418
  // to add separate entries in AddrModesMap for loads and stores.
18419
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18420
    FlagSet |= PPC::MOF_ZExt;
18421
    FlagSet &= ~PPC::MOF_NoExt;
18422
  }
18423

18424
  // If we don't have prefixed instructions, 34-bit constants should be
18425
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18426
  bool IsNonP1034BitConst =
18427
      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18428
       FlagSet) == PPC::MOF_RPlusSImm34;
18429
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18430
      IsNonP1034BitConst)
18431
    FlagSet |= PPC::MOF_NotAddNorCst;
18432

18433
  return FlagSet;
18434
}
18435

18436
/// SelectForceXFormMode - Given the specified address, force it to be
18437
/// represented as an indexed [r+r] operation (an XForm instruction).
18438
PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18439
                                                      SDValue &Base,
18440
                                                      SelectionDAG &DAG) const {
18441

18442
  PPC::AddrMode Mode = PPC::AM_XForm;
18443
  int16_t ForceXFormImm = 0;
18444
  if (provablyDisjointOr(DAG, N) &&
18445
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18446
    Disp = N.getOperand(0);
18447
    Base = N.getOperand(1);
18448
    return Mode;
18449
  }
18450

18451
  // If the address is the result of an add, we will utilize the fact that the
18452
  // address calculation includes an implicit add.  However, we can reduce
18453
  // register pressure if we do not materialize a constant just for use as the
18454
  // index register.  We only get rid of the add if it is not an add of a
18455
  // value and a 16-bit signed constant and both have a single use.
18456
  if (N.getOpcode() == ISD::ADD &&
18457
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18458
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18459
    Disp = N.getOperand(0);
18460
    Base = N.getOperand(1);
18461
    return Mode;
18462
  }
18463

18464
  // Otherwise, use R0 as the base register.
18465
  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18466
                         N.getValueType());
18467
  Base = N;
18468

18469
  return Mode;
18470
}
18471

18472
bool PPCTargetLowering::splitValueIntoRegisterParts(
18473
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18474
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18475
  EVT ValVT = Val.getValueType();
18476
  // If we are splitting a scalar integer into f64 parts (i.e. so they
18477
  // can be placed into VFRC registers), we need to zero extend and
18478
  // bitcast the values. This will ensure the value is placed into a
18479
  // VSR using direct moves or stack operations as needed.
18480
  if (PartVT == MVT::f64 &&
18481
      (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18482
    Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18483
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18484
    Parts[0] = Val;
18485
    return true;
18486
  }
18487
  return false;
18488
}
18489

18490
/// Lower \p Op to a C-calling-convention call of the external symbol
/// \p LibCallName. Every operand of \p Op becomes an argument, and the call
/// returns the value type of \p Op. The call is marked as a tail call when
/// it sits in tail-call position and its return type matches the enclosing
/// function's (or the function returns void).
SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
                                          SelectionDAG &DAG) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::CallLoweringInfo CLI(DAG);

  const EVT ResultVT = Op.getValueType();
  Type *ResultTy = ResultVT.getTypeForEVT(*DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
  const bool ResultIsSExt = TLI.shouldSignExtendTypeInLibCall(ResultVT, false);

  // One argument per operand; each is either sign- or zero-extended.
  TargetLowering::ArgListTy CallArgs;
  for (const SDValue &Operand : Op->op_values()) {
    const EVT OperandVT = Operand.getValueType();
    TargetLowering::ArgListEntry Arg;
    Arg.Node = Operand;
    Arg.Ty = OperandVT.getTypeForEVT(*DAG.getContext());
    Arg.IsSExt = TLI.shouldSignExtendTypeInLibCall(OperandVT, ResultIsSExt);
    Arg.IsZExt = !Arg.IsSExt;
    CallArgs.push_back(Arg);
  }

  // isInTailCallPosition may rewrite TailChain; it only replaces the entry
  // chain when the call is actually emitted as a tail call.
  SDValue Chain = DAG.getEntryNode();
  SDValue TailChain = Chain;
  Type *CallerRetTy = DAG.getMachineFunction().getFunction().getReturnType();
  const bool EmitAsTailCall =
      TLI.isInTailCallPosition(DAG, Op.getNode(), TailChain) &&
      (ResultTy == CallerRetTy || CallerRetTy->isVoidTy());
  if (EmitAsTailCall)
    Chain = TailChain;

  CLI.setDebugLoc(SDLoc(Op))
      .setChain(Chain)
      .setLibCallee(CallingConv::C, ResultTy, Callee, std::move(CallArgs))
      .setTailCall(EmitAsTailCall)
      .setSExtResult(ResultIsSExt)
      .setZExtResult(!ResultIsSExt)
      .setIsPostTypeLegalization(true);
  return TLI.LowerCallTo(CLI).first;
}
18528

18529
/// Pick the library call by the value type of \p Op: \p LibCallFloatName for
/// f32, \p LibCallDoubleName for f64. Any other type yields an empty SDValue.
SDValue PPCTargetLowering::lowerLibCallBasedOnType(
    const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
    SelectionDAG &DAG) const {
  const EVT VT = Op.getValueType();
  if (VT == MVT::f32)
    return lowerToLibCall(LibCallFloatName, Op, DAG);
  return VT == MVT::f64 ? lowerToLibCall(LibCallDoubleName, Op, DAG)
                        : SDValue();
}
18540

18541
/// True when \p Op passes isLowringToMASSSafe and its node flags also carry
/// no-signed-zeros, no-NaNs and no-infs.
bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
  if (!isLowringToMASSSafe(Op))
    return false;
  const SDNodeFlags NodeFlags = Op.getNode()->getFlags();
  return NodeFlags.hasNoSignedZeros() && NodeFlags.hasNoNaNs() &&
         NodeFlags.hasNoInfs();
}
18546

18547
/// True when the node behind \p Op carries the approximate-functions flag.
bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
  const SDNodeFlags NodeFlags = Op.getNode()->getFlags();
  return NodeFlags.hasApproximateFuncs();
}
18550

18551
bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18552
  return getTargetMachine().Options.PPCGenScalarMASSEntries;
18553
}
18554

18555
/// Lower \p Op to one of four library entry points, chosen by its type
/// (float vs. double) and by whether the finite-only variants are allowed.
/// Returns an empty SDValue when scalar MASS conversion is disabled or
/// \p Op does not pass isLowringToMASSSafe.
SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
                                            const char *LibCallFloatName,
                                            const char *LibCallDoubleNameFinite,
                                            const char *LibCallFloatNameFinite,
                                            SDValue Op,
                                            SelectionDAG &DAG) const {
  const bool Eligible =
      isScalarMASSConversionEnabled() && isLowringToMASSSafe(Op);
  if (!Eligible)
    return SDValue();

  // The *_finite entry points are used only when the finite check passes.
  const bool UseFinite = isLowringToMASSFiniteSafe(Op);
  const char *FloatEntry = UseFinite ? LibCallFloatNameFinite : LibCallFloatName;
  const char *DoubleEntry =
      UseFinite ? LibCallDoubleNameFinite : LibCallDoubleName;
  return lowerLibCallBasedOnType(FloatEntry, DoubleEntry, Op, DAG);
}
18571

18572
/// Lower a pow node through lowerLibCallBase using the __xl_pow* entries.
SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
  const char *const DoubleFn = "__xl_pow";
  const char *const FloatFn = "__xl_powf";
  const char *const DoubleFiniteFn = "__xl_pow_finite";
  const char *const FloatFiniteFn = "__xl_powf_finite";
  return lowerLibCallBase(DoubleFn, FloatFn, DoubleFiniteFn, FloatFiniteFn, Op,
                          DAG);
}
18576

18577
/// Lower a sin node through lowerLibCallBase using the __xl_sin* entries.
SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
  const char *const DoubleFn = "__xl_sin";
  const char *const FloatFn = "__xl_sinf";
  const char *const DoubleFiniteFn = "__xl_sin_finite";
  const char *const FloatFiniteFn = "__xl_sinf_finite";
  return lowerLibCallBase(DoubleFn, FloatFn, DoubleFiniteFn, FloatFiniteFn, Op,
                          DAG);
}
18581

18582
/// Lower a cos node through lowerLibCallBase using the __xl_cos* entries.
SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
  const char *const DoubleFn = "__xl_cos";
  const char *const FloatFn = "__xl_cosf";
  const char *const DoubleFiniteFn = "__xl_cos_finite";
  const char *const FloatFiniteFn = "__xl_cosf_finite";
  return lowerLibCallBase(DoubleFn, FloatFn, DoubleFiniteFn, FloatFiniteFn, Op,
                          DAG);
}
18586

18587
/// Lower a log node through lowerLibCallBase using the __xl_log* entries.
SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
  const char *const DoubleFn = "__xl_log";
  const char *const FloatFn = "__xl_logf";
  const char *const DoubleFiniteFn = "__xl_log_finite";
  const char *const FloatFiniteFn = "__xl_logf_finite";
  return lowerLibCallBase(DoubleFn, FloatFn, DoubleFiniteFn, FloatFiniteFn, Op,
                          DAG);
}
18591

18592
/// Lower a log10 node through lowerLibCallBase using the __xl_log10* entries.
SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
  const char *const DoubleFn = "__xl_log10";
  const char *const FloatFn = "__xl_log10f";
  const char *const DoubleFiniteFn = "__xl_log10_finite";
  const char *const FloatFiniteFn = "__xl_log10f_finite";
  return lowerLibCallBase(DoubleFn, FloatFn, DoubleFiniteFn, FloatFiniteFn, Op,
                          DAG);
}
18596

18597
/// Lower an exp node through lowerLibCallBase using the __xl_exp* entries.
SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
  const char *const DoubleFn = "__xl_exp";
  const char *const FloatFn = "__xl_expf";
  const char *const DoubleFiniteFn = "__xl_exp_finite";
  const char *const FloatFiniteFn = "__xl_expf_finite";
  return lowerLibCallBase(DoubleFn, FloatFn, DoubleFiniteFn, FloatFiniteFn, Op,
                          DAG);
}
18601

18602
// If we happen to match to an aligned D-Form, check if the Frame Index is
18603
// adequately aligned. If it is not, reset the mode to match to X-Form.
18604
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18605
                                   PPC::AddrMode &Mode) {
18606
  if (!isa<FrameIndexSDNode>(N))
18607
    return;
18608
  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18609
      (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18610
    Mode = PPC::AM_XForm;
18611
}
18612

18613
/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
18614
/// compute the address flags of the node, get the optimal address mode based
18615
/// on the flags, and set the Base and Disp based on the address mode.
18616
PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18617
                                                       SDValue N, SDValue &Disp,
18618
                                                       SDValue &Base,
18619
                                                       SelectionDAG &DAG,
18620
                                                       MaybeAlign Align) const {
18621
  SDLoc DL(Parent);
18622

18623
  // Compute the address flags.
18624
  unsigned Flags = computeMOFlags(Parent, N, DAG);
18625

18626
  // Get the optimal address mode based on the Flags.
18627
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18628

18629
  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18630
  // Select an X-Form load if it is not.
18631
  setXFormForUnalignedFI(N, Flags, Mode);
18632

18633
  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18634
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18635
    assert(Subtarget.isUsingPCRelativeCalls() &&
18636
           "Must be using PC-Relative calls when a valid PC-Relative node is "
18637
           "present!");
18638
    Mode = PPC::AM_PCRel;
18639
  }
18640

18641
  // Set Base and Disp accordingly depending on the address mode.
18642
  switch (Mode) {
18643
  case PPC::AM_DForm:
18644
  case PPC::AM_DSForm:
18645
  case PPC::AM_DQForm: {
18646
    // This is a register plus a 16-bit immediate. The base will be the
18647
    // register and the displacement will be the immediate unless it
18648
    // isn't sufficiently aligned.
18649
    if (Flags & PPC::MOF_RPlusSImm16) {
18650
      SDValue Op0 = N.getOperand(0);
18651
      SDValue Op1 = N.getOperand(1);
18652
      int16_t Imm = Op1->getAsZExtVal();
18653
      if (!Align || isAligned(*Align, Imm)) {
18654
        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18655
        Base = Op0;
18656
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18657
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18658
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18659
        }
18660
        break;
18661
      }
18662
    }
18663
    // This is a register plus the @lo relocation. The base is the register
18664
    // and the displacement is the global address.
18665
    else if (Flags & PPC::MOF_RPlusLo) {
18666
      Disp = N.getOperand(1).getOperand(0); // The global address.
18667
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18668
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18669
             Disp.getOpcode() == ISD::TargetConstantPool ||
18670
             Disp.getOpcode() == ISD::TargetJumpTable);
18671
      Base = N.getOperand(0);
18672
      break;
18673
    }
18674
    // This is a constant address at most 32 bits. The base will be
18675
    // zero or load-immediate-shifted and the displacement will be
18676
    // the low 16 bits of the address.
18677
    else if (Flags & PPC::MOF_AddrIsSImm32) {
18678
      auto *CN = cast<ConstantSDNode>(N);
18679
      EVT CNType = CN->getValueType(0);
18680
      uint64_t CNImm = CN->getZExtValue();
18681
      // If this address fits entirely in a 16-bit sext immediate field, codegen
18682
      // this as "d, 0".
18683
      int16_t Imm;
18684
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18685
        Disp = DAG.getTargetConstant(Imm, DL, CNType);
18686
        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18687
                               CNType);
18688
        break;
18689
      }
18690
      // Handle 32-bit sext immediate with LIS + Addr mode.
18691
      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18692
          (!Align || isAligned(*Align, CNImm))) {
18693
        int32_t Addr = (int32_t)CNImm;
18694
        // Otherwise, break this down into LIS + Disp.
18695
        Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18696
        Base =
18697
            DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18698
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18699
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18700
        break;
18701
      }
18702
    }
18703
    // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
18704
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18705
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18706
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18707
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18708
    } else
18709
      Base = N;
18710
    break;
18711
  }
18712
  case PPC::AM_PrefixDForm: {
18713
    int64_t Imm34 = 0;
18714
    unsigned Opcode = N.getOpcode();
18715
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18716
        (isIntS34Immediate(N.getOperand(1), Imm34))) {
18717
      // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
18718
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18719
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18720
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18721
      else
18722
        Base = N.getOperand(0);
18723
    } else if (isIntS34Immediate(N, Imm34)) {
18724
      // The address is a 34-bit signed immediate.
18725
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18726
      Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18727
    }
18728
    break;
18729
  }
18730
  case PPC::AM_PCRel: {
18731
    // When selecting PC-Relative instructions, "Base" is not utilized as
18732
    // we select the address as [PC+imm].
18733
    Disp = N;
18734
    break;
18735
  }
18736
  case PPC::AM_None:
18737
    break;
18738
  default: { // By default, X-Form is always available to be selected.
18739
    // When a frame index is not aligned, we also match by XForm.
18740
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18741
    Base = FI ? N : N.getOperand(1);
18742
    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18743
                                N.getValueType())
18744
              : N.getOperand(0);
18745
    break;
18746
  }
18747
  }
18748
  return Mode;
18749
}
18750

18751
/// Return the calling-convention assignment function for \p CC. Only the
/// return side of CallingConv::Cold gets RetCC_PPC_Cold; every other case
/// uses CC_PPC64_ELF. \p IsVarArg is not consulted.
CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
                                                 bool Return,
                                                 bool IsVarArg) const {
  if (CC == CallingConv::Cold && Return)
    return RetCC_PPC_Cold;
  return CC_PPC64_ELF;
}
18761

18762
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18763
  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18764
}
18765

18766
/// Choose how an atomicrmw is expanded in IR: 128-bit operations use the
/// masked intrinsic when quadword atomics are inlined, uinc_wrap/udec_wrap
/// expand to a cmpxchg loop, and everything else follows the generic
/// TargetLowering decision.
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  const unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
  if (Bits == 128 && shouldInlineQuadwordAtomics())
    return AtomicExpansionKind::MaskedIntrinsic;

  const auto Operation = AI->getOperation();
  if (Operation == AtomicRMWInst::UIncWrap ||
      Operation == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  return TargetLowering::shouldExpandAtomicRMWInIR(AI);
}
18782

18783
/// Choose how a cmpxchg is expanded in IR: a 128-bit new value uses the
/// masked intrinsic when quadword atomics are inlined; otherwise the generic
/// TargetLowering decision applies.
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
  Type *NewValTy = AI->getNewValOperand()->getType();
  const unsigned Bits = NewValTy->getPrimitiveSizeInBits();
  if (Bits == 128 && shouldInlineQuadwordAtomics())
    return AtomicExpansionKind::MaskedIntrinsic;
  return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}
18790

18791
/// Map an atomicrmw binary operation to the matching ppc_atomicrmw_*_i128
/// intrinsic. Operations without such an intrinsic are not expected here.
static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  }
}
18812

18813
/// Emit the 128-bit atomicrmw intrinsic call for \p AI. \p Incr is split
/// into two i64 halves for the call, and the {lo, hi} result is reassembled
/// into a single 128-bit value, which is returned. \p Mask, \p ShiftAmt and
/// \p Ord are not used.
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *RMWFn = Intrinsic::getDeclaration(
      M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
  Type *HalfTy = Type::getInt64Ty(M->getContext());

  // Low / high 64 bits of a 128-bit value.
  auto LowHalf = [&](Value *Wide, const char *Name) {
    return Builder.CreateTrunc(Wide, HalfTy, Name);
  };
  auto HighHalf = [&](Value *Wide, const char *Name) {
    return Builder.CreateTrunc(Builder.CreateLShr(Wide, 64), HalfTy, Name);
  };

  Value *IncrLow = LowHalf(Incr, "incr_lo");
  Value *IncrHigh = HighHalf(Incr, "incr_hi");
  Value *Pair = Builder.CreateCall(RMWFn, {AlignedAddr, IncrLow, IncrHigh});

  // Reassemble the result: zext(lo) | (zext(hi) << 64).
  Value *ResLow = Builder.CreateExtractValue(Pair, 0, "lo");
  Value *ResHigh = Builder.CreateExtractValue(Pair, 1, "hi");
  ResLow = Builder.CreateZExt(ResLow, ValTy, "lo64");
  ResHigh = Builder.CreateZExt(ResHigh, ValTy, "hi64");
  Value *HighShifted = Builder.CreateShl(ResHigh, ConstantInt::get(ValTy, 64));
  return Builder.CreateOr(ResLow, HighShifted, "val64");
}
18834

18835
/// Emit the 128-bit cmpxchg intrinsic call for \p CI, bracketed by the
/// leading and trailing fences for \p Ord. \p CmpVal and \p NewVal are each
/// split into two i64 halves for the call, and the {lo, hi} result is
/// reassembled into a single 128-bit value, which is returned. \p Mask is
/// not used.
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *CmpXchgFn =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *HalfTy = Type::getInt64Ty(M->getContext());

  // Low / high 64 bits of a 128-bit value.
  auto LowHalf = [&](Value *Wide, const char *Name) {
    return Builder.CreateTrunc(Wide, HalfTy, Name);
  };
  auto HighHalf = [&](Value *Wide, const char *Name) {
    return Builder.CreateTrunc(Builder.CreateLShr(Wide, 64), HalfTy, Name);
  };

  Value *CmpLow = LowHalf(CmpVal, "cmp_lo");
  Value *CmpHigh = HighHalf(CmpVal, "cmp_hi");
  Value *NewLow = LowHalf(NewVal, "new_lo");
  Value *NewHigh = HighHalf(NewVal, "new_hi");

  emitLeadingFence(Builder, CI, Ord);
  Value *Pair = Builder.CreateCall(
      CmpXchgFn, {AlignedAddr, CmpLow, CmpHigh, NewLow, NewHigh});
  emitTrailingFence(Builder, CI, Ord);

  // Reassemble the result: zext(lo) | (zext(hi) << 64).
  Value *ResLow = Builder.CreateExtractValue(Pair, 0, "lo");
  Value *ResHigh = Builder.CreateExtractValue(Pair, 1, "hi");
  ResLow = Builder.CreateZExt(ResLow, ValTy, "lo64");
  ResHigh = Builder.CreateZExt(ResHigh, ValTy, "hi64");
  Value *HighShifted = Builder.CreateShl(ResHigh, ConstantInt::get(ValTy, 64));
  return Builder.CreateOr(ResLow, HighShifted, "val64");
}
18862

18863
Product

Resources

Company