GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
1
//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines a pattern matching instruction selector for PowerPC,
10
// converting from a legalized dag to a PPC dag.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "MCTargetDesc/PPCMCTargetDesc.h"
15
#include "MCTargetDesc/PPCPredicates.h"
16
#include "PPC.h"
17
#include "PPCISelLowering.h"
18
#include "PPCMachineFunctionInfo.h"
19
#include "PPCSubtarget.h"
20
#include "PPCTargetMachine.h"
21
#include "llvm/ADT/APInt.h"
22
#include "llvm/ADT/APSInt.h"
23
#include "llvm/ADT/DenseMap.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/ADT/SmallPtrSet.h"
26
#include "llvm/ADT/SmallVector.h"
27
#include "llvm/ADT/Statistic.h"
28
#include "llvm/Analysis/BranchProbabilityInfo.h"
29
#include "llvm/CodeGen/FunctionLoweringInfo.h"
30
#include "llvm/CodeGen/ISDOpcodes.h"
31
#include "llvm/CodeGen/MachineBasicBlock.h"
32
#include "llvm/CodeGen/MachineFrameInfo.h"
33
#include "llvm/CodeGen/MachineFunction.h"
34
#include "llvm/CodeGen/MachineInstrBuilder.h"
35
#include "llvm/CodeGen/MachineRegisterInfo.h"
36
#include "llvm/CodeGen/SelectionDAG.h"
37
#include "llvm/CodeGen/SelectionDAGISel.h"
38
#include "llvm/CodeGen/SelectionDAGNodes.h"
39
#include "llvm/CodeGen/TargetInstrInfo.h"
40
#include "llvm/CodeGen/TargetRegisterInfo.h"
41
#include "llvm/CodeGen/ValueTypes.h"
42
#include "llvm/CodeGenTypes/MachineValueType.h"
43
#include "llvm/IR/BasicBlock.h"
44
#include "llvm/IR/DebugLoc.h"
45
#include "llvm/IR/Function.h"
46
#include "llvm/IR/GlobalValue.h"
47
#include "llvm/IR/InlineAsm.h"
48
#include "llvm/IR/InstrTypes.h"
49
#include "llvm/IR/IntrinsicsPowerPC.h"
50
#include "llvm/IR/Module.h"
51
#include "llvm/Support/Casting.h"
52
#include "llvm/Support/CodeGen.h"
53
#include "llvm/Support/CommandLine.h"
54
#include "llvm/Support/Compiler.h"
55
#include "llvm/Support/Debug.h"
56
#include "llvm/Support/ErrorHandling.h"
57
#include "llvm/Support/KnownBits.h"
58
#include "llvm/Support/MathExtras.h"
59
#include "llvm/Support/raw_ostream.h"
60
#include <algorithm>
61
#include <cassert>
62
#include <cstdint>
63
#include <iterator>
64
#include <limits>
65
#include <memory>
66
#include <new>
67
#include <tuple>
68
#include <utility>
69
70
using namespace llvm;
71
72
#define DEBUG_TYPE "ppc-isel"
73
#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
74
75
STATISTIC(NumSextSetcc,
76
"Number of (sext(setcc)) nodes expanded into GPR sequence.");
77
STATISTIC(NumZextSetcc,
78
"Number of (zext(setcc)) nodes expanded into GPR sequence.");
79
STATISTIC(SignExtensionsAdded,
80
"Number of sign extensions for compare inputs added.");
81
STATISTIC(ZeroExtensionsAdded,
82
"Number of zero extensions for compare inputs added.");
83
STATISTIC(NumLogicOpsOnComparison,
84
"Number of logical ops on i1 values calculated in GPR.");
85
STATISTIC(OmittedForNonExtendUses,
86
"Number of compares not eliminated as they have non-extending uses.");
87
STATISTIC(NumP9Setb,
88
"Number of compares lowered to setb.");
89
90
// FIXME: Remove this once the bug has been fixed!
91
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
92
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
93
94
static cl::opt<bool>
95
UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
96
cl::desc("use aggressive ppc isel for bit permutations"),
97
cl::Hidden);
98
static cl::opt<bool> BPermRewriterNoMasking(
99
"ppc-bit-perm-rewriter-stress-rotates",
100
cl::desc("stress rotate selection in aggressive ppc isel for "
101
"bit permutations"),
102
cl::Hidden);
103
104
static cl::opt<bool> EnableBranchHint(
105
"ppc-use-branch-hint", cl::init(true),
106
cl::desc("Enable static hinting of branches on ppc"),
107
cl::Hidden);
108
109
static cl::opt<bool> EnableTLSOpt(
110
"ppc-tls-opt", cl::init(true),
111
cl::desc("Enable tls optimization peephole"),
112
cl::Hidden);
113
114
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
115
ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
116
ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
117
118
static cl::opt<ICmpInGPRType> CmpInGPR(
119
"ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
120
cl::desc("Specify the types of comparisons to emit GPR-only code for."),
121
cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
122
clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
123
clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
124
clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
125
clEnumValN(ICGPR_NonExtIn, "nonextin",
126
"Only comparisons where inputs don't need [sz]ext."),
127
clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
128
clEnumValN(ICGPR_ZextI32, "zexti32",
129
"Only i32 comparisons with zext result."),
130
clEnumValN(ICGPR_ZextI64, "zexti64",
131
"Only i64 comparisons with zext result."),
132
clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
133
clEnumValN(ICGPR_SextI32, "sexti32",
134
"Only i32 comparisons with sext result."),
135
clEnumValN(ICGPR_SextI64, "sexti64",
136
"Only i64 comparisons with sext result.")));
137
namespace {
138
139
//===--------------------------------------------------------------------===//
140
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
141
/// instructions for SelectionDAG operations.
142
///
143
class PPCDAGToDAGISel : public SelectionDAGISel {
144
const PPCTargetMachine &TM;
145
const PPCSubtarget *Subtarget = nullptr;
146
const PPCTargetLowering *PPCLowering = nullptr;
147
unsigned GlobalBaseReg = 0;
148
149
public:
150
PPCDAGToDAGISel() = delete;
151
152
explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
153
: SelectionDAGISel(tm, OptLevel), TM(tm) {}
154
155
bool runOnMachineFunction(MachineFunction &MF) override {
156
// Make sure we re-emit a set of the global base reg if necessary
157
GlobalBaseReg = 0;
158
Subtarget = &MF.getSubtarget<PPCSubtarget>();
159
PPCLowering = Subtarget->getTargetLowering();
160
if (Subtarget->hasROPProtect()) {
161
// Create a place on the stack for the ROP Protection Hash.
162
// The ROP Protection Hash will always be 8 bytes and aligned to 8
163
// bytes.
164
MachineFrameInfo &MFI = MF.getFrameInfo();
165
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
166
const int Result = MFI.CreateStackObject(8, Align(8), false);
167
FI->setROPProtectionHashSaveIndex(Result);
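// (Note: this frame index is expected to be picked up later by frame
// lowering as the save slot for the ROP protection hash.)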
168
}
169
SelectionDAGISel::runOnMachineFunction(MF);
170
171
return true;
172
}
173
174
void PreprocessISelDAG() override;
175
void PostprocessISelDAG() override;
176
177
/// getI16Imm - Return a target constant with the specified value, of type
178
/// i16.
179
inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
180
return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
181
}
182
183
/// getI32Imm - Return a target constant with the specified value, of type
184
/// i32.
185
inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
186
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
187
}
188
189
/// getI64Imm - Return a target constant with the specified value, of type
190
/// i64.
191
inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
192
return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
193
}
194
195
/// getSmallIPtrImm - Return a target constant of pointer type.
196
inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
197
return CurDAG->getTargetConstant(
198
Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
199
}
200
201
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
202
/// rotate and mask opcode and mask operation.
203
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
204
unsigned &SH, unsigned &MB, unsigned &ME);
205
206
/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
207
/// base register. Return the virtual register that holds this value.
208
SDNode *getGlobalBaseReg();
209
210
void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);
211
212
// Select - Convert the specified operand from a target-independent to a
213
// target-specific node if it hasn't already been changed.
214
void Select(SDNode *N) override;
215
216
bool tryBitfieldInsert(SDNode *N);
217
bool tryBitPermutation(SDNode *N);
218
bool tryIntCompareInGPR(SDNode *N);
219
220
// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
221
// an X-Form load instruction with the offset being a relocation coming from
222
// the PPCISD::ADD_TLS.
223
bool tryTLSXFormLoad(LoadSDNode *N);
224
// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
225
// an X-Form store instruction with the offset being a relocation coming from
226
// the PPCISD::ADD_TLS.
227
bool tryTLSXFormStore(StoreSDNode *N);
228
/// SelectCC - Select a comparison of the specified values with the
229
/// specified condition code, returning the CR# of the expression.
230
SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
231
const SDLoc &dl, SDValue Chain = SDValue());
232
233
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
234
/// immediate field. Note that the operand at this point is already the
235
/// result of a prior SelectAddressRegImm call.
236
bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
237
if (N.getOpcode() == ISD::TargetConstant ||
238
N.getOpcode() == ISD::TargetGlobalAddress) {
239
Out = N;
240
return true;
241
}
242
243
return false;
244
}
245
246
/// SelectDSForm - Returns true if address N can be represented by the
247
/// addressing mode of DSForm instructions (a base register, plus a signed
248
/// 16-bit displacement that is a multiple of 4).
249
bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
250
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
251
Align(4)) == PPC::AM_DSForm;
252
}
253
254
/// SelectDQForm - Returns true if address N can be represented by the
255
/// addressing mode of DQForm instructions (a base register, plus a signed
256
/// 16-bit displacement that is a multiple of 16).
257
bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
258
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
259
Align(16)) == PPC::AM_DQForm;
260
}
261
262
/// SelectDForm - Returns true if address N can be represented by
263
/// the addressing mode of DForm instructions (a base register, plus a
264
/// signed 16-bit immediate).
265
bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
266
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
267
std::nullopt) == PPC::AM_DForm;
268
}
269
270
/// SelectPCRelForm - Returns true if address N can be represented by
271
/// PC-Relative addressing mode.
272
bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
273
SDValue &Base) {
274
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
275
std::nullopt) == PPC::AM_PCRel;
276
}
277
278
/// SelectPDForm - Returns true if address N can be represented by Prefixed
279
/// DForm addressing mode (a base register, plus a signed 34-bit immediate).
280
bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
281
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
282
std::nullopt) ==
283
PPC::AM_PrefixDForm;
284
}
285
286
/// SelectXForm - Returns true if address N can be represented by the
287
/// addressing mode of XForm instructions (an indexed [r+r] operation).
288
bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
289
return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
290
std::nullopt) == PPC::AM_XForm;
291
}
292
293
/// SelectForceXForm - Given the specified address, force it to be
294
/// represented as an indexed [r+r] operation (an XForm instruction).
295
bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
296
SDValue &Base) {
297
return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
298
PPC::AM_XForm;
299
}
300
301
/// SelectAddrIdx - Given the specified address, check to see if it can be
302
/// represented as an indexed [r+r] operation.
303
/// This is for xform instructions whose associated displacement form is D.
304
/// The last parameter \p 0 means the associated D form has no requirement for 16
305
/// bit signed displacement.
306
/// Returns false if it can be represented by [r+imm], which are preferred.
307
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
308
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
309
std::nullopt);
310
}
311
312
/// SelectAddrIdxX4 - Given the specified address, check to see if it can be
313
/// represented as an indexed [r+r] operation.
314
/// This is for xform instructions whose associated displacement form is DS.
315
/// The last parameter \p 4 means associated DS form 16 bit signed
316
/// displacement must be a multiple of 4.
317
/// Returns false if it can be represented by [r+imm], which are preferred.
318
bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
319
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
320
Align(4));
321
}
322
323
/// SelectAddrIdxX16 - Given the specified address, check to see if it can be
324
/// represented as an indexed [r+r] operation.
325
/// This is for xform instructions whose associated displacement form is DQ.
326
/// The last parameter \p 16 means associated DQ form 16 bit signed
327
/// displacement must be a multiple of 16.
328
/// Returns false if it can be represented by [r+imm], which are preferred.
329
bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
330
return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
331
Align(16));
332
}
333
334
/// SelectAddrIdxOnly - Given the specified address, force it to be
335
/// represented as an indexed [r+r] operation.
336
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
337
return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
338
}
339
340
/// SelectAddrImm - Returns true if the address N can be represented by
341
/// a base register plus a signed 16-bit displacement [r+imm].
342
/// The last parameter \p 0 means the D form has no requirement for 16 bit signed
343
/// displacement.
344
bool SelectAddrImm(SDValue N, SDValue &Disp,
345
SDValue &Base) {
346
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
347
std::nullopt);
348
}
349
350
/// SelectAddrImmX4 - Returns true if the address N can be represented by
351
/// a base register plus a signed 16-bit displacement that is a multiple of
352
/// 4 (last parameter). Suitable for use by STD and friends.
353
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
354
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
355
}
356
357
/// SelectAddrImmX16 - Returns true if the address N can be represented by
358
/// a base register plus a signed 16-bit displacement that is a multiple of
359
/// 16(last parameter). Suitable for use by STXV and friends.
360
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
361
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
362
Align(16));
363
}
364
365
/// SelectAddrImmX34 - Returns true if the address N can be represented by
366
/// a base register plus a signed 34-bit displacement. Suitable for use by
367
/// PSTXVP and friends.
368
bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
369
return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
370
}
371
372
// Select an address into a single register.
373
bool SelectAddr(SDValue N, SDValue &Base) {
374
Base = N;
375
return true;
376
}
377
378
bool SelectAddrPCRel(SDValue N, SDValue &Base) {
379
return PPCLowering->SelectAddressPCRel(N, Base);
380
}
381
382
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
383
/// inline asm expressions. It is always correct to compute the value into
384
/// a register. The case of adding a (possibly relocatable) constant to a
385
/// register can be improved, but it is wrong to substitute Reg+Reg for
386
/// Reg in an asm, because the load or store opcode would have to change.
387
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
388
InlineAsm::ConstraintCode ConstraintID,
389
std::vector<SDValue> &OutOps) override {
390
switch(ConstraintID) {
391
default:
392
errs() << "ConstraintID: "
393
<< InlineAsm::getMemConstraintName(ConstraintID) << "\n";
394
llvm_unreachable("Unexpected asm memory constraint");
395
case InlineAsm::ConstraintCode::es:
396
case InlineAsm::ConstraintCode::m:
397
case InlineAsm::ConstraintCode::o:
398
case InlineAsm::ConstraintCode::Q:
399
case InlineAsm::ConstraintCode::Z:
400
case InlineAsm::ConstraintCode::Zy:
401
// We need to make sure that this one operand does not end up in r0
402
// (because we might end up lowering this as 0(%op)).
403
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
404
const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
405
SDLoc dl(Op);
406
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
407
SDValue NewOp =
408
SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
409
dl, Op.getValueType(),
410
Op, RC), 0);
411
412
OutOps.push_back(NewOp);
413
return false;
414
}
415
return true;
416
}
417
418
// Include the pieces autogenerated from the target description.
419
#include "PPCGenDAGISel.inc"
420
421
private:
422
bool trySETCC(SDNode *N);
423
bool tryFoldSWTestBRCC(SDNode *N);
424
bool trySelectLoopCountIntrinsic(SDNode *N);
425
bool tryAsSingleRLDICL(SDNode *N);
426
bool tryAsSingleRLDCL(SDNode *N);
427
bool tryAsSingleRLDICR(SDNode *N);
428
bool tryAsSingleRLWINM(SDNode *N);
429
bool tryAsSingleRLWINM8(SDNode *N);
430
bool tryAsSingleRLWIMI(SDNode *N);
431
bool tryAsPairOfRLDICL(SDNode *N);
432
bool tryAsSingleRLDIMI(SDNode *N);
433
434
void PeepholePPC64();
435
void PeepholePPC64ZExt();
436
void PeepholeCROps();
437
438
SDValue combineToCMPB(SDNode *N);
439
void foldBoolExts(SDValue &Res, SDNode *&N);
440
441
bool AllUsersSelectZero(SDNode *N);
442
void SwapAllSelectUsers(SDNode *N);
443
444
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
445
void transferMemOperands(SDNode *N, SDNode *Result);
446
};
447
448
class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
449
public:
450
static char ID;
451
explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
452
CodeGenOptLevel OptLevel)
453
: SelectionDAGISelLegacy(
454
ID, std::make_unique<PPCDAGToDAGISel>(tm, OptLevel)) {}
455
};
456
} // end anonymous namespace
457
458
char PPCDAGToDAGISelLegacy::ID = 0;
459
460
INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
461
462
/// getGlobalBaseReg - Output the instructions required to put the
463
/// base address to use for accessing globals into a register.
464
///
465
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
466
if (!GlobalBaseReg) {
467
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
468
// Insert the set of GlobalBaseReg into the first MBB of the function
469
MachineBasicBlock &FirstMBB = MF->front();
470
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
471
const Module *M = MF->getFunction().getParent();
472
DebugLoc dl;
473
474
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
475
if (Subtarget->isTargetELF()) {
476
GlobalBaseReg = PPC::R30;
477
if (!Subtarget->isSecurePlt() &&
478
M->getPICLevel() == PICLevel::SmallPIC) {
479
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
480
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
481
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
482
} else {
483
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
484
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
485
Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
486
BuildMI(FirstMBB, MBBI, dl,
487
TII.get(PPC::UpdateGBR), GlobalBaseReg)
488
.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
489
MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
490
}
491
} else {
492
GlobalBaseReg =
493
RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
494
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
495
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
496
}
497
} else {
498
// We must ensure that this sequence is dominated by the prologue.
499
// FIXME: This is a bit of a big hammer since we don't get the benefits
500
// of shrink-wrapping whenever we emit this instruction. Considering
501
// this is used in any function where we emit a jump table, this may be
502
// a significant limitation. We should consider inserting this in the
503
// block where it is used and then commoning this sequence up if it
504
// appears in multiple places.
505
// Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
506
// MovePCtoLR8.
507
MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
508
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
509
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
510
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
511
}
512
}
513
return CurDAG->getRegister(GlobalBaseReg,
514
PPCLowering->getPointerTy(CurDAG->getDataLayout()))
515
.getNode();
516
}
517
518
// Check if a SDValue has the toc-data attribute.
519
static bool hasTocDataAttr(SDValue Val) {
520
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
521
if (!GA)
522
return false;
523
524
const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
525
if (!GV)
526
return false;
527
528
if (!GV->hasAttribute("toc-data"))
529
return false;
530
return true;
531
}
532
533
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget,
534
const TargetMachine &TM,
535
const SDNode *Node) {
536
// If there isn't an attribute to override the module code model
537
// this will be the effective code model.
538
CodeModel::Model ModuleModel = TM.getCodeModel();
539
540
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Node->getOperand(0));
541
if (!GA)
542
return ModuleModel;
543
544
const GlobalValue *GV = GA->getGlobal();
545
if (!GV)
546
return ModuleModel;
547
548
return Subtarget.getCodeModel(TM, GV);
549
}
550
551
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
552
/// operand. If so Imm will receive the 32-bit value.
553
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
554
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
555
Imm = N->getAsZExtVal();
556
return true;
557
}
558
return false;
559
}
560
561
/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
562
/// operand. If so Imm will receive the 64-bit value.
563
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
564
if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
565
Imm = N->getAsZExtVal();
566
return true;
567
}
568
return false;
569
}
570
571
// isInt32Immediate - This method tests to see if the value is a 32-bit
573
// constant operand. If so, Imm will receive the 32-bit value.
573
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
574
return isInt32Immediate(N.getNode(), Imm);
575
}
576
577
/// isInt64Immediate - This method tests to see if the value is a 64-bit
578
/// constant operand. If so Imm will receive the 64-bit value.
579
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
580
return isInt64Immediate(N.getNode(), Imm);
581
}
582
583
static unsigned getBranchHint(unsigned PCC,
584
const FunctionLoweringInfo &FuncInfo,
585
const SDValue &DestMBB) {
586
assert(isa<BasicBlockSDNode>(DestMBB));
587
588
if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
589
590
const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
591
const Instruction *BBTerm = BB->getTerminator();
592
593
if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
594
595
const BasicBlock *TBB = BBTerm->getSuccessor(0);
596
const BasicBlock *FBB = BBTerm->getSuccessor(1);
597
598
auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
599
auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
600
601
// We only want to handle cases that are easy to predict statically, e.g. a
602
// C++ throw statement, which is very likely not taken, or a call to a
603
// function that never returns, e.g. stdlib exit(). So we set a Threshold to
604
// filter out unwanted cases.
605
//
606
// Below is LLVM branch weight table, we only want to handle case 1, 2
607
//
608
//  Case                  Taken:Nontaken  Example
609
//  1. Unreachable        1048575:1       C++ throw, stdlib exit(),
610
//  2. Invoke-terminating 1:1048575
611
//  3. Coldblock          4:64            __builtin_expect
612
//  4. Loop Branch        124:4           For loop
613
//  5. PH/ZH/FPH          20:12
614
const uint32_t Threshold = 10000;
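// For example (a sketch using the weights above): an "unreachable" edge pair
// at 1048575:1 has a max/min ratio of roughly 2^20, well above Threshold, so
// a hint is emitted; a __builtin_expect pair at 4:64 has a ratio of only 16,
// so the check below returns PPC::BR_NO_HINT.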
615
616
if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
617
return PPC::BR_NO_HINT;
618
619
LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
620
<< "::" << BB->getName() << "'\n"
621
<< " -> " << TBB->getName() << ": " << TProb << "\n"
622
<< " -> " << FBB->getName() << ": " << FProb << "\n");
623
624
const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
625
626
// If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
627
// because we want 'TProb' to stand for the branch probability to the Dest BasicBlock.
628
if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
629
std::swap(TProb, FProb);
630
631
return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
632
}
633
634
// isOpcWithIntImmediate - This method tests to see if the node is a specific
635
// opcode and that it has an immediate integer right operand.
636
// If so Imm will receive the 32 bit value.
637
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
638
return N->getOpcode() == Opc
639
&& isInt32Immediate(N->getOperand(1).getNode(), Imm);
640
}
641
642
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
643
SDLoc dl(SN);
644
int FI = cast<FrameIndexSDNode>(N)->getIndex();
645
SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
646
unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
647
if (SN->hasOneUse())
648
CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
649
getSmallIPtrImm(Offset, dl));
650
else
651
ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
652
getSmallIPtrImm(Offset, dl)));
653
}
654
655
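// For example (illustrative): for N == (x srl 8) with Mask == 0xFF and
// isShiftMask == false, this computes SH == 24, MB == 24, ME == 31, i.e. the
// combination can be selected as "rlwinm dst, x, 24, 24, 31".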
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
656
bool isShiftMask, unsigned &SH,
657
unsigned &MB, unsigned &ME) {
658
// Don't even go down this path for i64, since different logic will be
659
// necessary for rldicl/rldicr/rldimi.
660
if (N->getValueType(0) != MVT::i32)
661
return false;
662
663
unsigned Shift = 32;
664
unsigned Indeterminant = ~0; // bit mask marking indeterminant results
665
unsigned Opcode = N->getOpcode();
666
if (N->getNumOperands() != 2 ||
667
!isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
668
return false;
669
670
if (Opcode == ISD::SHL) {
671
// apply shift left to mask if it comes first
672
if (isShiftMask) Mask = Mask << Shift;
673
// determine which bits are made indeterminant by shift
674
Indeterminant = ~(0xFFFFFFFFu << Shift);
675
} else if (Opcode == ISD::SRL) {
676
// apply shift right to mask if it comes first
677
if (isShiftMask) Mask = Mask >> Shift;
678
// determine which bits are made indeterminant by shift
679
Indeterminant = ~(0xFFFFFFFFu >> Shift);
680
// adjust for the left rotate
681
Shift = 32 - Shift;
682
} else if (Opcode == ISD::ROTL) {
683
Indeterminant = 0;
684
} else {
685
return false;
686
}
687
688
// if the mask doesn't intersect any Indeterminant bits
689
if (Mask && !(Mask & Indeterminant)) {
690
SH = Shift & 31;
691
// make sure the mask is still a mask (wrap arounds may not be)
692
return isRunOfOnes(Mask, MB, ME);
693
}
694
return false;
695
}
696
697
// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
698
// instruction use the thread pointer.
699
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) {
700
assert(
701
Base.getOpcode() == PPCISD::ADD_TLS &&
702
"Only expecting the ADD_TLS instruction to acquire the thread pointer!");
703
const PPCSubtarget &Subtarget =
704
CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
705
SDValue ADDTLSOp1 = Base.getOperand(0);
706
unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
707
708
// Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
709
//
710
// Although ADD_TLS does not explicitly use the thread pointer
711
// register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
712
// instruction will have a relocation specifier, @got@tprel, that is used to
713
// generate a GOT entry. The linker replaces this entry with an offset
714
// for a thread local variable, which will be relative to the thread pointer.
715
if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
716
return true;
717
// When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
718
// node is produced instead to represent the aforementioned situation.
719
LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
720
if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
721
return true;
722
723
// A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
724
// to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
725
// later returning it into R3.
726
if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
727
return true;
728
729
// The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
730
RegisterSDNode *AddFirstOpReg =
731
dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
732
if (AddFirstOpReg &&
733
AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
734
return true;
735
736
return false;
737
}
738
739
// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
740
// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
741
// operation, can be optimized to use an X-Form load or store, allowing the
742
// ADD_TLS node to be removed completely.
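// In tryTLSXFormLoad/Store below, the two operands of the ADD_TLS node simply
// become the two address operands of the X-Form TLS load/store, so the
// separate add of the thread pointer is folded away.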
743
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) {
744
745
// Do not do this transformation at -O0.
746
if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
747
return false;
748
749
// In order to perform this optimization inside tryTLSXForm[Load|Store],
750
// Base is expected to be an ADD_TLS node.
751
if (Base.getOpcode() != PPCISD::ADD_TLS)
752
return false;
753
for (auto *ADDTLSUse : Base.getNode()->uses()) {
754
// The optimization to convert the D-Form load/store into its X-Form
755
// counterpart should only occur if the source value offset of the load/
756
// store is 0. This also means that the offset should always be undefined.
757
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
758
if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
759
return false;
760
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
761
if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
762
return false;
763
} else // Don't optimize if there are ADD_TLS users that aren't load/stores.
764
return false;
765
}
766
767
if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
768
return false;
769
770
// Does the ADD_TLS node of the load/store use the thread pointer?
771
// If the thread pointer is not used as one of the operands of ADD_TLS,
772
// then this optimization is not valid.
773
return isThreadPointerAcquisitionNode(Base, CurDAG);
774
}
775
776
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
777
SDValue Base = ST->getBasePtr();
778
if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
779
return false;
780
781
SDLoc dl(ST);
782
EVT MemVT = ST->getMemoryVT();
783
EVT RegVT = ST->getValue().getValueType();
784
785
unsigned Opcode;
786
switch (MemVT.getSimpleVT().SimpleTy) {
787
default:
788
return false;
789
case MVT::i8: {
790
Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
791
break;
792
}
793
case MVT::i16: {
794
Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
795
break;
796
}
797
case MVT::i32: {
798
Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
799
break;
800
}
801
case MVT::i64: {
802
Opcode = PPC::STDXTLS;
803
break;
804
}
805
case MVT::f32: {
806
Opcode = PPC::STFSXTLS;
807
break;
808
}
809
case MVT::f64: {
810
Opcode = PPC::STFDXTLS;
811
break;
812
}
813
}
814
SDValue Chain = ST->getChain();
815
SDVTList VTs = ST->getVTList();
816
SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
817
Chain};
818
SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
819
transferMemOperands(ST, MN);
820
ReplaceNode(ST, MN);
821
return true;
822
}
823
824
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
825
SDValue Base = LD->getBasePtr();
826
if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
827
return false;
828
829
SDLoc dl(LD);
830
EVT MemVT = LD->getMemoryVT();
831
EVT RegVT = LD->getValueType(0);
832
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
833
unsigned Opcode;
834
switch (MemVT.getSimpleVT().SimpleTy) {
835
default:
836
return false;
837
case MVT::i8: {
838
Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
839
break;
840
}
841
case MVT::i16: {
842
if (RegVT == MVT::i32)
843
Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
844
else
845
Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
846
break;
847
}
848
case MVT::i32: {
849
if (RegVT == MVT::i32)
850
Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
851
else
852
Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
853
break;
854
}
855
case MVT::i64: {
856
Opcode = PPC::LDXTLS;
857
break;
858
}
859
case MVT::f32: {
860
Opcode = PPC::LFSXTLS;
861
break;
862
}
863
case MVT::f64: {
864
Opcode = PPC::LFDXTLS;
865
break;
866
}
867
}
868
SDValue Chain = LD->getChain();
869
SDVTList VTs = LD->getVTList();
870
SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
871
SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
872
transferMemOperands(LD, MN);
873
ReplaceNode(LD, MN);
874
return true;
875
}
876
877
/// Turn an or of two masked values into the rotate left word immediate then
878
/// mask insert (rlwimi) instruction.
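/// For example (illustrative): (a & 0xFFFFFF00) | (b & 0x000000FF) can be
/// selected as a single rlwimi with SH=0, MB=24, ME=31, inserting the low
/// byte of the right-hand operand into the left-hand one.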
879
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
880
SDValue Op0 = N->getOperand(0);
881
SDValue Op1 = N->getOperand(1);
882
SDLoc dl(N);
883
884
KnownBits LKnown = CurDAG->computeKnownBits(Op0);
885
KnownBits RKnown = CurDAG->computeKnownBits(Op1);
886
887
unsigned TargetMask = LKnown.Zero.getZExtValue();
888
unsigned InsertMask = RKnown.Zero.getZExtValue();
889
890
if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
891
unsigned Op0Opc = Op0.getOpcode();
892
unsigned Op1Opc = Op1.getOpcode();
893
unsigned Value, SH = 0;
894
TargetMask = ~TargetMask;
895
InsertMask = ~InsertMask;
896
897
// If the LHS has a foldable shift and the RHS does not, then swap it to the
898
// RHS so that we can fold the shift into the insert.
899
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
900
if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
901
Op0.getOperand(0).getOpcode() == ISD::SRL) {
902
if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
903
Op1.getOperand(0).getOpcode() != ISD::SRL) {
904
std::swap(Op0, Op1);
905
std::swap(Op0Opc, Op1Opc);
906
std::swap(TargetMask, InsertMask);
907
}
908
}
909
} else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
910
if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
911
Op1.getOperand(0).getOpcode() != ISD::SRL) {
912
std::swap(Op0, Op1);
913
std::swap(Op0Opc, Op1Opc);
914
std::swap(TargetMask, InsertMask);
915
}
916
}
917
918
unsigned MB, ME;
919
if (isRunOfOnes(InsertMask, MB, ME)) {
920
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
921
isInt32Immediate(Op1.getOperand(1), Value)) {
922
Op1 = Op1.getOperand(0);
923
SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
924
}
925
if (Op1Opc == ISD::AND) {
926
// The AND mask might not be a constant, and we need to make sure that
927
// if we're going to fold the masking with the insert, all bits not
928
// known to be zero in the mask are known to be one.
929
KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
930
bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
931
932
unsigned SHOpc = Op1.getOperand(0).getOpcode();
933
if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
934
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
935
// Note that Value must be in range here (less than 32) because
936
// otherwise there would not be any bits set in InsertMask.
937
Op1 = Op1.getOperand(0).getOperand(0);
938
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
939
}
940
}
941
942
SH &= 31;
943
SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
944
getI32Imm(ME, dl) };
945
ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
946
return true;
947
}
948
}
949
return false;
950
}
951
952
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
953
unsigned MaxTruncation = 0;
954
// Cannot use range-based for loop here as we need the actual use (i.e. we
955
// need the operand number corresponding to the use). A range-based for
956
// will unbox the use and provide an SDNode*.
957
for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
958
Use != UseEnd; ++Use) {
959
unsigned Opc =
960
Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
961
switch (Opc) {
962
default: return 0;
963
case ISD::TRUNCATE:
964
if (Use->isMachineOpcode())
965
return 0;
966
MaxTruncation =
967
std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
968
continue;
969
case ISD::STORE: {
970
if (Use->isMachineOpcode())
971
return 0;
972
StoreSDNode *STN = cast<StoreSDNode>(*Use);
973
unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
974
if (MemVTSize == 64 || Use.getOperandNo() != 0)
975
return 0;
976
MaxTruncation = std::max(MaxTruncation, MemVTSize);
977
continue;
978
}
979
case PPC::STW8:
980
case PPC::STWX8:
981
case PPC::STWU8:
982
case PPC::STWUX8:
983
if (Use.getOperandNo() != 0)
984
return 0;
985
MaxTruncation = std::max(MaxTruncation, 32u);
986
continue;
987
case PPC::STH8:
988
case PPC::STHX8:
989
case PPC::STHU8:
990
case PPC::STHUX8:
991
if (Use.getOperandNo() != 0)
992
return 0;
993
MaxTruncation = std::max(MaxTruncation, 16u);
994
continue;
995
case PPC::STB8:
996
case PPC::STBX8:
997
case PPC::STBU8:
998
case PPC::STBUX8:
999
if (Use.getOperandNo() != 0)
1000
return 0;
1001
MaxTruncation = std::max(MaxTruncation, 8u);
1002
continue;
1003
}
1004
}
1005
return MaxTruncation;
1006
}
1007
1008
// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1009
// zeros and, if so, return the bit index just above that run, which is the
// rotate-right amount the callers use to move the run to the top bits;
// otherwise return 0. (Since Num > 32, any such run must straddle the 32-bit
// boundary, so adding the high word's trailing zeros to the low word's
// leading zeros is sufficient.)
1010
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1011
unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1012
unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1013
if ((HiTZ + LoLZ) >= Num)
1014
return (32 + HiTZ);
1015
return 0;
1016
}
1017
1018
// Direct materialization of 64-bit constants by enumerated patterns.
1019
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1020
uint64_t Imm, unsigned &InstCnt) {
1021
unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1022
unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1023
unsigned TO = llvm::countr_one<uint64_t>(Imm);
1024
unsigned LO = llvm::countl_one<uint64_t>(Imm);
1025
unsigned Hi32 = Hi_32(Imm);
1026
unsigned Lo32 = Lo_32(Imm);
1027
SDNode *Result = nullptr;
1028
unsigned Shift = 0;
1029
1030
auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1031
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1032
};
1033
1034
// Following patterns use 1 instructions to materialize the Imm.
1035
InstCnt = 1;
1036
// 1-1) Patterns : {zeros}{15-bit value}
1037
// {ones}{15-bit value}
1038
if (isInt<16>(Imm)) {
1039
SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1040
return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1041
}
1042
// 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
1043
// {ones}{15-bit value}{16 zeros}
1044
if (TZ > 15 && (LZ > 32 || LO > 32))
1045
return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1046
getI32Imm((Imm >> 16) & 0xffff));
1047
1048
// Following patterns use 2 instructions to materialize the Imm.
1049
InstCnt = 2;
1050
assert(LZ < 64 && "Unexpected leading zeros here.");
1051
// Count of ones following the leading zeros.
1052
unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1053
// 2-1) Patterns : {zeros}{31-bit value}
1054
// {ones}{31-bit value}
1055
if (isInt<32>(Imm)) {
1056
uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1057
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1058
Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1059
return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1060
getI32Imm(Imm & 0xffff));
1061
}
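// For example (illustrative): pattern 2-1 above turns Imm == 0x12345678 into
// LIS8 0x1234 followed by ORI8 0x5678; Imm == 0x12340000 was already handled
// by pattern 1-2 with a single LIS8 0x1234.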
1062
// 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1063
// {zeros}{15-bit value}{zeros}
1064
// {zeros}{ones}{15-bit value}
1065
// {ones}{15-bit value}{zeros}
1066
// We can take advantage of LI's sign-extension semantics to generate leading
1067
// ones, and then use RLDIC to mask off the ones in both sides after rotation.
1068
if ((LZ + FO + TZ) > 48) {
1069
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1070
getI32Imm((Imm >> TZ) & 0xffff));
1071
return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1072
getI32Imm(TZ), getI32Imm(LZ));
1073
}
1074
// 2-3) Pattern : {zeros}{15-bit value}{ones}
1075
// Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
1076
// therefore we can take advantage of LI's sign-extension semantics, and then
1077
// mask them off after rotation.
1078
//
1079
// +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1080
// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1081
// +------------------------+ +------------------------+
1082
// 63 0 63 0
1083
// Imm (Imm >> (48 - LZ) & 0xffff)
1084
// +----sext-----|--16-bit--+ +clear-|-----------------+
1085
// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1086
// +------------------------+ +------------------------+
1087
// 63 0 63 0
1088
// LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1089
if ((LZ + TO) > 48) {
1090
// Since the immediates with (LZ > 32) have been handled by previous
1091
// patterns, here we have (LZ <= 32) to make sure we will not shift right
1092
// the Imm by a negative value.
1093
assert(LZ <= 32 && "Unexpected shift value.");
1094
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1095
getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1096
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1097
getI32Imm(48 - LZ), getI32Imm(LZ));
1098
}
1099
// 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1100
// {ones}{15-bit value}{ones}
1101
// We can take advantage of LI's sign-extension semantics to generate leading
1102
// ones, and then use RLDICL to mask off the ones in left sides (if required)
1103
// after rotation.
1104
//
1105
// +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1106
// |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1107
// +------------------------+ +------------------------+
1108
// 63 0 63 0
1109
// Imm (Imm >> TO) & 0xffff
1110
// +----sext-----|--16-bit--+ +LZ|---------------------+
1111
// |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1112
// +------------------------+ +------------------------+
1113
// 63 0 63 0
1114
// LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1115
if ((LZ + FO + TO) > 48) {
1116
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1117
getI32Imm((Imm >> TO) & 0xffff));
1118
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1119
getI32Imm(TO), getI32Imm(LZ));
1120
}
1121
// 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1122
// If Hi32 is zero and the Lo16 (in Lo32) can be represented as a positive 16 bit
1123
// value, we can use LI for Lo16 without generating leading ones then add the
1124
// Hi16(in Lo32).
1125
if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1126
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1127
getI32Imm(Lo32 & 0xffff));
1128
return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1129
getI32Imm(Lo32 >> 16));
1130
}
1131
// 2-6) Patterns : {******}{49 zeros}{******}
1132
// {******}{49 ones}{******}
1133
// If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1134
// bits remain on both sides. Rotate right the Imm to construct an int<16>
1135
// value, use LI for int<16> value and then use RLDICL without mask to rotate
1136
// it back.
1137
//
1138
// 1) findContiguousZerosAtLeast(Imm, 49)
1139
// +------|--zeros-|------+ +---ones--||---15 bit--+
1140
// |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1141
// +----------------------+ +----------------------+
1142
// 63 0 63 0
1143
//
1144
// 2) findContiguousZerosAtLeast(~Imm, 49)
1145
// +------|--ones--|------+ +---ones--||---15 bit--+
1146
// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1147
// +----------------------+ +----------------------+
1148
// 63 0 63 0
1149
if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1150
(Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1151
uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1152
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1153
getI32Imm(RotImm & 0xffff));
1154
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1155
getI32Imm(Shift), getI32Imm(0));
1156
}
1157
// 2-7) Patterns : High word == Low word
1158
// This may require 2 to 3 instructions, depending on whether Lo32 can be
1159
// materialized in 1 instruction.
1160
if (Hi32 == Lo32) {
1161
// Handle the first 32 bits.
1162
uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1163
uint64_t ImmLo16 = Lo32 & 0xffff;
1164
if (isInt<16>(Lo32))
1165
Result =
1166
CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1167
else if (!ImmLo16)
1168
Result =
1169
CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1170
else {
1171
InstCnt = 3;
1172
Result =
1173
CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1174
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1175
SDValue(Result, 0), getI32Imm(ImmLo16));
1176
}
1177
// Use rldimi to insert the Low word into High word.
1178
SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1179
getI32Imm(0)};
1180
return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1181
}
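// For example (illustrative): Imm == 0x5678000056780000 takes two
// instructions here: LIS8 0x5678 materializes the low word, and the RLDIMI
// above copies that low word into the high word.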
1182
1183
// Following patterns use 3 instructions to materialize the Imm.
1184
InstCnt = 3;
1185
// 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1186
// {zeros}{31-bit value}{zeros}
1187
// {zeros}{ones}{31-bit value}
1188
// {ones}{31-bit value}{zeros}
1189
// We can take advantage of LIS's sign-extension semantics to generate leading
1190
// ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1191
// ones in both sides after rotation.
1192
if ((LZ + FO + TZ) > 32) {
1193
uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1194
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1195
Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1196
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1197
getI32Imm((Imm >> TZ) & 0xffff));
1198
return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1199
getI32Imm(TZ), getI32Imm(LZ));
1200
}
1201
// 3-2) Pattern : {zeros}{31-bit value}{ones}
1202
// Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit
1203
// value, therefore we can take advantage of LIS's sign-extension semantics,
1204
// add the remaining bits with ORI, and then mask them off after rotation.
1205
// This is similar to Pattern 2-3, please refer to the diagram there.
1206
if ((LZ + TO) > 32) {
1207
// Since the immediates with (LZ > 32) have been handled by previous
1208
// patterns, here we have (LZ <= 32) to make sure we will not shift right
1209
// the Imm by a negative value.
1210
assert(LZ <= 32 && "Unexpected shift value.");
1211
Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1212
getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1213
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1214
getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1215
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1216
getI32Imm(32 - LZ), getI32Imm(LZ));
1217
}
1218
// 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1219
// {ones}{31-bit value}{ones}
1220
// We can take advantage of LIS's sign-extension semantics to generate leading
1221
// ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1222
// ones in left sides (if required) after rotation.
1223
// This is similar to Pattern 2-4, please refer to the diagram there.
1224
if ((LZ + FO + TO) > 32) {
1225
Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1226
getI32Imm((Imm >> (TO + 16)) & 0xffff));
1227
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1228
getI32Imm((Imm >> TO) & 0xffff));
1229
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1230
getI32Imm(TO), getI32Imm(LZ));
1231
}
1232
// 3-4) Patterns : {******}{33 zeros}{******}
1233
// {******}{33 ones}{******}
1234
// If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1235
// bits remain on both sides. Rotate right the Imm to construct an int<32>
1236
// value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1237
// rotate it back.
1238
// This is similar to Pattern 2-6, please refer to the diagram there.
1239
if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1240
(Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1241
uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1242
uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1243
unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1244
Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1245
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1246
getI32Imm(RotImm & 0xffff));
1247
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1248
getI32Imm(Shift), getI32Imm(0));
1249
}
1250
1251
InstCnt = 0;
1252
return nullptr;
1253
}
1254
1255
// Try to select instructions to generate a 64 bit immediate using prefix as
1256
// well as non prefix instructions. The function will return the SDNode
1257
// to materialize that constant or it will return nullptr if it does not
1258
// find one. The variable InstCnt is set to the number of instructions that
1259
// were selected.
1260
static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
1261
uint64_t Imm, unsigned &InstCnt) {
1262
unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1263
unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1264
unsigned TO = llvm::countr_one<uint64_t>(Imm);
1265
unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1266
unsigned Hi32 = Hi_32(Imm);
1267
unsigned Lo32 = Lo_32(Imm);
1268
1269
auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1270
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1271
};
1272
1273
auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1274
return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1275
};
1276
1277
// Following patterns use 1 instruction to materialize Imm.
1278
InstCnt = 1;
1279
1280
// The pli instruction can materialize up to 34 bits directly.
1281
// If a constant fits within 34-bits, emit the pli instruction here directly.
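// For example (illustrative), 0x1FFFFFFFF (2^33 - 1) still satisfies
// isInt<34> and is materialized with a single pli.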
1282
if (isInt<34>(Imm))
1283
return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1284
CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1285
1286
// Require at least two instructions.
1287
InstCnt = 2;
1288
SDNode *Result = nullptr;
1289
// Patterns : {zeros}{ones}{33-bit value}{zeros}
1290
// {zeros}{33-bit value}{zeros}
1291
// {zeros}{ones}{33-bit value}
1292
// {ones}{33-bit value}{zeros}
1293
// We can take advantage of PLI's sign-extension semantics to generate leading
1294
// ones, and then use RLDIC to mask off the ones on both sides after rotation.
1295
if ((LZ + FO + TZ) > 30) {
1296
APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1297
APInt Extended = SignedInt34.sext(64);
1298
Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1299
getI64Imm(*Extended.getRawData()));
1300
return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1301
getI32Imm(TZ), getI32Imm(LZ));
1302
}
1303
// Pattern : {zeros}{33-bit value}{ones}
1304
// Shift right the Imm by (30 - LZ) bits to construct a negative 34-bit value,
1305
// therefore we can take advantage of PLI's sign-extension semantics, and then
1306
// mask them off after rotation.
1307
//
1308
// +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1309
// |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1310
// +------------------------+ +------------------------+
1311
// 63 0 63 0
1312
//
1313
// +----sext-----|--34-bit--+ +clear-|-----------------+
1314
// |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1315
// +------------------------+ +------------------------+
1316
// 63 0 63 0
1317
if ((LZ + TO) > 30) {
1318
APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1319
APInt Extended = SignedInt34.sext(64);
1320
Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1321
getI64Imm(*Extended.getRawData()));
1322
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1323
getI32Imm(30 - LZ), getI32Imm(LZ));
1324
}
1325
// Patterns : {zeros}{ones}{33-bit value}{ones}
1326
// {ones}{33-bit value}{ones}
1327
// Similar to LI we can take advantage of PLI's sign-extension semantics to
1328
// generate leading ones, and then use RLDICL to mask off the ones in left
1329
// sides (if required) after rotation.
1330
if ((LZ + FO + TO) > 30) {
1331
APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1332
APInt Extended = SignedInt34.sext(64);
1333
Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1334
getI64Imm(*Extended.getRawData()));
1335
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1336
getI32Imm(TO), getI32Imm(LZ));
1337
}
1338
// Patterns : {******}{31 zeros}{******}
1339
// : {******}{31 ones}{******}
1340
// If Imm contains 31 consecutive zeros/ones then the remaining bit count
1341
// is 33. Rotate right the Imm to construct an int<33> value; we can use PLI
1342
// for the int<33> value and then use RLDICL without a mask to rotate it back.
1343
//
1344
// +------|--ones--|------+ +---ones--||---33 bit--+
1345
// |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1346
// +----------------------+ +----------------------+
1347
// 63 0 63 0
1348
for (unsigned Shift = 0; Shift < 63; ++Shift) {
1349
uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1350
if (isInt<34>(RotImm)) {
1351
Result =
1352
CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1353
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1354
SDValue(Result, 0), getI32Imm(Shift),
1355
getI32Imm(0));
1356
}
1357
}
1358
1359
// Patterns : High word == Low word
1360
// This is basically a splat of a 32 bit immediate.
1361
if (Hi32 == Lo32) {
1362
Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1363
SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1364
getI32Imm(0)};
1365
return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1366
}
1367
1368
InstCnt = 3;
1369
// Catch-all
1370
// This pattern can form any 64 bit immediate in 3 instructions.
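// For example (illustrative): 0x0123456789ABCDEF becomes pli(0x01234567),
// pli(0x89ABCDEF), then rldimi to insert the high 32 bits into the register
// holding the low 32 bits.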
1371
SDNode *ResultHi =
1372
CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1373
SDNode *ResultLo =
1374
CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1375
SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1376
getI32Imm(0)};
1377
return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1378
}

static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) {
unsigned InstCntDirect = 0;
1383
// No more than 3 instructions are used if we can select the i64 immediate
1384
// directly.
1385
SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1386
1387
const PPCSubtarget &Subtarget =
1388
CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
1389
1390
// If we have prefixed instructions and there is a chance we can
1391
// materialize the constant with fewer prefixed instructions than
1392
// non-prefixed, try that.
1393
if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1394
unsigned InstCntDirectP = 0;
1395
SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1396
// Use the prefix case in either of two cases:
1397
// 1) We have no result from the non-prefix case to use.
1398
// 2) The non-prefix case uses more instructions than the prefix case.
1399
// If the prefix and non-prefix cases use the same number of instructions
1400
// we will prefer the non-prefix case.
1401
if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1402
if (InstCnt)
1403
*InstCnt = InstCntDirectP;
1404
return ResultP;
1405
}
1406
}
1407
1408
if (Result) {
1409
if (InstCnt)
1410
*InstCnt = InstCntDirect;
1411
return Result;
1412
}
1413
auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1414
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1415
};
1416
1417
uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1418
uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1419
1420
// Try to use 4 instructions to materialize the immediate which is "almost" a
1421
// splat of a 32 bit immediate.
1422
if (Hi16OfLo32 && Lo16OfLo32) {
1423
uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1424
uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1425
bool IsSelected = false;
1426
1427
auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1428
SDNode *Result =
1429
CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1430
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1431
SDValue(Result, 0), getI32Imm(Lo16));
1432
SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1433
getI32Imm(0)};
1434
return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1435
};
1436
1437
if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1438
IsSelected = true;
1439
Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1440
// Modify Hi16OfHi32.
1441
SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1442
getI32Imm(0)};
1443
Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1444
} else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1445
IsSelected = true;
1446
Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1447
// Modify Lo16OfLo32.
1448
SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1449
getI32Imm(16), getI32Imm(31)};
1450
Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1451
} else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1452
IsSelected = true;
1453
Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1454
// Modify Hi16OfLo32.
1455
SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1456
getI32Imm(0), getI32Imm(15)};
1457
Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1458
}
    if (IsSelected) {
      if (InstCnt)
        *InstCnt = 4;
      return Result;
    }
}
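  // For example, Imm = 0x5555_5555_AAAA_5555 (three of its four halfwords
  // equal) takes the first branch above and is formed in 4 instructions:
  //   LIS8   0xAAAA          ; high halfword of the low 32-bit word
  //   ORI8   0x5555          ; low 32 bits are now 0xAAAA5555
  //   RLDIMI SH = 32, MB = 0 ; splat the low word into the high word
  //   RLDIMI SH = 48, MB = 0 ; overwrite the top halfword with 0x5555
  // (Values here are purely illustrative.)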
1465
1466
// Handle the upper 32 bit value.
1467
Result =
1468
selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1469
// Add in the last bits as required.
1470
if (Hi16OfLo32) {
1471
Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1472
SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1473
++InstCntDirect;
1474
}
1475
if (Lo16OfLo32) {
1476
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1477
getI32Imm(Lo16OfLo32));
1478
++InstCntDirect;
1479
}
1480
if (InstCnt)
1481
*InstCnt = InstCntDirect;
1482
return Result;
1483
}
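
// Note that ORIS8 and ORI8 above OR in a zero-extended 16-bit immediate, so
// they only add bits 31..16 and 15..0 respectively and cannot disturb the
// upper 32 bits that were already materialized.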
1484
1485
// Select a 64-bit constant.
1486
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1487
SDLoc dl(N);
1488
1489
// Get 64 bit value.
1490
int64_t Imm = N->getAsZExtVal();
1491
if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1492
uint64_t SextImm = SignExtend64(Imm, MinSize);
1493
SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1494
if (isInt<16>(SextImm))
1495
return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1496
}
1497
return selectI64Imm(CurDAG, dl, Imm);
1498
}
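
// For example (illustrative only), if N is the constant 0x0000_0000_0000_FFFF
// and every use of N only looks at its low 16 bits, SignExtend64(Imm, 16) is
// -1, which fits LI8's 16-bit signed immediate, so a single "LI8 -1" replaces
// a longer materialization sequence.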
1499
1500
namespace {
1501
1502
class BitPermutationSelector {
1503
struct ValueBit {
1504
SDValue V;
1505
1506
// The bit number in the value, using a convention where bit 0 is the
1507
// lowest-order bit.
1508
unsigned Idx;
1509
1510
    // ConstZero means a bit we need to mask off.
    // Variable is a bit that comes from an input variable.
    // VariableKnownToBeZero is also a bit that comes from an input variable,
    // but it is already known to be zero, so we do not need to mask it off.
1514
enum Kind {
1515
ConstZero,
1516
Variable,
1517
VariableKnownToBeZero
1518
} K;
1519
1520
ValueBit(SDValue V, unsigned I, Kind K = Variable)
1521
: V(V), Idx(I), K(K) {}
1522
ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1523
1524
bool isZero() const {
1525
return K == ConstZero || K == VariableKnownToBeZero;
1526
}
1527
1528
bool hasValue() const {
1529
return K == Variable || K == VariableKnownToBeZero;
1530
}
1531
1532
SDValue getValue() const {
1533
assert(hasValue() && "Cannot get the value of a constant bit");
1534
return V;
1535
}
1536
1537
unsigned getValueBitIndex() const {
1538
assert(hasValue() && "Cannot get the value bit index of a constant bit");
1539
return Idx;
1540
}
1541
};
1542
1543
// A bit group has the same underlying value and the same rotate factor.
1544
struct BitGroup {
1545
SDValue V;
1546
unsigned RLAmt;
1547
unsigned StartIdx, EndIdx;
1548
1549
// This rotation amount assumes that the lower 32 bits of the quantity are
1550
// replicated in the high 32 bits by the rotation operator (which is done
1551
// by rlwinm and friends in 64-bit mode).
1552
bool Repl32;
1553
// Did converting to Repl32 == true change the rotation factor? If it did,
1554
// it decreased it by 32.
1555
bool Repl32CR;
1556
// Was this group coalesced after setting Repl32 to true?
1557
bool Repl32Coalesced;
1558
1559
BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1560
: V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1561
Repl32Coalesced(false) {
1562
LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1563
<< " [" << S << ", " << E << "]\n");
1564
}
1565
};
1566
1567
// Information on each (Value, RLAmt) pair (like the number of groups
1568
// associated with each) used to choose the lowering method.
1569
struct ValueRotInfo {
1570
SDValue V;
1571
unsigned RLAmt = std::numeric_limits<unsigned>::max();
1572
unsigned NumGroups = 0;
1573
unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1574
bool Repl32 = false;
1575
1576
ValueRotInfo() = default;
1577
1578
// For sorting (in reverse order) by NumGroups, and then by
1579
// FirstGroupStartIdx.
1580
bool operator < (const ValueRotInfo &Other) const {
1581
// We need to sort so that the non-Repl32 come first because, when we're
1582
// doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1583
// masking operation.
1584
if (Repl32 < Other.Repl32)
1585
return true;
1586
else if (Repl32 > Other.Repl32)
1587
return false;
1588
else if (NumGroups > Other.NumGroups)
1589
return true;
1590
else if (NumGroups < Other.NumGroups)
1591
return false;
1592
else if (RLAmt == 0 && Other.RLAmt != 0)
1593
return true;
1594
else if (RLAmt != 0 && Other.RLAmt == 0)
1595
return false;
1596
else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1597
return true;
1598
return false;
1599
}
1600
};
1601
1602
using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1603
using ValueBitsMemoizer =
1604
DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1605
ValueBitsMemoizer Memoizer;
1606
1607
  // Return a pair of a bool and a pointer to a memoized SmallVector of
  // ValueBits. The bool is true if something interesting was deduced, and
  // false if we are only providing a generic representation of V (or something
  // else likewise uninteresting for instruction selection) through the
  // SmallVector.
1611
std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1612
unsigned NumBits) {
1613
auto &ValueEntry = Memoizer[V];
1614
if (ValueEntry)
1615
return std::make_pair(ValueEntry->first, &ValueEntry->second);
1616
ValueEntry.reset(new ValueBitsMemoizedValue());
1617
bool &Interesting = ValueEntry->first;
1618
SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1619
Bits.resize(NumBits);
1620
1621
switch (V.getOpcode()) {
1622
default: break;
1623
case ISD::ROTL:
1624
if (isa<ConstantSDNode>(V.getOperand(1))) {
1625
assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1626
unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1627
1628
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1629
1630
for (unsigned i = 0; i < NumBits; ++i)
1631
Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1632
1633
return std::make_pair(Interesting = true, &Bits);
1634
}
1635
break;
1636
case ISD::SHL:
1637
case PPCISD::SHL:
1638
if (isa<ConstantSDNode>(V.getOperand(1))) {
1639
// sld takes 7 bits, slw takes 6.
1640
unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1641
1642
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1643
1644
if (ShiftAmt >= NumBits) {
1645
for (unsigned i = 0; i < NumBits; ++i)
1646
Bits[i] = ValueBit(ValueBit::ConstZero);
1647
} else {
1648
for (unsigned i = ShiftAmt; i < NumBits; ++i)
1649
Bits[i] = LHSBits[i - ShiftAmt];
1650
for (unsigned i = 0; i < ShiftAmt; ++i)
1651
Bits[i] = ValueBit(ValueBit::ConstZero);
1652
}
1653
1654
return std::make_pair(Interesting = true, &Bits);
1655
}
1656
break;
1657
case ISD::SRL:
1658
case PPCISD::SRL:
1659
if (isa<ConstantSDNode>(V.getOperand(1))) {
1660
// srd takes lowest 7 bits, srw takes 6.
1661
unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1662
1663
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1664
1665
if (ShiftAmt >= NumBits) {
1666
for (unsigned i = 0; i < NumBits; ++i)
1667
Bits[i] = ValueBit(ValueBit::ConstZero);
1668
} else {
1669
for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1670
Bits[i] = LHSBits[i + ShiftAmt];
1671
for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1672
Bits[i] = ValueBit(ValueBit::ConstZero);
1673
}
1674
1675
return std::make_pair(Interesting = true, &Bits);
1676
}
1677
break;
1678
case ISD::AND:
1679
if (isa<ConstantSDNode>(V.getOperand(1))) {
1680
uint64_t Mask = V.getConstantOperandVal(1);
1681
1682
const SmallVector<ValueBit, 64> *LHSBits;
1683
// Mark this as interesting, only if the LHS was also interesting. This
1684
// prevents the overall procedure from matching a single immediate 'and'
1685
// (which is non-optimal because such an and might be folded with other
1686
// things if we don't select it here).
1687
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1688
1689
for (unsigned i = 0; i < NumBits; ++i)
1690
if (((Mask >> i) & 1) == 1)
1691
Bits[i] = (*LHSBits)[i];
1692
else {
1693
// AND instruction masks this bit. If the input is already zero,
1694
// we have nothing to do here. Otherwise, make the bit ConstZero.
1695
if ((*LHSBits)[i].isZero())
1696
Bits[i] = (*LHSBits)[i];
1697
else
1698
Bits[i] = ValueBit(ValueBit::ConstZero);
1699
}
1700
1701
return std::make_pair(Interesting, &Bits);
1702
}
1703
break;
1704
case ISD::OR: {
1705
const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1706
const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1707
1708
bool AllDisjoint = true;
1709
SDValue LastVal = SDValue();
1710
unsigned LastIdx = 0;
1711
for (unsigned i = 0; i < NumBits; ++i) {
1712
if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1713
// If both inputs are known to be zero and one is ConstZero and
1714
// another is VariableKnownToBeZero, we can select whichever
1715
// we like. To minimize the number of bit groups, we select
1716
// VariableKnownToBeZero if this bit is the next bit of the same
1717
// input variable from the previous bit. Otherwise, we select
1718
// ConstZero.
1719
if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1720
LHSBits[i].getValueBitIndex() == LastIdx + 1)
1721
Bits[i] = LHSBits[i];
1722
else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1723
RHSBits[i].getValueBitIndex() == LastIdx + 1)
1724
Bits[i] = RHSBits[i];
1725
else
1726
Bits[i] = ValueBit(ValueBit::ConstZero);
1727
}
1728
else if (LHSBits[i].isZero())
1729
Bits[i] = RHSBits[i];
1730
else if (RHSBits[i].isZero())
1731
Bits[i] = LHSBits[i];
1732
else {
1733
AllDisjoint = false;
1734
break;
1735
}
1736
// We remember the value and bit index of this bit.
1737
if (Bits[i].hasValue()) {
1738
LastVal = Bits[i].getValue();
1739
LastIdx = Bits[i].getValueBitIndex();
1740
}
1741
else {
1742
if (LastVal) LastVal = SDValue();
1743
LastIdx = 0;
1744
}
1745
}
1746
1747
if (!AllDisjoint)
1748
break;
1749
1750
return std::make_pair(Interesting = true, &Bits);
1751
}
1752
case ISD::ZERO_EXTEND: {
1753
// We support only the case with zero extension from i32 to i64 so far.
1754
if (V.getValueType() != MVT::i64 ||
1755
V.getOperand(0).getValueType() != MVT::i32)
1756
break;
1757
1758
const SmallVector<ValueBit, 64> *LHSBits;
1759
const unsigned NumOperandBits = 32;
1760
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1761
NumOperandBits);
1762
1763
for (unsigned i = 0; i < NumOperandBits; ++i)
1764
Bits[i] = (*LHSBits)[i];
1765
1766
for (unsigned i = NumOperandBits; i < NumBits; ++i)
1767
Bits[i] = ValueBit(ValueBit::ConstZero);
1768
1769
return std::make_pair(Interesting, &Bits);
1770
}
1771
case ISD::TRUNCATE: {
1772
EVT FromType = V.getOperand(0).getValueType();
1773
EVT ToType = V.getValueType();
1774
// We support only the case with truncate from i64 to i32.
1775
if (FromType != MVT::i64 || ToType != MVT::i32)
1776
break;
1777
const unsigned NumAllBits = FromType.getSizeInBits();
1778
SmallVector<ValueBit, 64> *InBits;
1779
std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1780
NumAllBits);
1781
const unsigned NumValidBits = ToType.getSizeInBits();
1782
1783
      // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit
      // value, so we cannot include this truncate.
1785
bool UseUpper32bit = false;
1786
for (unsigned i = 0; i < NumValidBits; ++i)
1787
if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1788
UseUpper32bit = true;
1789
break;
1790
}
1791
if (UseUpper32bit)
1792
break;
1793
1794
for (unsigned i = 0; i < NumValidBits; ++i)
1795
Bits[i] = (*InBits)[i];
1796
1797
return std::make_pair(Interesting, &Bits);
1798
}
1799
case ISD::AssertZext: {
1800
// For AssertZext, we look through the operand and
1801
// mark the bits known to be zero.
1802
const SmallVector<ValueBit, 64> *LHSBits;
1803
std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1804
NumBits);
1805
1806
EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1807
const unsigned NumValidBits = FromType.getSizeInBits();
1808
for (unsigned i = 0; i < NumValidBits; ++i)
1809
Bits[i] = (*LHSBits)[i];
1810
1811
// These bits are known to be zero but the AssertZext may be from a value
1812
// that already has some constant zero bits (i.e. from a masking and).
1813
for (unsigned i = NumValidBits; i < NumBits; ++i)
1814
Bits[i] = (*LHSBits)[i].hasValue()
1815
? ValueBit((*LHSBits)[i].getValue(),
1816
(*LHSBits)[i].getValueBitIndex(),
1817
ValueBit::VariableKnownToBeZero)
1818
: ValueBit(ValueBit::ConstZero);
1819
1820
return std::make_pair(Interesting, &Bits);
1821
}
1822
case ISD::LOAD:
1823
LoadSDNode *LD = cast<LoadSDNode>(V);
1824
if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1825
EVT VT = LD->getMemoryVT();
1826
const unsigned NumValidBits = VT.getSizeInBits();
1827
1828
for (unsigned i = 0; i < NumValidBits; ++i)
1829
Bits[i] = ValueBit(V, i);
1830
1831
// These bits are known to be zero.
1832
for (unsigned i = NumValidBits; i < NumBits; ++i)
1833
Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1834
1835
// Zero-extending load itself cannot be optimized. So, it is not
1836
// interesting by itself though it gives useful information.
1837
return std::make_pair(Interesting = false, &Bits);
1838
}
1839
break;
1840
}
1841
1842
for (unsigned i = 0; i < NumBits; ++i)
1843
Bits[i] = ValueBit(V, i);
1844
1845
return std::make_pair(Interesting = false, &Bits);
1846
}
1847
1848
// For each value (except the constant ones), compute the left-rotate amount
1849
// to get it from its original to final position.
1850
void computeRotationAmounts() {
1851
NeedMask = false;
1852
RLAmt.resize(Bits.size());
1853
for (unsigned i = 0; i < Bits.size(); ++i)
1854
if (Bits[i].hasValue()) {
1855
unsigned VBI = Bits[i].getValueBitIndex();
1856
if (i >= VBI)
1857
RLAmt[i] = i - VBI;
1858
else
1859
RLAmt[i] = Bits.size() - (VBI - i);
1860
} else if (Bits[i].isZero()) {
1861
NeedMask = true;
1862
RLAmt[i] = UINT32_MAX;
1863
} else {
1864
llvm_unreachable("Unknown value bit type");
1865
}
1866
}
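  // Example for computeRotationAmounts(): with 64-bit values, if Bits[5] is
  // bit 3 of V it must be rotated left by 5 - 3 = 2, so RLAmt[5] = 2; if
  // Bits[2] is bit 7 of V then RLAmt[2] = 64 - (7 - 2) = 59, since rotating
  // left by 59 moves bit 7 to position (7 + 59) mod 64 = 2.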
1867
1868
// Collect groups of consecutive bits with the same underlying value and
1869
// rotation factor. If we're doing late masking, we ignore zeros, otherwise
1870
// they break up groups.
1871
void collectBitGroups(bool LateMask) {
1872
BitGroups.clear();
1873
1874
unsigned LastRLAmt = RLAmt[0];
1875
SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1876
unsigned LastGroupStartIdx = 0;
1877
bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1878
for (unsigned i = 1; i < Bits.size(); ++i) {
1879
unsigned ThisRLAmt = RLAmt[i];
1880
SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1881
if (LateMask && !ThisValue) {
1882
ThisValue = LastValue;
1883
ThisRLAmt = LastRLAmt;
1884
// If we're doing late masking, then the first bit group always starts
1885
// at zero (even if the first bits were zero).
1886
if (BitGroups.empty())
1887
LastGroupStartIdx = 0;
1888
}
1889
1890
      // If this bit is known to be zero and the current group is a group of
      // zeros, we do not need to terminate the current bit group even if the
      // Value or RLAmt does not match here. Instead, we terminate this group
      // when the first non-zero bit appears later.
1894
if (IsGroupOfZeros && Bits[i].isZero())
1895
continue;
1896
1897
// If this bit has the same underlying value and the same rotate factor as
1898
// the last one, then they're part of the same group.
1899
if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1900
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
1902
if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1903
continue;
1904
1905
if (LastValue.getNode())
1906
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1907
i-1));
1908
LastRLAmt = ThisRLAmt;
1909
LastValue = ThisValue;
1910
LastGroupStartIdx = i;
1911
IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1912
}
1913
if (LastValue.getNode())
1914
BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1915
Bits.size()-1));
1916
1917
if (BitGroups.empty())
1918
return;
1919
1920
// We might be able to combine the first and last groups.
1921
if (BitGroups.size() > 1) {
1922
// If the first and last groups are the same, then remove the first group
1923
// in favor of the last group, making the ending index of the last group
1924
// equal to the ending index of the to-be-removed first group.
1925
if (BitGroups[0].StartIdx == 0 &&
1926
BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1927
BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1928
BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1929
LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1930
BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1931
BitGroups.erase(BitGroups.begin());
1932
}
1933
}
1934
}
1935
1936
  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
  // associated with each. If the number of groups is the same, we prefer a
  // group which does not require a rotate (i.e. RLAmt is 0), to avoid the
  // first rotate instruction. If there is a degeneracy, pick the one that
  // occurs first (in the final value).
1941
void collectValueRotInfo() {
1942
ValueRots.clear();
1943
1944
for (auto &BG : BitGroups) {
1945
unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1946
ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1947
VRI.V = BG.V;
1948
VRI.RLAmt = BG.RLAmt;
1949
VRI.Repl32 = BG.Repl32;
1950
VRI.NumGroups += 1;
1951
VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1952
}
1953
1954
// Now that we've collected the various ValueRotInfo instances, we need to
1955
// sort them.
1956
ValueRotsVec.clear();
1957
for (auto &I : ValueRots) {
1958
ValueRotsVec.push_back(I.second);
1959
}
1960
llvm::sort(ValueRotsVec);
1961
}
1962
1963
// In 64-bit mode, rlwinm and friends have a rotation operator that
1964
// replicates the low-order 32 bits into the high-order 32-bits. The mask
1965
// indices of these instructions can only be in the lower 32 bits, so they
1966
// can only represent some 64-bit bit groups. However, when they can be used,
1967
// the 32-bit replication can be used to represent, as a single bit group,
1968
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible.
1971
void assignRepl32BitGroups() {
1972
// If we have bits like this:
1973
//
1974
// Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1975
// V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1976
// Groups: | RLAmt = 8 | RLAmt = 40 |
1977
//
1978
// But, making use of a 32-bit operation that replicates the low-order 32
1979
// bits into the high-order 32 bits, this can be one bit group with a RLAmt
1980
// of 8.
1981
1982
auto IsAllLow32 = [this](BitGroup & BG) {
1983
if (BG.StartIdx <= BG.EndIdx) {
1984
for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1985
if (!Bits[i].hasValue())
1986
continue;
1987
if (Bits[i].getValueBitIndex() >= 32)
1988
return false;
1989
}
1990
} else {
1991
for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1992
if (!Bits[i].hasValue())
1993
continue;
1994
if (Bits[i].getValueBitIndex() >= 32)
1995
return false;
1996
}
1997
for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1998
if (!Bits[i].hasValue())
1999
continue;
2000
if (Bits[i].getValueBitIndex() >= 32)
2001
return false;
2002
}
2003
}
2004
2005
return true;
2006
};
2007
2008
for (auto &BG : BitGroups) {
2009
// If this bit group has RLAmt of 0 and will not be merged with
2010
// another bit group, we don't benefit from Repl32. We don't mark
2011
// such group to give more freedom for later instruction selection.
2012
if (BG.RLAmt == 0) {
2013
auto PotentiallyMerged = [this](BitGroup & BG) {
2014
for (auto &BG2 : BitGroups)
2015
if (&BG != &BG2 && BG.V == BG2.V &&
2016
(BG2.RLAmt == 0 || BG2.RLAmt == 32))
2017
return true;
2018
return false;
2019
};
2020
if (!PotentiallyMerged(BG))
2021
continue;
2022
}
2023
if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2024
if (IsAllLow32(BG)) {
2025
if (BG.RLAmt >= 32) {
2026
BG.RLAmt -= 32;
2027
BG.Repl32CR = true;
2028
}
2029
2030
BG.Repl32 = true;
2031
2032
LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2033
<< BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2034
<< BG.StartIdx << ", " << BG.EndIdx << "]\n");
2035
}
2036
}
2037
}
2038
2039
// Now walk through the bit groups, consolidating where possible.
2040
for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2041
// We might want to remove this bit group by merging it with the previous
2042
// group (which might be the ending group).
2043
auto IP = (I == BitGroups.begin()) ?
2044
std::prev(BitGroups.end()) : std::prev(I);
2045
if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2046
I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2047
2048
LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2049
<< I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2050
<< I->StartIdx << ", " << I->EndIdx
2051
<< "] with group with range [" << IP->StartIdx << ", "
2052
<< IP->EndIdx << "]\n");
2053
2054
IP->EndIdx = I->EndIdx;
2055
IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2056
IP->Repl32Coalesced = true;
2057
I = BitGroups.erase(I);
2058
continue;
2059
} else {
2060
// There is a special case worth handling: If there is a single group
2061
// covering the entire upper 32 bits, and it can be merged with both
2062
// the next and previous groups (which might be the same group), then
2063
// do so. If it is the same group (so there will be only one group in
2064
// total), then we need to reverse the order of the range so that it
2065
// covers the entire 64 bits.
2066
if (I->StartIdx == 32 && I->EndIdx == 63) {
2067
assert(std::next(I) == BitGroups.end() &&
2068
"bit group ends at index 63 but there is another?");
2069
auto IN = BitGroups.begin();
2070
2071
if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2072
(I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2073
IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2074
IsAllLow32(*I)) {
2075
2076
LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2077
<< " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2078
<< ", " << I->EndIdx
2079
<< "] with 32-bit replicated groups with ranges ["
2080
<< IP->StartIdx << ", " << IP->EndIdx << "] and ["
2081
<< IN->StartIdx << ", " << IN->EndIdx << "]\n");
2082
2083
if (IP == IN) {
2084
// There is only one other group; change it to cover the whole
2085
// range (backward, so that it can still be Repl32 but cover the
2086
// whole 64-bit range).
2087
IP->StartIdx = 31;
2088
IP->EndIdx = 30;
2089
IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2090
IP->Repl32Coalesced = true;
2091
I = BitGroups.erase(I);
2092
} else {
2093
// There are two separate groups, one before this group and one
2094
// after us (at the beginning). We're going to remove this group,
2095
// but also the group at the very beginning.
2096
IP->EndIdx = IN->EndIdx;
2097
IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2098
IP->Repl32Coalesced = true;
2099
I = BitGroups.erase(I);
2100
BitGroups.erase(BitGroups.begin());
2101
}
2102
2103
// This must be the last group in the vector (and we might have
2104
// just invalidated the iterator above), so break here.
2105
break;
2106
}
2107
}
2108
}
2109
2110
++I;
2111
}
2112
}
2113
2114
SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2115
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2116
}
2117
2118
uint64_t getZerosMask() {
2119
uint64_t Mask = 0;
2120
for (unsigned i = 0; i < Bits.size(); ++i) {
2121
if (Bits[i].hasValue())
2122
continue;
2123
Mask |= (UINT64_C(1) << i);
2124
}
2125
2126
return ~Mask;
2127
}
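  // Example for getZerosMask(): if only Bits[2] and Bits[3] are ConstZero (no
  // underlying value), Mask accumulates 0b1100 and the returned zeros mask is
  // ~0b1100 = 0xFFFF_FFFF_FFFF_FFF3, i.e. zeros exactly where masking is
  // needed.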
2128
2129
// This method extends an input value to 64 bit if input is 32-bit integer.
2130
// While selecting instructions in BitPermutationSelector in 64-bit mode,
2131
// an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2132
// In such case, we extend it to 64 bit to be consistent with other values.
2133
SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2134
if (V.getValueSizeInBits() == 64)
2135
return V;
2136
2137
assert(V.getValueSizeInBits() == 32);
2138
SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2139
SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2140
MVT::i64), 0);
2141
SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2142
MVT::i64, ImDef, V,
2143
SubRegIdx), 0);
2144
return ExtVal;
2145
}
2146
2147
SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2148
if (V.getValueSizeInBits() == 32)
2149
return V;
2150
2151
assert(V.getValueSizeInBits() == 64);
2152
SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2153
SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2154
MVT::i32, V, SubRegIdx), 0);
2155
return SubVal;
2156
}
2157
2158
// Depending on the number of groups for a particular value, it might be
2159
// better to rotate, mask explicitly (using andi/andis), and then or the
2160
// result. Select this part of the result first.
2161
void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2162
if (BPermRewriterNoMasking)
2163
return;
2164
2165
for (ValueRotInfo &VRI : ValueRotsVec) {
2166
unsigned Mask = 0;
2167
for (unsigned i = 0; i < Bits.size(); ++i) {
2168
if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2169
continue;
2170
if (RLAmt[i] != VRI.RLAmt)
2171
continue;
2172
Mask |= (1u << i);
2173
}
2174
2175
// Compute the masks for andi/andis that would be necessary.
2176
unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2177
assert((ANDIMask != 0 || ANDISMask != 0) &&
2178
"No set bits in mask for value bit groups");
2179
bool NeedsRotate = VRI.RLAmt != 0;
2180
2181
// We're trying to minimize the number of instructions. If we have one
2182
// group, using one of andi/andis can break even. If we have three
2183
// groups, we can use both andi and andis and break even (to use both
2184
// andi and andis we also need to or the results together). We need four
2185
// groups if we also need to rotate. To use andi/andis we need to do more
2186
// than break even because rotate-and-mask instructions tend to be easier
2187
// to schedule.
2188
2189
// FIXME: We've biased here against using andi/andis, which is right for
2190
// POWER cores, but not optimal everywhere. For example, on the A2,
2191
// andi/andis have single-cycle latency whereas the rotate-and-mask
2192
// instructions take two cycles, and it would be better to bias toward
2193
// andi/andis in break-even cases.
2194
2195
unsigned NumAndInsts = (unsigned) NeedsRotate +
2196
(unsigned) (ANDIMask != 0) +
2197
(unsigned) (ANDISMask != 0) +
2198
(unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2199
(unsigned) (bool) Res;
2200
2201
LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2202
<< " RL: " << VRI.RLAmt << ":"
2203
<< "\n\t\t\tisel using masking: " << NumAndInsts
2204
<< " using rotates: " << VRI.NumGroups << "\n");
2205
2206
if (NumAndInsts >= VRI.NumGroups)
2207
continue;
2208
2209
LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2210
2211
if (InstCnt) *InstCnt += NumAndInsts;
2212
2213
SDValue VRot;
2214
if (VRI.RLAmt) {
2215
SDValue Ops[] =
2216
{ TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2217
getI32Imm(0, dl), getI32Imm(31, dl) };
2218
VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2219
Ops), 0);
2220
} else {
2221
VRot = TruncateToInt32(VRI.V, dl);
2222
}
2223
2224
SDValue ANDIVal, ANDISVal;
2225
if (ANDIMask != 0)
2226
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2227
VRot, getI32Imm(ANDIMask, dl)),
2228
0);
2229
if (ANDISMask != 0)
2230
ANDISVal =
2231
SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2232
getI32Imm(ANDISMask, dl)),
2233
0);
2234
2235
SDValue TotalVal;
2236
if (!ANDIVal)
2237
TotalVal = ANDISVal;
2238
else if (!ANDISVal)
2239
TotalVal = ANDIVal;
2240
else
2241
TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2242
ANDIVal, ANDISVal), 0);
2243
2244
if (!Res)
2245
Res = TotalVal;
2246
else
2247
Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2248
Res, TotalVal), 0);
2249
2250
// Now, remove all groups with this underlying value and rotation
2251
// factor.
2252
eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2253
return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2254
});
2255
}
2256
}
2257
2258
// Instruction selection for the 32-bit case.
2259
SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2260
SDLoc dl(N);
2261
SDValue Res;
2262
2263
if (InstCnt) *InstCnt = 0;
2264
2265
// Take care of cases that should use andi/andis first.
2266
SelectAndParts32(dl, Res, InstCnt);
2267
2268
// If we've not yet selected a 'starting' instruction, and we have no zeros
2269
// to fill in, select the (Value, RLAmt) with the highest priority (largest
2270
// number of groups), and start with this rotated value.
2271
if ((!NeedMask || LateMask) && !Res) {
2272
ValueRotInfo &VRI = ValueRotsVec[0];
2273
if (VRI.RLAmt) {
2274
if (InstCnt) *InstCnt += 1;
2275
SDValue Ops[] =
2276
{ TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2277
getI32Imm(0, dl), getI32Imm(31, dl) };
2278
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2279
0);
2280
} else {
2281
Res = TruncateToInt32(VRI.V, dl);
2282
}
2283
2284
// Now, remove all groups with this underlying value and rotation factor.
2285
eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2286
return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2287
});
2288
}
2289
2290
if (InstCnt) *InstCnt += BitGroups.size();
2291
2292
// Insert the other groups (one at a time).
2293
for (auto &BG : BitGroups) {
2294
if (!Res) {
2295
SDValue Ops[] =
2296
{ TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2297
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2298
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2299
Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2300
} else {
2301
SDValue Ops[] =
2302
{ Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2303
getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2304
getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2305
Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2306
}
2307
}
2308
2309
if (LateMask) {
2310
unsigned Mask = (unsigned) getZerosMask();
2311
2312
unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2313
assert((ANDIMask != 0 || ANDISMask != 0) &&
2314
"No set bits in zeros mask?");
2315
2316
if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2317
(unsigned) (ANDISMask != 0) +
2318
(unsigned) (ANDIMask != 0 && ANDISMask != 0);
2319
2320
SDValue ANDIVal, ANDISVal;
2321
if (ANDIMask != 0)
2322
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2323
Res, getI32Imm(ANDIMask, dl)),
2324
0);
2325
if (ANDISMask != 0)
2326
ANDISVal =
2327
SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2328
getI32Imm(ANDISMask, dl)),
2329
0);
2330
2331
if (!ANDIVal)
2332
Res = ANDISVal;
2333
else if (!ANDISVal)
2334
Res = ANDIVal;
2335
else
2336
Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2337
ANDIVal, ANDISVal), 0);
2338
}
2339
2340
return Res.getNode();
2341
}
2342
2343
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
2344
unsigned MaskStart, unsigned MaskEnd,
2345
bool IsIns) {
2346
// In the notation used by the instructions, 'start' and 'end' are reversed
2347
// because bits are counted from high to low order.
2348
unsigned InstMaskStart = 64 - MaskEnd - 1,
2349
InstMaskEnd = 64 - MaskStart - 1;
2350
2351
if (Repl32)
2352
return 1;
2353
2354
if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
2355
InstMaskEnd == 63 - RLAmt)
2356
return 1;
2357
2358
return 2;
2359
}
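  // Example for SelectRotMask64Count(): a plain rotate-and-mask (IsIns ==
  // false) with RLAmt = 8, MaskStart = 8, MaskEnd = 63 gives InstMaskStart = 0
  // and costs a single instruction (RLDICR), while RLAmt = 5, MaskStart = 10,
  // MaskEnd = 40 matches none of the single-instruction forms and needs the
  // extra pre-rotate, so the count is 2.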
2360
2361
// For 64-bit values, not all combinations of rotates and masks are
2362
// available. Produce one if it is available.
2363
SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2364
bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2365
unsigned *InstCnt = nullptr) {
2366
// In the notation used by the instructions, 'start' and 'end' are reversed
2367
// because bits are counted from high to low order.
2368
unsigned InstMaskStart = 64 - MaskEnd - 1,
2369
InstMaskEnd = 64 - MaskStart - 1;
2370
2371
if (InstCnt) *InstCnt += 1;
2372
2373
if (Repl32) {
2374
// This rotation amount assumes that the lower 32 bits of the quantity
2375
// are replicated in the high 32 bits by the rotation operator (which is
2376
// done by rlwinm and friends).
2377
assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2378
assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2379
SDValue Ops[] =
2380
{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2381
getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2382
return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2383
Ops), 0);
2384
}
2385
2386
if (InstMaskEnd == 63) {
2387
SDValue Ops[] =
2388
{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2389
getI32Imm(InstMaskStart, dl) };
2390
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2391
}
2392
2393
if (InstMaskStart == 0) {
2394
SDValue Ops[] =
2395
{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2396
getI32Imm(InstMaskEnd, dl) };
2397
return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2398
}
2399
2400
if (InstMaskEnd == 63 - RLAmt) {
2401
SDValue Ops[] =
2402
{ ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2403
getI32Imm(InstMaskStart, dl) };
2404
return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2405
}
2406
2407
// We cannot do this with a single instruction, so we'll use two. The
2408
// problem is that we're not free to choose both a rotation amount and mask
2409
// start and end independently. We can choose an arbitrary mask start and
2410
// end, but then the rotation amount is fixed. Rotation, however, can be
2411
// inverted, and so by applying an "inverse" rotation first, we can get the
2412
// desired result.
2413
if (InstCnt) *InstCnt += 1;
2414
2415
// The rotation mask for the second instruction must be MaskStart.
2416
unsigned RLAmt2 = MaskStart;
2417
// The first instruction must rotate V so that the overall rotation amount
2418
// is RLAmt.
2419
unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2420
if (RLAmt1)
2421
V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2422
return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2423
}
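  // Example for the two-instruction fallback above: RLAmt = 5, MaskStart = 10,
  // MaskEnd = 40 has no single-instruction form, so RLAmt2 = MaskStart = 10
  // and RLAmt1 = (64 + 5 - 10) % 64 = 59; a plain rotate by 59 is followed by
  // a rotate-and-mask by 10, which now satisfies InstMaskEnd == 63 - RLAmt and
  // maps onto RLDIC. The two rotations compose to (59 + 10) % 64 = 5.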
2424
2425
// For 64-bit values, not all combinations of rotates and masks are
2426
// available. Produce a rotate-mask-and-insert if one is available.
2427
SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2428
unsigned RLAmt, bool Repl32, unsigned MaskStart,
2429
unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2430
// In the notation used by the instructions, 'start' and 'end' are reversed
2431
// because bits are counted from high to low order.
2432
unsigned InstMaskStart = 64 - MaskEnd - 1,
2433
InstMaskEnd = 64 - MaskStart - 1;
2434
2435
if (InstCnt) *InstCnt += 1;
2436
2437
if (Repl32) {
2438
// This rotation amount assumes that the lower 32 bits of the quantity
2439
// are replicated in the high 32 bits by the rotation operator (which is
2440
// done by rlwinm and friends).
2441
assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2442
assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2443
SDValue Ops[] =
2444
{ ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2445
getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2446
return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2447
Ops), 0);
2448
}
2449
2450
if (InstMaskEnd == 63 - RLAmt) {
2451
SDValue Ops[] =
2452
{ ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2453
getI32Imm(InstMaskStart, dl) };
2454
return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2455
}
2456
2457
// We cannot do this with a single instruction, so we'll use two. The
2458
// problem is that we're not free to choose both a rotation amount and mask
2459
// start and end independently. We can choose an arbitrary mask start and
2460
// end, but then the rotation amount is fixed. Rotation, however, can be
2461
// inverted, and so by applying an "inverse" rotation first, we can get the
2462
// desired result.
2463
if (InstCnt) *InstCnt += 1;
2464
2465
// The rotation mask for the second instruction must be MaskStart.
2466
unsigned RLAmt2 = MaskStart;
2467
// The first instruction must rotate V so that the overall rotation amount
2468
// is RLAmt.
2469
unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2470
if (RLAmt1)
2471
V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2472
return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2473
}
2474
2475
void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2476
if (BPermRewriterNoMasking)
2477
return;
2478
2479
// The idea here is the same as in the 32-bit version, but with additional
2480
// complications from the fact that Repl32 might be true. Because we
2481
// aggressively convert bit groups to Repl32 form (which, for small
2482
// rotation factors, involves no other change), and then coalesce, it might
2483
// be the case that a single 64-bit masking operation could handle both
2484
// some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2485
    // form allowed coalescing, then we must use a 32-bit rotation in order to
2486
// completely capture the new combined bit group.
2487
2488
for (ValueRotInfo &VRI : ValueRotsVec) {
2489
uint64_t Mask = 0;
2490
2491
// We need to add to the mask all bits from the associated bit groups.
2492
// If Repl32 is false, we need to add bits from bit groups that have
2493
// Repl32 true, but are trivially convertable to Repl32 false. Such a
2494
// group is trivially convertable if it overlaps only with the lower 32
2495
// bits, and the group has not been coalesced.
2496
auto MatchingBG = [VRI](const BitGroup &BG) {
2497
if (VRI.V != BG.V)
2498
return false;
2499
2500
unsigned EffRLAmt = BG.RLAmt;
2501
if (!VRI.Repl32 && BG.Repl32) {
2502
if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2503
!BG.Repl32Coalesced) {
2504
if (BG.Repl32CR)
2505
EffRLAmt += 32;
2506
} else {
2507
return false;
2508
}
2509
} else if (VRI.Repl32 != BG.Repl32) {
2510
return false;
2511
}
2512
2513
return VRI.RLAmt == EffRLAmt;
2514
};
2515
2516
for (auto &BG : BitGroups) {
2517
if (!MatchingBG(BG))
2518
continue;
2519
2520
if (BG.StartIdx <= BG.EndIdx) {
2521
for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2522
Mask |= (UINT64_C(1) << i);
2523
} else {
2524
for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2525
Mask |= (UINT64_C(1) << i);
2526
for (unsigned i = 0; i <= BG.EndIdx; ++i)
2527
Mask |= (UINT64_C(1) << i);
2528
}
2529
}
2530
2531
// We can use the 32-bit andi/andis technique if the mask does not
2532
// require any higher-order bits. This can save an instruction compared
2533
// to always using the general 64-bit technique.
2534
bool Use32BitInsts = isUInt<32>(Mask);
2535
// Compute the masks for andi/andis that would be necessary.
2536
unsigned ANDIMask = (Mask & UINT16_MAX),
2537
ANDISMask = (Mask >> 16) & UINT16_MAX;
2538
2539
bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2540
2541
unsigned NumAndInsts = (unsigned) NeedsRotate +
2542
(unsigned) (bool) Res;
2543
unsigned NumOfSelectInsts = 0;
2544
selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2545
assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2546
if (Use32BitInsts)
2547
NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2548
(unsigned) (ANDIMask != 0 && ANDISMask != 0);
2549
else
2550
NumAndInsts += NumOfSelectInsts + /* and */ 1;
2551
2552
unsigned NumRLInsts = 0;
2553
bool FirstBG = true;
2554
bool MoreBG = false;
2555
for (auto &BG : BitGroups) {
2556
if (!MatchingBG(BG)) {
2557
MoreBG = true;
2558
continue;
2559
}
2560
NumRLInsts +=
2561
SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2562
!FirstBG);
2563
FirstBG = false;
2564
}
2565
2566
LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2567
<< " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2568
<< "\n\t\t\tisel using masking: " << NumAndInsts
2569
<< " using rotates: " << NumRLInsts << "\n");
2570
2571
// When we'd use andi/andis, we bias toward using the rotates (andi only
2572
// has a record form, and is cracked on POWER cores). However, when using
2573
// general 64-bit constant formation, bias toward the constant form,
2574
// because that exposes more opportunities for CSE.
2575
if (NumAndInsts > NumRLInsts)
2576
continue;
2577
      // When merging multiple bit groups, an OR instruction is used.
      // But when a rotate is used, rldimi can insert the rotated value into
      // any register, so the OR can be avoided.
2580
if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2581
continue;
2582
2583
LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2584
2585
if (InstCnt) *InstCnt += NumAndInsts;
2586
2587
SDValue VRot;
2588
// We actually need to generate a rotation if we have a non-zero rotation
2589
// factor or, in the Repl32 case, if we care about any of the
2590
// higher-order replicated bits. In the latter case, we generate a mask
2591
// backward so that it actually includes the entire 64 bits.
2592
if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2593
VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2594
VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2595
else
2596
VRot = VRI.V;
2597
2598
SDValue TotalVal;
2599
if (Use32BitInsts) {
2600
assert((ANDIMask != 0 || ANDISMask != 0) &&
2601
"No set bits in mask when using 32-bit ands for 64-bit value");
2602
2603
SDValue ANDIVal, ANDISVal;
2604
if (ANDIMask != 0)
2605
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2606
ExtendToInt64(VRot, dl),
2607
getI32Imm(ANDIMask, dl)),
2608
0);
2609
if (ANDISMask != 0)
2610
ANDISVal =
2611
SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2612
ExtendToInt64(VRot, dl),
2613
getI32Imm(ANDISMask, dl)),
2614
0);
2615
2616
if (!ANDIVal)
2617
TotalVal = ANDISVal;
2618
else if (!ANDISVal)
2619
TotalVal = ANDIVal;
2620
else
2621
TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2622
ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2623
} else {
2624
TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2625
TotalVal =
2626
SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2627
ExtendToInt64(VRot, dl), TotalVal),
2628
0);
2629
}
2630
2631
if (!Res)
2632
Res = TotalVal;
2633
else
2634
Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2635
ExtendToInt64(Res, dl), TotalVal),
2636
0);
2637
2638
// Now, remove all groups with this underlying value and rotation
2639
// factor.
2640
eraseMatchingBitGroups(MatchingBG);
2641
}
2642
}
2643
2644
// Instruction selection for the 64-bit case.
2645
SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2646
SDLoc dl(N);
2647
SDValue Res;
2648
2649
if (InstCnt) *InstCnt = 0;
2650
2651
// Take care of cases that should use andi/andis first.
2652
SelectAndParts64(dl, Res, InstCnt);
2653
2654
// If we've not yet selected a 'starting' instruction, and we have no zeros
2655
// to fill in, select the (Value, RLAmt) with the highest priority (largest
2656
// number of groups), and start with this rotated value.
2657
if ((!NeedMask || LateMask) && !Res) {
2658
// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2659
// groups will come first, and so the VRI representing the largest number
2660
// of groups might not be first (it might be the first Repl32 groups).
2661
unsigned MaxGroupsIdx = 0;
2662
if (!ValueRotsVec[0].Repl32) {
2663
for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2664
if (ValueRotsVec[i].Repl32) {
2665
if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2666
MaxGroupsIdx = i;
2667
break;
2668
}
2669
}
2670
2671
ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2672
bool NeedsRotate = false;
2673
if (VRI.RLAmt) {
2674
NeedsRotate = true;
2675
} else if (VRI.Repl32) {
2676
for (auto &BG : BitGroups) {
2677
if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2678
BG.Repl32 != VRI.Repl32)
2679
continue;
2680
2681
// We don't need a rotate if the bit group is confined to the lower
2682
// 32 bits.
2683
if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2684
continue;
2685
2686
NeedsRotate = true;
2687
break;
2688
}
2689
}
2690
2691
if (NeedsRotate)
2692
Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2693
VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2694
InstCnt);
2695
else
2696
Res = VRI.V;
2697
2698
// Now, remove all groups with this underlying value and rotation factor.
2699
if (Res)
2700
eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2701
return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2702
BG.Repl32 == VRI.Repl32;
2703
});
2704
}
2705
2706
// Because 64-bit rotates are more flexible than inserts, we might have a
2707
// preference regarding which one we do first (to save one instruction).
2708
if (!Res)
2709
for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2710
if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2711
false) <
2712
SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2713
true)) {
2714
if (I != BitGroups.begin()) {
2715
BitGroup BG = *I;
2716
BitGroups.erase(I);
2717
BitGroups.insert(BitGroups.begin(), BG);
2718
}
2719
2720
break;
2721
}
2722
}
2723
2724
// Insert the other groups (one at a time).
2725
for (auto &BG : BitGroups) {
2726
if (!Res)
2727
Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2728
BG.EndIdx, InstCnt);
2729
else
2730
Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2731
BG.StartIdx, BG.EndIdx, InstCnt);
2732
}
2733
2734
if (LateMask) {
2735
uint64_t Mask = getZerosMask();
2736
2737
// We can use the 32-bit andi/andis technique if the mask does not
2738
// require any higher-order bits. This can save an instruction compared
2739
// to always using the general 64-bit technique.
2740
bool Use32BitInsts = isUInt<32>(Mask);
2741
// Compute the masks for andi/andis that would be necessary.
2742
unsigned ANDIMask = (Mask & UINT16_MAX),
2743
ANDISMask = (Mask >> 16) & UINT16_MAX;
2744
2745
if (Use32BitInsts) {
2746
assert((ANDIMask != 0 || ANDISMask != 0) &&
2747
"No set bits in mask when using 32-bit ands for 64-bit value");
2748
2749
if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2750
(unsigned) (ANDISMask != 0) +
2751
(unsigned) (ANDIMask != 0 && ANDISMask != 0);
2752
2753
SDValue ANDIVal, ANDISVal;
2754
if (ANDIMask != 0)
2755
ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2756
ExtendToInt64(Res, dl),
2757
getI32Imm(ANDIMask, dl)),
2758
0);
2759
if (ANDISMask != 0)
2760
ANDISVal =
2761
SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2762
ExtendToInt64(Res, dl),
2763
getI32Imm(ANDISMask, dl)),
2764
0);
2765
2766
if (!ANDIVal)
2767
Res = ANDISVal;
2768
else if (!ANDISVal)
2769
Res = ANDIVal;
2770
else
2771
Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2772
ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2773
} else {
2774
unsigned NumOfSelectInsts = 0;
2775
SDValue MaskVal =
2776
SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2777
Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2778
ExtendToInt64(Res, dl), MaskVal),
2779
0);
2780
if (InstCnt)
2781
*InstCnt += NumOfSelectInsts + /* and */ 1;
2782
}
2783
}
2784
2785
return Res.getNode();
2786
}
2787
2788
SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2789
// Fill in BitGroups.
2790
collectBitGroups(LateMask);
2791
if (BitGroups.empty())
2792
return nullptr;
2793
2794
// For 64-bit values, figure out when we can use 32-bit instructions.
2795
if (Bits.size() == 64)
2796
assignRepl32BitGroups();
2797
2798
// Fill in ValueRotsVec.
2799
collectValueRotInfo();
2800
2801
if (Bits.size() == 32) {
2802
return Select32(N, LateMask, InstCnt);
2803
} else {
2804
assert(Bits.size() == 64 && "Not 64 bits here?");
2805
return Select64(N, LateMask, InstCnt);
2806
}
2807
2808
return nullptr;
2809
}
2810
2811
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2812
erase_if(BitGroups, F);
2813
}
2814
2815
SmallVector<ValueBit, 64> Bits;
2816
2817
bool NeedMask = false;
2818
SmallVector<unsigned, 64> RLAmt;
2819
2820
SmallVector<BitGroup, 16> BitGroups;
2821
2822
DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2823
SmallVector<ValueRotInfo, 16> ValueRotsVec;
2824
2825
SelectionDAG *CurDAG = nullptr;
2826
2827
public:
2828
BitPermutationSelector(SelectionDAG *DAG)
2829
: CurDAG(DAG) {}
2830
2831
// Here we try to match complex bit permutations into a set of
2832
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
2833
// known to produce optimal code for common cases (like i32 byte swapping).
2834
SDNode *Select(SDNode *N) {
2835
Memoizer.clear();
2836
auto Result =
2837
getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2838
if (!Result.first)
2839
return nullptr;
2840
Bits = std::move(*Result.second);
2841
2842
LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2843
" selection for: ");
2844
LLVM_DEBUG(N->dump(CurDAG));
2845
2846
    // Fill in RLAmt and set NeedMask.
2847
computeRotationAmounts();
2848
2849
if (!NeedMask)
2850
return Select(N, false);
2851
2852
// We currently have two techniques for handling results with zeros: early
2853
// masking (the default) and late masking. Late masking is sometimes more
2854
// efficient, but because the structure of the bit groups is different, it
2855
// is hard to tell without generating both and comparing the results. With
2856
// late masking, we ignore zeros in the resulting value when inserting each
2857
// set of bit groups, and then mask in the zeros at the end. With early
2858
// masking, we only insert the non-zero parts of the result at every step.
2859
2860
unsigned InstCnt = 0, InstCntLateMask = 0;
2861
LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2862
SDNode *RN = Select(N, false, &InstCnt);
2863
LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2864
2865
LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2866
SDNode *RNLM = Select(N, true, &InstCntLateMask);
2867
LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2868
<< " instructions\n");
2869
2870
if (InstCnt <= InstCntLateMask) {
2871
LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2872
return RN;
2873
}
2874
2875
LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2876
return RNLM;
2877
}
2878
};
2879
2880
class IntegerCompareEliminator {
2881
SelectionDAG *CurDAG;
2882
PPCDAGToDAGISel *S;
2883
// Conversion type for interpreting results of a 32-bit instruction as
2884
// a 64-bit value or vice versa.
2885
enum ExtOrTruncConversion { Ext, Trunc };
2886
2887
// Modifiers to guide how an ISD::SETCC node's result is to be computed
2888
// in a GPR.
2889
// ZExtOrig - use the original condition code, zero-extend value
2890
// ZExtInvert - invert the condition code, zero-extend value
2891
// SExtOrig - use the original condition code, sign-extend value
2892
// SExtInvert - invert the condition code, sign-extend value
2893
enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2894
2895
// Comparisons against zero to emit GPR code sequences for. Each of these
2896
// sequences may need to be emitted for two or more equivalent patterns.
2897
// For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2898
// matters as well as the extension type: sext (-1/0), zext (1/0).
2899
// GEZExt - (zext (LHS >= 0))
2900
// GESExt - (sext (LHS >= 0))
2901
// LEZExt - (zext (LHS <= 0))
2902
// LESExt - (sext (LHS <= 0))
2903
enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2904
2905
SDNode *tryEXTEND(SDNode *N);
2906
SDNode *tryLogicOpOfCompares(SDNode *N);
2907
SDValue computeLogicOpInGPR(SDValue LogicOp);
2908
SDValue signExtendInputIfNeeded(SDValue Input);
2909
SDValue zeroExtendInputIfNeeded(SDValue Input);
2910
SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2911
SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2912
ZeroCompare CmpTy);
2913
SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2914
int64_t RHSValue, SDLoc dl);
2915
SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2916
int64_t RHSValue, SDLoc dl);
2917
SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2918
int64_t RHSValue, SDLoc dl);
2919
SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2920
int64_t RHSValue, SDLoc dl);
2921
SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2922
2923
public:
2924
IntegerCompareEliminator(SelectionDAG *DAG,
2925
PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2926
assert(CurDAG->getTargetLoweringInfo()
2927
.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2928
"Only expecting to use this on 64 bit targets.");
2929
}
2930
SDNode *Select(SDNode *N) {
2931
if (CmpInGPR == ICGPR_None)
2932
return nullptr;
2933
switch (N->getOpcode()) {
2934
default: break;
2935
case ISD::ZERO_EXTEND:
2936
if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2937
CmpInGPR == ICGPR_SextI64)
2938
return nullptr;
2939
[[fallthrough]];
2940
case ISD::SIGN_EXTEND:
2941
if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2942
CmpInGPR == ICGPR_ZextI64)
2943
return nullptr;
2944
return tryEXTEND(N);
2945
case ISD::AND:
2946
case ISD::OR:
2947
case ISD::XOR:
2948
return tryLogicOpOfCompares(N);
2949
}
2950
return nullptr;
2951
}
2952
};
2953
2954
// The obvious case for wanting to keep the value in a GPR. Namely, the
2955
// result of the comparison is actually needed in a GPR.
2956
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2957
assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2958
N->getOpcode() == ISD::SIGN_EXTEND) &&
2959
"Expecting a zero/sign extend node!");
2960
SDValue WideRes;
2961
// If we are zero-extending the result of a logical operation on i1
2962
// values, we can keep the values in GPRs.
2963
if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2964
N->getOperand(0).getValueType() == MVT::i1 &&
2965
N->getOpcode() == ISD::ZERO_EXTEND)
2966
WideRes = computeLogicOpInGPR(N->getOperand(0));
2967
else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2968
return nullptr;
2969
else
2970
WideRes =
2971
getSETCCInGPR(N->getOperand(0),
2972
N->getOpcode() == ISD::SIGN_EXTEND ?
2973
SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2974
2975
if (!WideRes)
2976
return nullptr;
2977
2978
SDLoc dl(N);
2979
bool Input32Bit = WideRes.getValueType() == MVT::i32;
2980
bool Output32Bit = N->getValueType(0) == MVT::i32;
2981
2982
NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2983
NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2984
2985
SDValue ConvOp = WideRes;
2986
if (Input32Bit != Output32Bit)
2987
ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2988
ExtOrTruncConversion::Trunc);
2989
return ConvOp.getNode();
2990
}
2991
2992
// Attempt to perform logical operations on the results of comparisons while
2993
// keeping the values in GPRs. Without doing so, these would end up being
2994
// lowered to CR-logical operations which suffer from significant latency and
2995
// low ILP.
2996
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2997
if (N->getValueType(0) != MVT::i1)
2998
return nullptr;
2999
assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
3000
"Expected a logic operation on setcc results.");
3001
SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
3002
if (!LoweredLogical)
3003
return nullptr;
3004
3005
SDLoc dl(N);
3006
bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3007
unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3008
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3009
SDValue LHS = LoweredLogical.getOperand(0);
3010
SDValue RHS = LoweredLogical.getOperand(1);
3011
SDValue WideOp;
3012
SDValue OpToConvToRecForm;
3013
3014
// Look through any 32-bit to 64-bit implicit extend nodes to find the
3015
// opcode that is input to the XORI.
3016
if (IsBitwiseNegate &&
3017
LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3018
OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3019
else if (IsBitwiseNegate)
3020
// If the input to the XORI isn't an extension, that's what we're after.
3021
OpToConvToRecForm = LoweredLogical.getOperand(0);
3022
else
3023
// If this is not an XORI, it is a reg-reg logical op and we can convert
3024
// it to record-form.
3025
OpToConvToRecForm = LoweredLogical;
3026
3027
// Get the record-form version of the node we're looking to use to get the
3028
// CR result from.
3029
uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3030
int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3031
3032
// Convert the right node to record-form. This is either the logical we're
3033
// looking at or it is the input node to the negation (if we're looking at
3034
// a bitwise negation).
3035
if (NewOpc != -1 && IsBitwiseNegate) {
3036
// The input to the XORI has a record-form. Use it.
3037
assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3038
"Expected a PPC::XORI8 only for bitwise negation.");
3039
// Emit the record-form instruction.
3040
std::vector<SDValue> Ops;
3041
for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3042
Ops.push_back(OpToConvToRecForm.getOperand(i));
3043
3044
WideOp =
3045
SDValue(CurDAG->getMachineNode(NewOpc, dl,
3046
OpToConvToRecForm.getValueType(),
3047
MVT::Glue, Ops), 0);
3048
} else {
3049
assert((NewOpc != -1 || !IsBitwiseNegate) &&
3050
"No record form available for AND8/OR8/XOR8?");
3051
WideOp =
3052
SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3053
dl, MVT::i64, MVT::Glue, LHS, RHS),
3054
0);
3055
}
3056
3057
// Select this node to a single bit from CR0 set by the record-form node
3058
// just created. For bitwise negation, use the EQ bit which is the equivalent
3059
// of negating the result (i.e. the bit is set when the result of the
3060
// operation is zero).
3061
SDValue SRIdxVal =
3062
CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3063
SDValue CRBit =
3064
SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3065
MVT::i1, CR0Reg, SRIdxVal,
3066
WideOp.getValue(1)), 0);
3067
return CRBit.getNode();
3068
}
3069
3070
// Lower a logical operation on i1 values into a GPR sequence if possible.
3071
// The result can be kept in a GPR if requested.
3072
// Three types of inputs can be handled:
3073
// - SETCC
3074
// - TRUNCATE
3075
// - Logical operation (AND/OR/XOR)
3076
// There is also a special case that is handled (namely a complement operation
3077
// achieved with xor %a, -1).
3078
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3079
assert(ISD::isBitwiseLogicOp(LogicOp.getOpcode()) &&
3080
"Can only handle logic operations here.");
3081
assert(LogicOp.getValueType() == MVT::i1 &&
3082
"Can only handle logic operations on i1 values here.");
3083
SDLoc dl(LogicOp);
3084
SDValue LHS, RHS;
3085
3086
// Special case: xor %a, -1
3087
bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3088
3089
// Produces a GPR sequence for each operand of the binary logic operation.
3090
// For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3091
// the value in a GPR and for logic operations, it will recursively produce
3092
// a GPR sequence for the operation.
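// For the TRUNCATE case below, the rotate-and-mask (rldicl ..., 0, 63) simply
// keeps bit 0 of the input, which is all that is meaningful for an i1 value.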
3093
auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3094
unsigned OperandOpcode = Operand.getOpcode();
3095
if (OperandOpcode == ISD::SETCC)
3096
return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3097
else if (OperandOpcode == ISD::TRUNCATE) {
3098
SDValue InputOp = Operand.getOperand(0);
3099
EVT InVT = InputOp.getValueType();
3100
return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3101
PPC::RLDICL, dl, InVT, InputOp,
3102
S->getI64Imm(0, dl),
3103
S->getI64Imm(63, dl)), 0);
3104
} else if (ISD::isBitwiseLogicOp(OperandOpcode))
3105
return computeLogicOpInGPR(Operand);
3106
return SDValue();
3107
};
3108
LHS = getLogicOperand(LogicOp.getOperand(0));
3109
RHS = getLogicOperand(LogicOp.getOperand(1));
3110
3111
// If a GPR sequence can't be produced for the LHS we can't proceed.
3112
// Not producing a GPR sequence for the RHS is only a problem if this isn't
3113
// a bitwise negation operation.
3114
if (!LHS || (!RHS && !IsBitwiseNegation))
3115
return SDValue();
3116
3117
NumLogicOpsOnComparison++;
3118
3119
// We will use the inputs as 64-bit values.
3120
if (LHS.getValueType() == MVT::i32)
3121
LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3122
if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3123
RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3124
3125
unsigned NewOpc;
3126
switch (LogicOp.getOpcode()) {
3127
default: llvm_unreachable("Unknown logic operation.");
3128
case ISD::AND: NewOpc = PPC::AND8; break;
3129
case ISD::OR: NewOpc = PPC::OR8; break;
3130
case ISD::XOR: NewOpc = PPC::XOR8; break;
3131
}
3132
3133
if (IsBitwiseNegation) {
3134
RHS = S->getI64Imm(1, dl);
3135
NewOpc = PPC::XORI8;
3136
}
3137
3138
return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3139
3140
}
3141
3142
/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3143
/// Otherwise just reinterpret it as a 64-bit value.
3144
/// Useful when emitting comparison code for 32-bit values without using
3145
/// the compare instruction (which only considers the lower 32-bits).
3146
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3147
assert(Input.getValueType() == MVT::i32 &&
3148
"Can only sign-extend 32-bit values here.");
3149
unsigned Opc = Input.getOpcode();
3150
3151
// The value was sign extended and then truncated to 32-bits. No need to
3152
// sign extend it again.
3153
if (Opc == ISD::TRUNCATE &&
3154
(Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3155
Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3156
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3157
3158
LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3159
// The input is a sign-extending load. All ppc sign-extending loads
3160
// sign-extend to the full 64-bits.
3161
if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3162
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3163
3164
ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3165
// We don't sign-extend constants.
3166
if (InputConst)
3167
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3168
3169
SDLoc dl(Input);
3170
SignExtensionsAdded++;
3171
return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3172
MVT::i64, Input), 0);
3173
}
3174
3175
/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3176
/// Otherwise just reinterpret it as a 64-bit value.
3177
/// Useful when emitting comparison code for 32-bit values without using
3178
/// the compare instruction (which only considers the lower 32-bits).
3179
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3180
assert(Input.getValueType() == MVT::i32 &&
3181
"Can only zero-extend 32-bit values here.");
3182
unsigned Opc = Input.getOpcode();
3183
3184
// The only conditions under which we can omit the actual extend instruction:
3185
// - The value is a positive constant
3186
// - The value comes from a load that isn't a sign-extending load
3187
// An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
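// (On 64-bit PowerPC a non-sign-extending 32-bit load clears the upper 32
// bits of the target GPR, which is why such loads can be reinterpreted
// directly below.)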
3188
bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3189
(Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3190
Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3191
if (IsTruncateOfZExt)
3192
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3193
3194
ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3195
if (InputConst && InputConst->getSExtValue() >= 0)
3196
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3197
3198
LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3199
// The input is a load that doesn't sign-extend (it will be zero-extended).
3200
if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3201
return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3202
3203
// None of the above, need to zero-extend.
3204
SDLoc dl(Input);
3205
ZeroExtensionsAdded++;
3206
return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3207
S->getI64Imm(0, dl),
3208
S->getI64Imm(32, dl)), 0);
3209
}
3210
3211
// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3212
// course not actual zero/sign extensions that will generate machine code,
3213
// they're just a way to reinterpret a 32 bit value in a register as a
3214
// 64 bit value and vice-versa.
3215
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3216
ExtOrTruncConversion Conv) {
3217
SDLoc dl(NatWidthRes);
3218
3219
// For reinterpreting 32-bit values as 64 bit values, we generate
3220
// INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3221
if (Conv == ExtOrTruncConversion::Ext) {
3222
SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3223
SDValue SubRegIdx =
3224
CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3225
return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3226
ImDef, NatWidthRes, SubRegIdx), 0);
3227
}
3228
3229
assert(Conv == ExtOrTruncConversion::Trunc &&
3230
"Unknown convertion between 32 and 64 bit values.");
3231
// For reinterpreting 64-bit values as 32-bit values, we just need to
3232
// EXTRACT_SUBREG (i.e. extract the low word).
3233
SDValue SubRegIdx =
3234
CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3235
return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3236
NatWidthRes, SubRegIdx), 0);
3237
}
3238
3239
// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3240
// Handle both zero-extensions and sign-extensions.
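// The >= 0 cases key off the sign bit of (nor %a, %a), i.e. ~%a, which is set
// exactly when %a is non-negative. The <= 0 cases derive the bit from
// (or %a, (add %a, -1)) for 64-bit inputs and from the negation of the
// sign-extended input for 32-bit inputs.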
3241
SDValue
3242
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3243
ZeroCompare CmpTy) {
3244
EVT InVT = LHS.getValueType();
3245
bool Is32Bit = InVT == MVT::i32;
3246
SDValue ToExtend;
3247
3248
// Produce the value that needs to be either zero or sign extended.
3249
switch (CmpTy) {
3250
case ZeroCompare::GEZExt:
3251
case ZeroCompare::GESExt:
3252
ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3253
dl, InVT, LHS, LHS), 0);
3254
break;
3255
case ZeroCompare::LEZExt:
3256
case ZeroCompare::LESExt: {
3257
if (Is32Bit) {
3258
// Upper 32 bits cannot be undefined for this sequence.
3259
LHS = signExtendInputIfNeeded(LHS);
3260
SDValue Neg =
3261
SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3262
ToExtend =
3263
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3264
Neg, S->getI64Imm(1, dl),
3265
S->getI64Imm(63, dl)), 0);
3266
} else {
3267
SDValue Addi =
3268
SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3269
S->getI64Imm(~0ULL, dl)), 0);
3270
ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3271
Addi, LHS), 0);
3272
}
3273
break;
3274
}
3275
}
3276
3277
// For 64-bit sequences, the extensions are the same for the GE/LE cases.
3278
if (!Is32Bit &&
3279
(CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3280
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3281
ToExtend, S->getI64Imm(1, dl),
3282
S->getI64Imm(63, dl)), 0);
3283
if (!Is32Bit &&
3284
(CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3285
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3286
S->getI64Imm(63, dl)), 0);
3287
3288
assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3289
// For 32-bit sequences, the extensions differ between GE/LE cases.
3290
switch (CmpTy) {
3291
case ZeroCompare::GEZExt: {
3292
SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3293
S->getI32Imm(31, dl) };
3294
return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3295
ShiftOps), 0);
3296
}
3297
case ZeroCompare::GESExt:
3298
return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3299
S->getI32Imm(31, dl)), 0);
3300
case ZeroCompare::LEZExt:
3301
return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3302
S->getI32Imm(1, dl)), 0);
3303
case ZeroCompare::LESExt:
3304
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3305
S->getI32Imm(-1, dl)), 0);
3306
}
3307
3308
// The above case covers all the enumerators so it can't have a default clause
3309
// to avoid compiler warnings.
3310
llvm_unreachable("Unknown zero-comparison type.");
3311
}
3312
3313
/// Produces a zero-extended result of comparing two 32-bit values according to
3314
/// the passed condition code.
3315
SDValue
3316
IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3317
ISD::CondCode CC,
3318
int64_t RHSValue, SDLoc dl) {
3319
if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3320
CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
3321
return SDValue();
3322
bool IsRHSZero = RHSValue == 0;
3323
bool IsRHSOne = RHSValue == 1;
3324
bool IsRHSNegOne = RHSValue == -1LL;
3325
switch (CC) {
3326
default: return SDValue();
3327
case ISD::SETEQ: {
3328
// (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3329
// (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
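// For example, when %a == %b the xor is 0 and cntlzw returns 32; shifting the
// count right by 5 yields 1. Any non-zero xor gives a count of at most 31,
// whose bit 5 is clear, so the result is 0.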
3330
SDValue Xor = IsRHSZero ? LHS :
3331
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3332
SDValue Clz =
3333
SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3334
SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3335
S->getI32Imm(31, dl) };
3336
return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3337
ShiftOps), 0);
3338
}
3339
case ISD::SETNE: {
3340
// (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3341
// (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3342
SDValue Xor = IsRHSZero ? LHS :
3343
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3344
SDValue Clz =
3345
SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3346
SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3347
S->getI32Imm(31, dl) };
3348
SDValue Shift =
3349
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3350
return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3351
S->getI32Imm(1, dl)), 0);
3352
}
3353
case ISD::SETGE: {
3354
// (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3355
// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3356
if (IsRHSZero)
3357
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3358
3359
// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3360
// by swapping inputs and falling through.
3361
std::swap(LHS, RHS);
3362
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3363
IsRHSZero = RHSConst && RHSConst->isZero();
3364
[[fallthrough]];
3365
}
3366
case ISD::SETLE: {
3367
if (CmpInGPR == ICGPR_NonExtIn)
3368
return SDValue();
3369
// (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3370
// (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3371
if (IsRHSZero) {
3372
if (CmpInGPR == ICGPR_NonExtIn)
3373
return SDValue();
3374
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3375
}
3376
3377
// The upper 32-bits of the register can't be undefined for this sequence.
3378
LHS = signExtendInputIfNeeded(LHS);
3379
RHS = signExtendInputIfNeeded(RHS);
3380
SDValue Sub =
3381
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3382
SDValue Shift =
3383
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3384
S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3385
0);
3386
return
3387
SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3388
MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3389
}
3390
case ISD::SETGT: {
3391
// (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3392
// (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3393
// (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3394
// Handle SETGT -1 (which is equivalent to SETGE 0).
3395
if (IsRHSNegOne)
3396
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3397
3398
if (IsRHSZero) {
3399
if (CmpInGPR == ICGPR_NonExtIn)
3400
return SDValue();
3401
// The upper 32-bits of the register can't be undefined for this sequence.
3402
LHS = signExtendInputIfNeeded(LHS);
3403
RHS = signExtendInputIfNeeded(RHS);
3404
SDValue Neg =
3405
SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3406
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3407
Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3408
}
3409
// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3410
// (%b < %a) by swapping inputs and falling through.
3411
std::swap(LHS, RHS);
3412
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3413
IsRHSZero = RHSConst && RHSConst->isZero();
3414
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3415
[[fallthrough]];
3416
}
3417
case ISD::SETLT: {
3418
// (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3419
// (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3420
// (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3421
// Handle SETLT 1 (which is equivalent to SETLE 0).
3422
if (IsRHSOne) {
3423
if (CmpInGPR == ICGPR_NonExtIn)
3424
return SDValue();
3425
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3426
}
3427
3428
if (IsRHSZero) {
3429
SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3430
S->getI32Imm(31, dl) };
3431
return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3432
ShiftOps), 0);
3433
}
3434
3435
if (CmpInGPR == ICGPR_NonExtIn)
3436
return SDValue();
3437
// The upper 32-bits of the register can't be undefined for this sequence.
3438
LHS = signExtendInputIfNeeded(LHS);
3439
RHS = signExtendInputIfNeeded(RHS);
3440
SDValue SUBFNode =
3441
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3442
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3443
SUBFNode, S->getI64Imm(1, dl),
3444
S->getI64Imm(63, dl)), 0);
3445
}
3446
case ISD::SETUGE:
3447
// (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3448
// (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3449
std::swap(LHS, RHS);
3450
[[fallthrough]];
3451
case ISD::SETULE: {
3452
if (CmpInGPR == ICGPR_NonExtIn)
3453
return SDValue();
3454
// The upper 32-bits of the register can't be undefined for this sequence.
3455
LHS = zeroExtendInputIfNeeded(LHS);
3456
RHS = zeroExtendInputIfNeeded(RHS);
3457
SDValue Subtract =
3458
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3459
SDValue SrdiNode =
3460
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3461
Subtract, S->getI64Imm(1, dl),
3462
S->getI64Imm(63, dl)), 0);
3463
return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3464
S->getI32Imm(1, dl)), 0);
3465
}
3466
case ISD::SETUGT:
3467
// (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3468
// (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3469
std::swap(LHS, RHS);
3470
[[fallthrough]];
3471
case ISD::SETULT: {
3472
if (CmpInGPR == ICGPR_NonExtIn)
3473
return SDValue();
3474
// The upper 32-bits of the register can't be undefined for this sequence.
3475
LHS = zeroExtendInputIfNeeded(LHS);
3476
RHS = zeroExtendInputIfNeeded(RHS);
3477
SDValue Subtract =
3478
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3479
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3480
Subtract, S->getI64Imm(1, dl),
3481
S->getI64Imm(63, dl)), 0);
3482
}
3483
}
3484
}
3485
3486
/// Produces a sign-extended result of comparing two 32-bit values according to
3487
/// the passed condition code.
3488
SDValue
3489
IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3490
ISD::CondCode CC,
3491
int64_t RHSValue, SDLoc dl) {
3492
if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3493
CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
3494
return SDValue();
3495
bool IsRHSZero = RHSValue == 0;
3496
bool IsRHSOne = RHSValue == 1;
3497
bool IsRHSNegOne = RHSValue == -1LL;
3498
3499
switch (CC) {
3500
default: return SDValue();
3501
case ISD::SETEQ: {
3502
// (sext (setcc %a, %b, seteq)) ->
3503
// (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3504
// (sext (setcc %a, 0, seteq)) ->
3505
// (ashr (shl (ctlz %a), 58), 63)
3506
SDValue CountInput = IsRHSZero ? LHS :
3507
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3508
SDValue Cntlzw =
3509
SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3510
SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3511
S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3512
SDValue Slwi =
3513
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3514
return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3515
}
3516
case ISD::SETNE: {
3517
// Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3518
// flip the bit, finally take 2's complement.
3519
// (sext (setcc %a, %b, setne)) ->
3520
// (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3521
// Same as above, but the first xor is not needed.
3522
// (sext (setcc %a, 0, setne)) ->
3523
// (neg (xor (lshr (ctlz %a), 5), 1))
3524
SDValue Xor = IsRHSZero ? LHS :
3525
SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3526
SDValue Clz =
3527
SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3528
SDValue ShiftOps[] =
3529
{ Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3530
SDValue Shift =
3531
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3532
SDValue Xori =
3533
SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3534
S->getI32Imm(1, dl)), 0);
3535
return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3536
}
3537
case ISD::SETGE: {
3538
// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3539
// (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3540
if (IsRHSZero)
3541
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3542
3543
// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3544
// by swapping inputs and falling through.
3545
std::swap(LHS, RHS);
3546
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3547
IsRHSZero = RHSConst && RHSConst->isZero();
3548
[[fallthrough]];
3549
}
3550
case ISD::SETLE: {
3551
if (CmpInGPR == ICGPR_NonExtIn)
3552
return SDValue();
3553
// (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3554
// (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3555
if (IsRHSZero)
3556
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3557
3558
// The upper 32-bits of the register can't be undefined for this sequence.
3559
LHS = signExtendInputIfNeeded(LHS);
3560
RHS = signExtendInputIfNeeded(RHS);
3561
SDValue SUBFNode =
3562
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3563
LHS, RHS), 0);
3564
SDValue Srdi =
3565
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3566
SUBFNode, S->getI64Imm(1, dl),
3567
S->getI64Imm(63, dl)), 0);
3568
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3569
S->getI32Imm(-1, dl)), 0);
3570
}
3571
case ISD::SETGT: {
3572
// (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3573
// (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3574
// (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3575
if (IsRHSNegOne)
3576
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3577
if (IsRHSZero) {
3578
if (CmpInGPR == ICGPR_NonExtIn)
3579
return SDValue();
3580
// The upper 32-bits of the register can't be undefined for this sequence.
3581
LHS = signExtendInputIfNeeded(LHS);
3582
RHS = signExtendInputIfNeeded(RHS);
3583
SDValue Neg =
3584
SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3585
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3586
S->getI64Imm(63, dl)), 0);
3587
}
3588
// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3589
// (%b < %a) by swapping inputs and falling through.
3590
std::swap(LHS, RHS);
3591
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3592
IsRHSZero = RHSConst && RHSConst->isZero();
3593
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3594
[[fallthrough]];
3595
}
3596
case ISD::SETLT: {
3597
// (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
// (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
// (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
3600
if (IsRHSOne) {
3601
if (CmpInGPR == ICGPR_NonExtIn)
3602
return SDValue();
3603
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3604
}
3605
if (IsRHSZero)
3606
return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3607
S->getI32Imm(31, dl)), 0);
3608
3609
if (CmpInGPR == ICGPR_NonExtIn)
3610
return SDValue();
3611
// The upper 32-bits of the register can't be undefined for this sequence.
3612
LHS = signExtendInputIfNeeded(LHS);
3613
RHS = signExtendInputIfNeeded(RHS);
3614
SDValue SUBFNode =
3615
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3616
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3617
SUBFNode, S->getI64Imm(63, dl)), 0);
3618
}
3619
case ISD::SETUGE:
3620
// (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3621
// (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3622
std::swap(LHS, RHS);
3623
[[fallthrough]];
3624
case ISD::SETULE: {
3625
if (CmpInGPR == ICGPR_NonExtIn)
3626
return SDValue();
3627
// The upper 32-bits of the register can't be undefined for this sequence.
3628
LHS = zeroExtendInputIfNeeded(LHS);
3629
RHS = zeroExtendInputIfNeeded(RHS);
3630
SDValue Subtract =
3631
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3632
SDValue Shift =
3633
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3634
S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3635
0);
3636
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3637
S->getI32Imm(-1, dl)), 0);
3638
}
3639
case ISD::SETUGT:
3640
// (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3641
// (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3642
std::swap(LHS, RHS);
3643
[[fallthrough]];
3644
case ISD::SETULT: {
3645
if (CmpInGPR == ICGPR_NonExtIn)
3646
return SDValue();
3647
// The upper 32-bits of the register can't be undefined for this sequence.
3648
LHS = zeroExtendInputIfNeeded(LHS);
3649
RHS = zeroExtendInputIfNeeded(RHS);
3650
SDValue Subtract =
3651
SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3652
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3653
Subtract, S->getI64Imm(63, dl)), 0);
3654
}
3655
}
3656
}
3657
3658
/// Produces a zero-extended result of comparing two 64-bit values according to
3659
/// the passed condition code.
3660
SDValue
3661
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3662
ISD::CondCode CC,
3663
int64_t RHSValue, SDLoc dl) {
3664
if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3665
CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
3666
return SDValue();
3667
bool IsRHSZero = RHSValue == 0;
3668
bool IsRHSOne = RHSValue == 1;
3669
bool IsRHSNegOne = RHSValue == -1LL;
3670
switch (CC) {
3671
default: return SDValue();
3672
case ISD::SETEQ: {
3673
// (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3674
// (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
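// As in the 32-bit case, cntlzd returns 64 only for a zero input, so shifting
// the count right by 6 produces 1 exactly when %a equals %b.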
3675
SDValue Xor = IsRHSZero ? LHS :
3676
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3677
SDValue Clz =
3678
SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3679
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3680
S->getI64Imm(58, dl),
3681
S->getI64Imm(63, dl)), 0);
3682
}
3683
case ISD::SETNE: {
3684
// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3685
// (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3686
// {addcz.reg, addcz.CA} = (addcarry %a, -1)
3687
// (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
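// The addic with -1 sets the carry exactly when its input is non-zero, and
// the subfe that follows materializes that carry bit as the 0/1 result in a
// GPR.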
3688
SDValue Xor = IsRHSZero ? LHS :
3689
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3690
SDValue AC =
3691
SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3692
Xor, S->getI32Imm(~0U, dl)), 0);
3693
return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3694
Xor, AC.getValue(1)), 0);
3695
}
3696
case ISD::SETGE: {
3697
// {subc.reg, subc.CA} = (subcarry %a, %b)
3698
// (zext (setcc %a, %b, setge)) ->
3699
// (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3700
// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3701
if (IsRHSZero)
3702
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3703
std::swap(LHS, RHS);
3704
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3705
IsRHSZero = RHSConst && RHSConst->isZero();
3706
[[fallthrough]];
3707
}
3708
case ISD::SETLE: {
3709
// {subc.reg, subc.CA} = (subcarry %b, %a)
3710
// (zext (setcc %a, %b, setle)) ->
// (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
// (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3713
if (IsRHSZero)
3714
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3715
SDValue ShiftL =
3716
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3717
S->getI64Imm(1, dl),
3718
S->getI64Imm(63, dl)), 0);
3719
SDValue ShiftR =
3720
SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3721
S->getI64Imm(63, dl)), 0);
3722
SDValue SubtractCarry =
3723
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3724
LHS, RHS), 1);
3725
return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3726
ShiftR, ShiftL, SubtractCarry), 0);
3727
}
3728
case ISD::SETGT: {
3729
// {subc.reg, subc.CA} = (subcarry %b, %a)
3730
// (zext (setcc %a, %b, setgt)) ->
3731
// (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3732
// (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3733
if (IsRHSNegOne)
3734
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3735
if (IsRHSZero) {
3736
SDValue Addi =
3737
SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3738
S->getI64Imm(~0ULL, dl)), 0);
3739
SDValue Nor =
3740
SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3741
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3742
S->getI64Imm(1, dl),
3743
S->getI64Imm(63, dl)), 0);
3744
}
3745
std::swap(LHS, RHS);
3746
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3747
IsRHSZero = RHSConst && RHSConst->isZero();
3748
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3749
[[fallthrough]];
3750
}
3751
case ISD::SETLT: {
3752
// {subc.reg, subc.CA} = (subcarry %a, %b)
3753
// (zext (setcc %a, %b, setlt)) ->
3754
// (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3755
// (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3756
if (IsRHSOne)
3757
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3758
if (IsRHSZero)
3759
return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3760
S->getI64Imm(1, dl),
3761
S->getI64Imm(63, dl)), 0);
3762
SDValue SRADINode =
3763
SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3764
LHS, S->getI64Imm(63, dl)), 0);
3765
SDValue SRDINode =
3766
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3767
RHS, S->getI64Imm(1, dl),
3768
S->getI64Imm(63, dl)), 0);
3769
SDValue SUBFC8Carry =
3770
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3771
RHS, LHS), 1);
3772
SDValue ADDE8Node =
3773
SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3774
SRDINode, SRADINode, SUBFC8Carry), 0);
3775
return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3776
ADDE8Node, S->getI64Imm(1, dl)), 0);
3777
}
3778
case ISD::SETUGE:
3779
// {subc.reg, subc.CA} = (subcarry %a, %b)
3780
// (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3781
std::swap(LHS, RHS);
3782
[[fallthrough]];
3783
case ISD::SETULE: {
3784
// {subc.reg, subc.CA} = (subcarry %b, %a)
3785
// (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3786
SDValue SUBFC8Carry =
3787
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3788
LHS, RHS), 1);
3789
SDValue SUBFE8Node =
3790
SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3791
LHS, LHS, SUBFC8Carry), 0);
3792
return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3793
SUBFE8Node, S->getI64Imm(1, dl)), 0);
3794
}
3795
case ISD::SETUGT:
3796
// {subc.reg, subc.CA} = (subcarry %b, %a)
3797
// (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3798
std::swap(LHS, RHS);
3799
[[fallthrough]];
3800
case ISD::SETULT: {
3801
// {subc.reg, subc.CA} = (subcarry %a, %b)
3802
// (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3803
SDValue SubtractCarry =
3804
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3805
RHS, LHS), 1);
3806
SDValue ExtSub =
3807
SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3808
LHS, LHS, SubtractCarry), 0);
3809
return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3810
ExtSub), 0);
3811
}
3812
}
3813
}
3814
3815
/// Produces a sign-extended result of comparing two 64-bit values according to
3816
/// the passed condition code.
3817
SDValue
3818
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3819
ISD::CondCode CC,
3820
int64_t RHSValue, SDLoc dl) {
3821
if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3822
CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
3823
return SDValue();
3824
bool IsRHSZero = RHSValue == 0;
3825
bool IsRHSOne = RHSValue == 1;
3826
bool IsRHSNegOne = RHSValue == -1LL;
3827
switch (CC) {
3828
default: return SDValue();
3829
case ISD::SETEQ: {
3830
// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3831
// (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3832
// {addcz.reg, addcz.CA} = (addcarry %a, -1)
3833
// (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3834
SDValue AddInput = IsRHSZero ? LHS :
3835
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3836
SDValue Addic =
3837
SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3838
AddInput, S->getI32Imm(~0U, dl)), 0);
3839
return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3840
Addic, Addic.getValue(1)), 0);
3841
}
3842
case ISD::SETNE: {
3843
// {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3844
// (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3845
// {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3846
// (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3847
SDValue Xor = IsRHSZero ? LHS :
3848
SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3849
SDValue SC =
3850
SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3851
Xor, S->getI32Imm(0, dl)), 0);
3852
return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3853
SC, SC.getValue(1)), 0);
3854
}
3855
case ISD::SETGE: {
3856
// {subc.reg, subc.CA} = (subcarry %a, %b)
3857
// (sext (setcc %a, %b, setge)) ->
// (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
// (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3860
if (IsRHSZero)
3861
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3862
std::swap(LHS, RHS);
3863
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3864
IsRHSZero = RHSConst && RHSConst->isZero();
3865
[[fallthrough]];
3866
}
3867
case ISD::SETLE: {
3868
// {subc.reg, subc.CA} = (subcarry %b, %a)
3869
// (sext (setcc %a, %b, setle)) ->
// (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
// (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3872
if (IsRHSZero)
3873
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3874
SDValue ShiftR =
3875
SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3876
S->getI64Imm(63, dl)), 0);
3877
SDValue ShiftL =
3878
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3879
S->getI64Imm(1, dl),
3880
S->getI64Imm(63, dl)), 0);
3881
SDValue SubtractCarry =
3882
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3883
LHS, RHS), 1);
3884
SDValue Adde =
3885
SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3886
ShiftR, ShiftL, SubtractCarry), 0);
3887
return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3888
}
3889
case ISD::SETGT: {
3890
// {subc.reg, subc.CA} = (subcarry %b, %a)
3891
// (sext (setcc %a, %b, setgt)) ->
// -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
// (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3894
if (IsRHSNegOne)
3895
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3896
if (IsRHSZero) {
3897
SDValue Add =
3898
SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3899
S->getI64Imm(-1, dl)), 0);
3900
SDValue Nor =
3901
SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3902
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3903
S->getI64Imm(63, dl)), 0);
3904
}
3905
std::swap(LHS, RHS);
3906
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3907
IsRHSZero = RHSConst && RHSConst->isZero();
3908
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3909
[[fallthrough]];
3910
}
3911
case ISD::SETLT: {
3912
// {subc.reg, subc.CA} = (subcarry %a, %b)
3913
// (sext (setcc %a, %b, setlt)) ->
// -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
// (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3916
if (IsRHSOne)
3917
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3918
if (IsRHSZero) {
3919
return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3920
S->getI64Imm(63, dl)), 0);
3921
}
3922
SDValue SRADINode =
3923
SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3924
LHS, S->getI64Imm(63, dl)), 0);
3925
SDValue SRDINode =
3926
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3927
RHS, S->getI64Imm(1, dl),
3928
S->getI64Imm(63, dl)), 0);
3929
SDValue SUBFC8Carry =
3930
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3931
RHS, LHS), 1);
3932
SDValue ADDE8Node =
3933
SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3934
SRDINode, SRADINode, SUBFC8Carry), 0);
3935
SDValue XORI8Node =
3936
SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3937
ADDE8Node, S->getI64Imm(1, dl)), 0);
3938
return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3939
XORI8Node), 0);
3940
}
3941
case ISD::SETUGE:
3942
// {subc.reg, subc.CA} = (subcarry %a, %b)
3943
// (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3944
std::swap(LHS, RHS);
3945
[[fallthrough]];
3946
case ISD::SETULE: {
3947
// {subc.reg, subc.CA} = (subcarry %b, %a)
3948
// (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3949
SDValue SubtractCarry =
3950
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3951
LHS, RHS), 1);
3952
SDValue ExtSub =
3953
SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3954
LHS, SubtractCarry), 0);
3955
return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3956
ExtSub, ExtSub), 0);
3957
}
3958
case ISD::SETUGT:
3959
// {subc.reg, subc.CA} = (subcarry %b, %a)
3960
// (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3961
std::swap(LHS, RHS);
3962
[[fallthrough]];
3963
case ISD::SETULT: {
3964
// {subc.reg, subc.CA} = (subcarry %a, %b)
3965
// (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3966
SDValue SubCarry =
3967
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3968
RHS, LHS), 1);
3969
return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3970
LHS, LHS, SubCarry), 0);
3971
}
3972
}
3973
}
3974
3975
/// Do all uses of this SDValue need the result in a GPR?
3976
/// This is meant to be used on values that have type i1 since
3977
/// it is somewhat meaningless to ask if values of other types
3978
/// should be kept in GPR's.
3979
static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3980
assert(Compare.getOpcode() == ISD::SETCC &&
3981
"An ISD::SETCC node required here.");
3982
3983
// For values that have a single use, the caller should obviously already have
3984
// checked if that use is an extending use. We check the other uses here.
3985
if (Compare.hasOneUse())
3986
return true;
3987
// We want the value in a GPR if it is being extended, used for a select, or
3988
// used in logical operations.
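// Any other kind of use (a branch, for example) is taken to need the i1
// result in a condition register, so the GPR sequence would not pay off.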
3989
for (auto *CompareUse : Compare.getNode()->uses())
3990
if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3991
CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3992
CompareUse->getOpcode() != ISD::SELECT &&
3993
!ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
3994
OmittedForNonExtendUses++;
3995
return false;
3996
}
3997
return true;
3998
}
3999
4000
/// Returns an equivalent of a SETCC node but with the result the same width as
4001
/// the inputs. This can also be used for SELECT_CC if either the true or false
4002
/// value is a power of two while the other is zero.
4003
SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4004
SetccInGPROpts ConvOpts) {
4005
assert((Compare.getOpcode() == ISD::SETCC ||
4006
Compare.getOpcode() == ISD::SELECT_CC) &&
4007
"An ISD::SETCC node required here.");
4008
4009
// Don't convert this comparison to a GPR sequence because there are uses
4010
// of the i1 result (i.e. uses that require the result in the CR).
4011
if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4012
return SDValue();
4013
4014
SDValue LHS = Compare.getOperand(0);
4015
SDValue RHS = Compare.getOperand(1);
4016
4017
// The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4018
int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4019
ISD::CondCode CC =
4020
cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4021
EVT InputVT = LHS.getValueType();
4022
if (InputVT != MVT::i32 && InputVT != MVT::i64)
4023
return SDValue();
4024
4025
if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4026
ConvOpts == SetccInGPROpts::SExtInvert)
4027
CC = ISD::getSetCCInverse(CC, InputVT);
4028
4029
bool Inputs32Bit = InputVT == MVT::i32;
4030
4031
SDLoc dl(Compare);
4032
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4033
int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
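// INT64_MAX above is just a sentinel: it cannot match any of the
// special-cased right-hand sides (0, 1 and -1), so a non-constant RHS never
// selects one of the constant-specific sequences.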
4034
bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4035
ConvOpts == SetccInGPROpts::SExtInvert;
4036
4037
if (IsSext && Inputs32Bit)
4038
return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4039
else if (Inputs32Bit)
4040
return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4041
else if (IsSext)
4042
return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4043
return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4044
}
4045
4046
} // end anonymous namespace
4047
4048
bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4049
if (N->getValueType(0) != MVT::i32 &&
4050
N->getValueType(0) != MVT::i64)
4051
return false;
4052
4053
// This optimization will emit code that assumes 64-bit registers
4054
// so we don't want to run it in 32-bit mode. Also don't run it
4055
// on functions that are not to be optimized.
4056
if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4057
return false;
4058
4059
// For POWER10, it is more profitable to use the set boolean extension
4060
// instructions rather than the integer compare elimination codegen.
4061
// Users can override this via the command line option, `--ppc-gpr-icmps`.
4062
if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4063
return false;
4064
4065
switch (N->getOpcode()) {
4066
default: break;
4067
case ISD::ZERO_EXTEND:
4068
case ISD::SIGN_EXTEND:
4069
case ISD::AND:
4070
case ISD::OR:
4071
case ISD::XOR: {
4072
IntegerCompareEliminator ICmpElim(CurDAG, this);
4073
if (SDNode *New = ICmpElim.Select(N)) {
4074
ReplaceNode(N, New);
4075
return true;
4076
}
4077
}
4078
}
4079
return false;
4080
}
4081
4082
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4083
if (N->getValueType(0) != MVT::i32 &&
4084
N->getValueType(0) != MVT::i64)
4085
return false;
4086
4087
if (!UseBitPermRewriter)
4088
return false;
4089
4090
switch (N->getOpcode()) {
4091
default: break;
4092
case ISD::SRL:
4093
// If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4094
// uses the BRH instruction.
4095
if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4096
N->getOperand(0).getOpcode() == ISD::BSWAP) {
4097
auto &OpRight = N->getOperand(1);
4098
ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4099
if (SRLConst && SRLConst->getSExtValue() == 16)
4100
return false;
4101
}
4102
[[fallthrough]];
4103
case ISD::ROTL:
4104
case ISD::SHL:
4105
case ISD::AND:
4106
case ISD::OR: {
4107
BitPermutationSelector BPS(CurDAG);
4108
if (SDNode *New = BPS.Select(N)) {
4109
ReplaceNode(N, New);
4110
return true;
4111
}
4112
return false;
4113
}
4114
}
4115
4116
return false;
4117
}
4118
4119
/// SelectCC - Select a comparison of the specified values with the specified
4120
/// condition code, returning the CR# of the expression.
4121
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4122
const SDLoc &dl, SDValue Chain) {
4123
// Always select the LHS.
4124
unsigned Opc;
4125
4126
if (LHS.getValueType() == MVT::i32) {
4127
unsigned Imm;
4128
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4129
if (isInt32Immediate(RHS, Imm)) {
4130
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
4131
if (isUInt<16>(Imm))
4132
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4133
getI32Imm(Imm & 0xFFFF, dl)),
4134
0);
4135
// If this is a 16-bit signed immediate, fold it.
4136
if (isInt<16>((int)Imm))
4137
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4138
getI32Imm(Imm & 0xFFFF, dl)),
4139
0);
4140
4141
// For non-equality comparisons, the default code would materialize the
4142
// constant, then compare against it, like this:
4143
// lis r2, 4660
4144
// ori r2, r2, 22136
4145
// cmpw cr0, r3, r2
4146
// Since we are just comparing for equality, we can emit this instead:
4147
// xoris r0,r3,0x1234
4148
// cmplwi cr0,r0,0x5678
4149
// beq cr0,L6
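// This works because xoris clears the matching high halfword, so the two
// values are equal exactly when the remaining low 16 bits match; that is what
// the single cmplwi checks. The trick is only valid for equality tests.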
4150
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4151
getI32Imm(Imm >> 16, dl)), 0);
4152
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4153
getI32Imm(Imm & 0xFFFF, dl)), 0);
4154
}
4155
Opc = PPC::CMPLW;
4156
} else if (ISD::isUnsignedIntSetCC(CC)) {
4157
if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4158
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4159
getI32Imm(Imm & 0xFFFF, dl)), 0);
4160
Opc = PPC::CMPLW;
4161
} else {
4162
int16_t SImm;
4163
if (isIntS16Immediate(RHS, SImm))
4164
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4165
getI32Imm((int)SImm & 0xFFFF,
4166
dl)),
4167
0);
4168
Opc = PPC::CMPW;
4169
}
4170
} else if (LHS.getValueType() == MVT::i64) {
4171
uint64_t Imm;
4172
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4173
if (isInt64Immediate(RHS.getNode(), Imm)) {
4174
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
4175
if (isUInt<16>(Imm))
4176
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4177
getI32Imm(Imm & 0xFFFF, dl)),
4178
0);
4179
// If this is a 16-bit signed immediate, fold it.
4180
if (isInt<16>(Imm))
4181
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4182
getI32Imm(Imm & 0xFFFF, dl)),
4183
0);
4184
4185
// For non-equality comparisons, the default code would materialize the
4186
// constant, then compare against it, like this:
4187
// lis r2, 4660
4188
// ori r2, r2, 22136
4189
// cmpd cr0, r3, r2
4190
// Since we are just comparing for equality, we can emit this instead:
4191
// xoris r0,r3,0x1234
4192
// cmpldi cr0,r0,0x5678
4193
// beq cr0,L6
4194
if (isUInt<32>(Imm)) {
4195
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4196
getI64Imm(Imm >> 16, dl)), 0);
4197
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4198
getI64Imm(Imm & 0xFFFF, dl)),
4199
0);
4200
}
4201
}
4202
Opc = PPC::CMPLD;
4203
} else if (ISD::isUnsignedIntSetCC(CC)) {
4204
if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4205
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4206
getI64Imm(Imm & 0xFFFF, dl)), 0);
4207
Opc = PPC::CMPLD;
4208
} else {
4209
int16_t SImm;
4210
if (isIntS16Immediate(RHS, SImm))
4211
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4212
getI64Imm(SImm & 0xFFFF, dl)),
4213
0);
4214
Opc = PPC::CMPD;
4215
}
4216
} else if (LHS.getValueType() == MVT::f32) {
4217
if (Subtarget->hasSPE()) {
4218
switch (CC) {
4219
default:
4220
case ISD::SETEQ:
4221
case ISD::SETNE:
4222
Opc = PPC::EFSCMPEQ;
4223
break;
4224
case ISD::SETLT:
4225
case ISD::SETGE:
4226
case ISD::SETOLT:
4227
case ISD::SETOGE:
4228
case ISD::SETULT:
4229
case ISD::SETUGE:
4230
Opc = PPC::EFSCMPLT;
4231
break;
4232
case ISD::SETGT:
4233
case ISD::SETLE:
4234
case ISD::SETOGT:
4235
case ISD::SETOLE:
4236
case ISD::SETUGT:
4237
case ISD::SETULE:
4238
Opc = PPC::EFSCMPGT;
4239
break;
4240
}
4241
} else
4242
Opc = PPC::FCMPUS;
4243
} else if (LHS.getValueType() == MVT::f64) {
4244
if (Subtarget->hasSPE()) {
4245
switch (CC) {
4246
default:
4247
case ISD::SETEQ:
4248
case ISD::SETNE:
4249
Opc = PPC::EFDCMPEQ;
4250
break;
4251
case ISD::SETLT:
4252
case ISD::SETGE:
4253
case ISD::SETOLT:
4254
case ISD::SETOGE:
4255
case ISD::SETULT:
4256
case ISD::SETUGE:
4257
Opc = PPC::EFDCMPLT;
4258
break;
4259
case ISD::SETGT:
4260
case ISD::SETLE:
4261
case ISD::SETOGT:
4262
case ISD::SETOLE:
4263
case ISD::SETUGT:
4264
case ISD::SETULE:
4265
Opc = PPC::EFDCMPGT;
4266
break;
4267
}
4268
} else
4269
Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4270
} else {
4271
assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4272
assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4273
Opc = PPC::XSCMPUQP;
4274
}
4275
if (Chain)
4276
return SDValue(
4277
CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4278
0);
4279
else
4280
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4281
}
4282
4283
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
4284
const PPCSubtarget *Subtarget) {
4285
// For SPE instructions, the result is in GT bit of the CR
4286
bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4287
4288
switch (CC) {
4289
case ISD::SETUEQ:
4290
case ISD::SETONE:
4291
case ISD::SETOLE:
4292
case ISD::SETOGE:
4293
llvm_unreachable("Should be lowered by legalize!");
4294
default: llvm_unreachable("Unknown condition!");
4295
case ISD::SETOEQ:
4296
case ISD::SETEQ:
4297
return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4298
case ISD::SETUNE:
4299
case ISD::SETNE:
4300
return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4301
case ISD::SETOLT:
4302
case ISD::SETLT:
4303
return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4304
case ISD::SETULE:
4305
case ISD::SETLE:
4306
return PPC::PRED_LE;
4307
case ISD::SETOGT:
4308
case ISD::SETGT:
4309
return PPC::PRED_GT;
4310
case ISD::SETUGE:
4311
case ISD::SETGE:
4312
return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4313
case ISD::SETO: return PPC::PRED_NU;
4314
case ISD::SETUO: return PPC::PRED_UN;
4315
// These two are invalid for floating point. Assume we have int.
4316
case ISD::SETULT: return PPC::PRED_LT;
4317
case ISD::SETUGT: return PPC::PRED_GT;
4318
}
4319
}
4320
4321
/// getCRIdxForSetCC - Return the index of the condition register field
4322
/// associated with the SetCC condition, and whether or not the field is
4323
/// treated as inverted. That is, lt = 0; ge = 0 inverted.
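/// Within a CR field the four bits are, in order: LT (0), GT (1), EQ (2) and
/// SO/UN (3), which is why the indices below map directly onto those bit
/// positions.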
4324
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4325
Invert = false;
4326
switch (CC) {
4327
default: llvm_unreachable("Unknown condition!");
4328
case ISD::SETOLT:
4329
case ISD::SETLT: return 0; // Bit #0 = SETOLT
4330
case ISD::SETOGT:
4331
case ISD::SETGT: return 1; // Bit #1 = SETOGT
4332
case ISD::SETOEQ:
4333
case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4334
case ISD::SETUO: return 3; // Bit #3 = SETUO
4335
case ISD::SETUGE:
4336
case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4337
case ISD::SETULE:
4338
case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4339
case ISD::SETUNE:
4340
case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4341
case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4342
case ISD::SETUEQ:
4343
case ISD::SETOGE:
4344
case ISD::SETOLE:
4345
case ISD::SETONE:
4346
llvm_unreachable("Invalid branch code: should be expanded by legalize");
4347
// These are invalid for floating point. Assume integer.
4348
case ISD::SETULT: return 0;
4349
case ISD::SETUGT: return 1;
4350
}
4351
}
4352
4353
// getVCmpInst: return the vector compare instruction for the specified
4354
// vector type and condition code. Since this is for altivec specific code,
4355
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4356
// and v4f32).
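// For example, a v4i32 SETLT is handled by swapping the operands and using
// VCMPGTSW, while a v4i32 SETNE negates the result of VCMPEQUW.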
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
Negate = false;

if (VecVT.isFloatingPoint()) {
/* Handle some cases by swapping input operands. */
switch (CC) {
case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
default: break;
}
/* Handle some cases by negating the result. */
switch (CC) {
case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
default: break;
}
/* We have instructions implementing the remaining cases. */
switch (CC) {
case ISD::SETEQ:
case ISD::SETOEQ:
if (VecVT == MVT::v4f32)
return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPEQDP;
break;
case ISD::SETGT:
case ISD::SETOGT:
if (VecVT == MVT::v4f32)
return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPGTDP;
break;
case ISD::SETGE:
case ISD::SETOGE:
if (VecVT == MVT::v4f32)
return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
else if (VecVT == MVT::v2f64)
return PPC::XVCMPGEDP;
break;
default:
break;
}
llvm_unreachable("Invalid floating-point vector compare condition");
} else {
/* Handle some cases by swapping input operands. */
switch (CC) {
case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
default: break;
}
/* Handle some cases by negating the result. */
switch (CC) {
case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
default: break;
}
/* We have instructions implementing the remaining cases. */
switch (CC) {
case ISD::SETEQ:
case ISD::SETUEQ:
if (VecVT == MVT::v16i8)
return PPC::VCMPEQUB;
else if (VecVT == MVT::v8i16)
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPEQUD;
else if (VecVT == MVT::v1i128)
return PPC::VCMPEQUQ;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
return PPC::VCMPGTSB;
else if (VecVT == MVT::v8i16)
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTSD;
else if (VecVT == MVT::v1i128)
return PPC::VCMPGTSQ;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
return PPC::VCMPGTUB;
else if (VecVT == MVT::v8i16)
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
else if (VecVT == MVT::v2i64)
return PPC::VCMPGTUD;
else if (VecVT == MVT::v1i128)
return PPC::VCMPGTUQ;
break;
default:
break;
}
llvm_unreachable("Invalid integer vector compare condition");
}
}

bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
SDLoc dl(N);
unsigned Imm;
bool IsStrict = N->isStrictFPOpcode();
ISD::CondCode CC =
cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
EVT PtrVT =
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
bool isPPC64 = (PtrVT == MVT::i64);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
SDValue RHS = N->getOperand(IsStrict ? 2 : 1);

if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
// setcc op, 0
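// (For instance, the SETEQ case below works because cntlzw yields 32 only
// when the operand is zero, so shifting the count right by 5 via rlwinm
// gives the i1 result directly.)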
if (Imm == 0) {
SDValue Op = LHS;
switch (CC) {
default: break;
case ISD::SETEQ: {
Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
getI32Imm(31, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}
case ISD::SETNE: {
if (isPPC64) break;
SDValue AD =
SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U, dl)), 0);
CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
return true;
}
case ISD::SETLT: {
SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
getI32Imm(31, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}
case ISD::SETGT: {
SDValue T =
SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
getI32Imm(31, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}
}
} else if (Imm == ~0U) { // setcc op, -1
SDValue Op = LHS;
switch (CC) {
default: break;
case ISD::SETEQ:
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(1, dl)), 0);
CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
SDValue(CurDAG->getMachineNode(PPC::LI, dl,
MVT::i32,
getI32Imm(0, dl)),
0), Op.getValue(1));
return true;
case ISD::SETNE: {
if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U, dl));
CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
SDValue(AD, 1));
return true;
}
case ISD::SETLT: {
SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
getI32Imm(1, dl)), 0);
SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
Op), 0);
SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
getI32Imm(31, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
getI32Imm(31, dl) };
Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
return true;
}
}
}
}

// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (!IsStrict && LHS.getValueType().isVector()) {
if (Subtarget->hasSPE())
return false;

EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst =
getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
if (Swap)
std::swap(LHS, RHS);

EVT ResVT = VecVT.changeVectorElementTypeToInteger();
if (Negate) {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
ResVT, VCmp, VCmp);
return true;
}

CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
return true;
}

if (Subtarget->useCRBits())
return false;

bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
if (IsStrict)
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
SDValue IntCR;

// SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
// The correct compare instruction is already set by SelectCC()
if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
Idx = 1;
}

// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

SDValue InGlue; // Null incoming flag value.
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InGlue).getValue(1);

IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);

SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
getI32Imm(31, dl), getI32Imm(31, dl) };
if (!Inv) {
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
return true;
}

// Get the specified bit.
SDValue Tmp =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
return true;
}
4632
4633
/// Does this node represent a load/store node whose address can be represented
4634
/// with a register plus an immediate that's a multiple of \p Val:
4635
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4636
LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4637
StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4638
MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4639
SDValue AddrOp;
4640
if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4641
AddrOp = N->getOperand(1);
4642
else if (STN)
4643
AddrOp = STN->getOperand(2);
4644
4645
// If the address points to a frame object or a frame object with an offset,
4646
// we need to check the object alignment.
4647
short Imm = 0;
4648
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4649
AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4650
AddrOp)) {
4651
// If op0 is a frame index that is under aligned, we can't do it either,
4652
// because it is translated to r31 or r1 + slot + offset. We won't know the
4653
// slot number until the stack frame is finalized.
4654
const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4655
unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4656
if ((SlotAlign % Val) != 0)
4657
return false;
4658
4659
// If we have an offset, we need further check on the offset.
4660
if (AddrOp.getOpcode() != ISD::ADD)
4661
return true;
4662
}
4663
4664
if (AddrOp.getOpcode() == ISD::ADD)
4665
return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4666
4667
// If the address comes from the outside, the offset will be zero.
4668
return AddrOp.getOpcode() == ISD::CopyFromReg;
4669
}
4670
4671
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4672
// Transfer memoperands.
4673
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4674
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4675
}
4676
4677
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4678
bool &NeedSwapOps, bool &IsUnCmp) {
4679
4680
assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4681
4682
SDValue LHS = N->getOperand(0);
4683
SDValue RHS = N->getOperand(1);
4684
SDValue TrueRes = N->getOperand(2);
4685
SDValue FalseRes = N->getOperand(3);
4686
ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4687
if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4688
N->getSimpleValueType(0) != MVT::i32))
4689
return false;
4690
4691
// We are looking for any of:
4692
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4693
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4694
// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4695
// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4696
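// (Recall that setb produces -1, 1 or 0 depending on whether the LT bit, the
// GT bit or neither is set in the compared CR field, which is exactly the
// three-way value the patterns above compute.)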
int64_t TrueResVal = TrueConst->getSExtValue();
4697
if ((TrueResVal < -1 || TrueResVal > 1) ||
4698
(TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4699
(TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4700
(TrueResVal == 0 &&
4701
(FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4702
return false;
4703
4704
SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4705
? FalseRes
4706
: FalseRes.getOperand(0);
4707
bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4708
if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4709
SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4710
return false;
4711
4712
// Without this setb optimization, the outer SELECT_CC is selected to a
// SELECT_CC_I4/SELECT_CC_I8 pseudo, which the expand-isel-pseudos pass then
// turns into an isel instruction. When the result has more than one use
// (e.g. a zext/sext), this optimization merely replaces that isel with a
// setb without any significant gain; since setb has a longer latency than
// the original isel, we should avoid it in that case. Another point is that
// setb requires the comparison to be kept, which would block any future
// opportunity to eliminate the comparison altogether.
if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4721
return false;
4722
4723
SDValue InnerLHS = SetOrSelCC.getOperand(0);
4724
SDValue InnerRHS = SetOrSelCC.getOperand(1);
4725
ISD::CondCode InnerCC =
4726
cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4727
// If the inner comparison is a select_cc, make sure the true/false values are
4728
// 1/-1 and canonicalize it if needed.
4729
if (InnerIsSel) {
4730
ConstantSDNode *SelCCTrueConst =
4731
dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4732
ConstantSDNode *SelCCFalseConst =
4733
dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4734
if (!SelCCTrueConst || !SelCCFalseConst)
4735
return false;
4736
int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4737
int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4738
// The values must be -1/1 (requiring a swap) or 1/-1.
4739
if (SelCCTVal == -1 && SelCCFVal == 1) {
4740
std::swap(InnerLHS, InnerRHS);
4741
} else if (SelCCTVal != 1 || SelCCFVal != -1)
4742
return false;
4743
}
4744
4745
// Canonicalize unsigned case
4746
if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4747
IsUnCmp = true;
4748
InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4749
}
4750
4751
bool InnerSwapped = false;
4752
if (LHS == InnerRHS && RHS == InnerLHS)
4753
InnerSwapped = true;
4754
else if (LHS != InnerLHS || RHS != InnerRHS)
4755
return false;
4756
4757
switch (CC) {
4758
// (select_cc lhs, rhs, 0, \
4759
// (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4760
case ISD::SETEQ:
4761
if (!InnerIsSel)
4762
return false;
4763
if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4764
return false;
4765
NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4766
break;
4767
4768
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4769
// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4770
// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4771
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4772
// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4773
// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4774
case ISD::SETULT:
4775
if (!IsUnCmp && InnerCC != ISD::SETNE)
4776
return false;
4777
IsUnCmp = true;
4778
[[fallthrough]];
4779
case ISD::SETLT:
4780
if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4781
(InnerCC == ISD::SETLT && InnerSwapped))
4782
NeedSwapOps = (TrueResVal == 1);
4783
else
4784
return false;
4785
break;
4786
4787
// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4788
// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4789
// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4790
// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4791
// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4792
// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4793
case ISD::SETUGT:
4794
if (!IsUnCmp && InnerCC != ISD::SETNE)
4795
return false;
4796
IsUnCmp = true;
4797
[[fallthrough]];
4798
case ISD::SETGT:
4799
if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4800
(InnerCC == ISD::SETGT && InnerSwapped))
4801
NeedSwapOps = (TrueResVal == -1);
4802
else
4803
return false;
4804
break;
4805
4806
default:
4807
return false;
4808
}
4809
4810
LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4811
LLVM_DEBUG(N->dump());
4812
4813
return true;
4814
}
4815
4816
// Return true if it's a software square-root/divide operand.
4817
static bool isSWTestOp(SDValue N) {
4818
if (N.getOpcode() == PPCISD::FTSQRT)
4819
return true;
4820
if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4821
N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4822
return false;
4823
switch (N.getConstantOperandVal(0)) {
4824
case Intrinsic::ppc_vsx_xvtdivdp:
4825
case Intrinsic::ppc_vsx_xvtdivsp:
4826
case Intrinsic::ppc_vsx_xvtsqrtdp:
4827
case Intrinsic::ppc_vsx_xvtsqrtsp:
4828
return true;
4829
}
4830
return false;
4831
}
4832
4833
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4834
assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4835
// We are looking for following patterns, where `truncate to i1` actually has
4836
// the same semantic with `and 1`.
4837
// (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4838
// (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4839
// (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4840
// (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4841
// (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4842
// (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4843
// (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4844
// (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4845
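// The masks 1, 2, 4 and 8 pick out the individual bits of the CR field that
// the software test instruction sets, so each (and SWTestOp, mask) form maps
// onto one of the UN/EQ/GT/LT predicates (inverted for the seteq variants).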
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4846
if (CC != ISD::SETEQ && CC != ISD::SETNE)
4847
return false;
4848
4849
SDValue CmpRHS = N->getOperand(3);
4850
if (!isNullConstant(CmpRHS))
4851
return false;
4852
4853
SDValue CmpLHS = N->getOperand(2);
4854
if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4855
return false;
4856
4857
unsigned PCC = 0;
4858
bool IsCCNE = CC == ISD::SETNE;
4859
if (CmpLHS.getOpcode() == ISD::AND &&
4860
isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4861
switch (CmpLHS.getConstantOperandVal(1)) {
4862
case 1:
4863
PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4864
break;
4865
case 2:
4866
PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4867
break;
4868
case 4:
4869
PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4870
break;
4871
case 8:
4872
PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4873
break;
4874
default:
4875
return false;
4876
}
4877
else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4878
CmpLHS.getValueType() == MVT::i1)
4879
PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4880
4881
if (PCC) {
4882
SDLoc dl(N);
4883
SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4884
N->getOperand(0)};
4885
CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4886
return true;
4887
}
4888
return false;
4889
}
4890
4891
bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4892
// Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4893
// value, for example when crbits is disabled. If so, select the
4894
// loop_decrement intrinsics now.
4895
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4896
SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4897
4898
if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4899
isNullConstant(LHS.getOperand(1)))
4900
return false;
4901
4902
if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4903
LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4904
return false;
4905
4906
if (!isa<ConstantSDNode>(RHS))
4907
return false;
4908
4909
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4910
"Counter decrement comparison is not EQ or NE");
4911
4912
SDValue OldDecrement = LHS.getOperand(0);
4913
assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4914
4915
SDLoc DecrementLoc(OldDecrement);
4916
SDValue ChainInput = OldDecrement.getOperand(0);
4917
SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4918
: getI32Imm(1, DecrementLoc)};
4919
unsigned DecrementOpcode =
4920
Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4921
SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4922
MVT::i1, DecrementOps);
4923
4924
unsigned Val = RHS->getAsZExtVal();
4925
bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4926
unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4927
4928
ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4929
CurDAG->RemoveDeadNode(LHS.getNode());
4930
4931
// Mark the old loop_decrement intrinsic as dead.
4932
ReplaceUses(OldDecrement.getValue(1), ChainInput);
4933
CurDAG->RemoveDeadNode(OldDecrement.getNode());
4934
4935
SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4936
ChainInput, N->getOperand(0));
4937
4938
CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4939
N->getOperand(4), Chain);
4940
return true;
4941
}
4942
4943
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4944
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4945
unsigned Imm;
4946
if (!isInt32Immediate(N->getOperand(1), Imm))
4947
return false;
4948
4949
SDLoc dl(N);
4950
SDValue Val = N->getOperand(0);
4951
unsigned SH, MB, ME;
4952
// If this is an and of a value rotated between 0 and 31 bits and then and'd
4953
// with a mask, emit rlwinm
4954
if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4955
Val = Val.getOperand(0);
4956
SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4957
getI32Imm(ME, dl)};
4958
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4959
return true;
4960
}
4961
4962
// If this is just a masked value where the input is not handled, and
4963
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4964
if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4965
SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4966
getI32Imm(ME, dl)};
4967
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4968
return true;
4969
}
4970
4971
// AND X, 0 -> 0, not "rlwinm 32".
4972
if (Imm == 0) {
4973
ReplaceUses(SDValue(N, 0), N->getOperand(1));
4974
return true;
4975
}
4976
4977
return false;
4978
}
4979
4980
bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4981
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4982
uint64_t Imm64;
4983
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4984
return false;
4985
4986
unsigned MB, ME;
4987
if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4988
// MB ME
4989
// +----------------------+
4990
// |xxxxxxxxxxx00011111000|
4991
// +----------------------+
4992
// 0 32 64
4993
// We can only do it if MB is at least 32 and MB <= ME, as RLWINM will
// replace the contents of [0 - 32) with [32 - 64) even though we did not
// rotate.
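// For example (illustrative), Imm64 = 0x0FF00000 is a run of ones with
// MB = 36 and ME = 43, so the AND becomes RLWINM8 with SH = 0, MB = 4,
// ME = 11.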
SDLoc dl(N);
4997
SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4998
getI64Imm(ME - 32, dl)};
4999
CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
5000
return true;
5001
}
5002
5003
return false;
5004
}
5005
5006
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5007
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5008
uint64_t Imm64;
5009
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5010
return false;
5011
5012
// Do nothing if it is a 16-bit imm, as the pattern in the .td file handles
// it well with "andi.".
5014
if (isUInt<16>(Imm64))
5015
return false;
5016
5017
SDLoc Loc(N);
5018
SDValue Val = N->getOperand(0);
5019
5020
// Optimized with two rldicl's as follows:
5021
// Add missing bits on left to the mask and check that the mask is a
5022
// wrapped run of ones, i.e.
5023
// Change pattern |0001111100000011111111|
5024
// to |1111111100000011111111|.
5025
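// E.g. (illustrative) Imm64 = 0x00FF00000000FFFF becomes 0xFFFF00000000FFFF,
// a wrapped run of ones with MB = 48 and ME = 15.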
unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5026
if (NumOfLeadingZeros != 0)
5027
Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5028
5029
unsigned MB, ME;
5030
if (!isRunOfOnes64(Imm64, MB, ME))
5031
return false;
5032
5033
// ME MB MB-ME+63
5034
// +----------------------+ +----------------------+
5035
// |1111111100000011111111| -> |0000001111111111111111|
5036
// +----------------------+ +----------------------+
5037
// 0 63 0 63
5038
// There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5039
unsigned OnesOnLeft = ME + 1;
5040
unsigned ZerosInBetween = (MB - ME + 63) & 63;
5041
// Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5042
// on the left the bits that are already zeros in the mask.
5043
Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5044
getI64Imm(OnesOnLeft, Loc),
5045
getI64Imm(ZerosInBetween, Loc)),
5046
0);
5047
// MB-ME+63 ME MB
5048
// +----------------------+ +----------------------+
5049
// |0000001111111111111111| -> |0001111100000011111111|
5050
// +----------------------+ +----------------------+
5051
// 0 63 0 63
5052
// Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5053
// left the number of ones we previously added.
5054
SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5055
getI64Imm(NumOfLeadingZeros, Loc)};
5056
CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5057
return true;
5058
}
5059
5060
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5061
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5062
unsigned Imm;
5063
if (!isInt32Immediate(N->getOperand(1), Imm))
5064
return false;
5065
5066
SDValue Val = N->getOperand(0);
5067
unsigned Imm2;
5068
// ISD::OR doesn't get all the bitfield insertion fun.
5069
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5070
// bitfield insert.
5071
if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5072
return false;
5073
5074
// The idea here is to check whether this is equivalent to:
5075
// (c1 & m) | (x & ~m)
5076
// where m is a run-of-ones mask. The logic here is that, for each bit in
5077
// c1 and c2:
5078
// - if both are 1, then the output will be 1.
5079
// - if both are 0, then the output will be 0.
5080
// - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5081
// come from x.
5082
// - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5083
// be 0.
5084
// If that last condition is never the case, then we can form m from the
5085
// bits that are the same between c1 and c2.
5086
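// E.g. (illustrative) c1 = 0x00FF0000 with c2 = 0xFFFF0000 qualifies:
// ~(c1 ^ c2) = 0x00FFFFFF is a run of ones and no bit of c1 lies outside c2.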
unsigned MB, ME;
5087
if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5088
SDLoc dl(N);
5089
SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5090
getI32Imm(MB, dl), getI32Imm(ME, dl)};
5091
ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5092
return true;
5093
}
5094
5095
return false;
5096
}
5097
5098
bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5099
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5100
5101
uint64_t Imm64;
5102
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5103
return false;
5104
5105
SDValue Val = N->getOperand(0);
5106
5107
if (Val.getOpcode() != ISD::ROTL)
5108
return false;
5109
5110
// Looking to try to avoid a situation like this one:
5111
// %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5112
// %and1 = and i64 %2, 9223372036854775807
5113
// In this function we are looking to try to match RLDCL. However, the above
5114
// DAG would better match RLDICL instead which is not what we are looking
5115
// for here.
5116
SDValue RotateAmt = Val.getOperand(1);
5117
if (RotateAmt.getOpcode() == ISD::Constant)
5118
return false;
5119
5120
unsigned MB = 64 - llvm::countr_one(Imm64);
5121
SDLoc dl(N);
5122
SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5123
CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5124
return true;
5125
}
5126
5127
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5128
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5129
uint64_t Imm64;
5130
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5131
return false;
5132
5133
// If this is a 64-bit zero-extension mask, emit rldicl.
5134
unsigned MB = 64 - llvm::countr_one(Imm64);
5135
unsigned SH = 0;
5136
unsigned Imm;
5137
SDValue Val = N->getOperand(0);
5138
SDLoc dl(N);
5139
5140
if (Val.getOpcode() == ISD::ANY_EXTEND) {
5141
auto Op0 = Val.getOperand(0);
5142
if (Op0.getOpcode() == ISD::SRL &&
5143
isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5144
5145
auto ResultType = Val.getNode()->getValueType(0);
5146
auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5147
SDValue IDVal(ImDef, 0);
5148
5149
Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5150
IDVal, Op0.getOperand(0),
5151
getI32Imm(1, dl)),
5152
0);
5153
SH = 64 - Imm;
5154
}
5155
}
5156
5157
// If the operand is a logical right shift, we can fold it into this
5158
// instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5159
// for n <= mb. The right shift is really a left rotate followed by a
5160
// mask, and this mask is a more-restrictive sub-mask of the mask implied
5161
// by the shift.
5162
if (Val.getOpcode() == ISD::SRL &&
5163
isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5164
assert(Imm < 64 && "Illegal shift amount");
5165
Val = Val.getOperand(0);
5166
SH = 64 - Imm;
5167
}
5168
5169
SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5170
CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5171
return true;
5172
}
5173
5174
bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5175
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5176
uint64_t Imm64;
5177
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5178
!isMask_64(~Imm64))
5179
return false;
5180
5181
// If this is a negated 64-bit zero-extension mask, i.e. the immediate is a
// sequence of ones on the most significant side and all zeros for the
// remainder, we should use rldicr.
5184
unsigned MB = 63 - llvm::countr_one(~Imm64);
5185
unsigned SH = 0;
5186
SDLoc dl(N);
5187
SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5188
CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5189
return true;
5190
}
5191
5192
bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5193
assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5194
uint64_t Imm64;
5195
unsigned MB, ME;
5196
SDValue N0 = N->getOperand(0);
5197
5198
// We won't get fewer instructions if the imm is a 32-bit integer.
5199
// rldimi requires the imm to have consecutive ones with both sides zero.
5200
// Also, make sure the first Op has only one use, otherwise this may increase
5201
// register pressure since rldimi is destructive.
5202
if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5203
isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5204
return false;
5205
5206
unsigned SH = 63 - ME;
5207
SDLoc Dl(N);
5208
// Use selectI64Imm to materialize the all-ones source (an LI of -1) rather
// than passing the constant node directly.
5209
SDValue Ops[] = {
5210
N->getOperand(0),
5211
SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5212
getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5213
CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5214
return true;
5215
}
5216
5217
// Select - Convert the specified operand from a target-independent to a
5218
// target-specific node if it hasn't already been changed.
5219
void PPCDAGToDAGISel::Select(SDNode *N) {
5220
SDLoc dl(N);
5221
if (N->isMachineOpcode()) {
5222
N->setNodeId(-1);
5223
return; // Already selected.
5224
}
5225
5226
// In case any misguided DAG-level optimizations form an ADD with a
5227
// TargetConstant operand, crash here instead of miscompiling (by selecting
5228
// an r+r add instead of some kind of r+i add).
5229
if (N->getOpcode() == ISD::ADD &&
5230
N->getOperand(1).getOpcode() == ISD::TargetConstant)
5231
llvm_unreachable("Invalid ADD with TargetConstant operand");
5232
5233
// Try matching complex bit permutations before doing anything else.
5234
if (tryBitPermutation(N))
5235
return;
5236
5237
// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5238
if (tryIntCompareInGPR(N))
5239
return;
5240
5241
switch (N->getOpcode()) {
5242
default: break;
5243
5244
case ISD::Constant:
5245
if (N->getValueType(0) == MVT::i64) {
5246
ReplaceNode(N, selectI64Imm(CurDAG, N));
5247
return;
5248
}
5249
break;
5250
5251
case ISD::INTRINSIC_VOID: {
5252
auto IntrinsicID = N->getConstantOperandVal(1);
5253
if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5254
IntrinsicID != Intrinsic::ppc_trapd &&
5255
IntrinsicID != Intrinsic::ppc_trap)
5256
break;
5257
unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5258
IntrinsicID == Intrinsic::ppc_trapd)
5259
? PPC::TDI
5260
: PPC::TWI;
5261
SmallVector<SDValue, 4> OpsWithMD;
5262
unsigned MDIndex;
5263
if (IntrinsicID == Intrinsic::ppc_tdw ||
5264
IntrinsicID == Intrinsic::ppc_tw) {
5265
SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5266
int16_t SImmOperand2;
5267
int16_t SImmOperand3;
5268
int16_t SImmOperand4;
5269
bool isOperand2IntS16Immediate =
5270
isIntS16Immediate(N->getOperand(2), SImmOperand2);
5271
bool isOperand3IntS16Immediate =
5272
isIntS16Immediate(N->getOperand(3), SImmOperand3);
5273
// We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5274
// reg or imm + imm. The imm + imm form will be optimized to either an
5275
// unconditional trap or a nop in a later pass.
5276
if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5277
Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5278
else if (isOperand3IntS16Immediate)
5279
// The 2nd and 3rd operands are reg + imm.
5280
Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5281
else {
5282
// The 2nd and 3rd operands are imm + reg.
5283
bool isOperand4IntS16Immediate =
5284
isIntS16Immediate(N->getOperand(4), SImmOperand4);
5285
(void)isOperand4IntS16Immediate;
5286
assert(isOperand4IntS16Immediate &&
5287
"The 4th operand is not an Immediate");
5288
// We need to flip the condition immediate TO.
5289
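// Because the register and immediate operands are swapped below, the signed
// and unsigned less-than/greater-than TO bits must also trade places for the
// trap condition to remain equivalent.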
int16_t TO = int(SImmOperand4) & 0x1F;
5290
// We swap the first and second bit of TO if they are not same.
5291
if ((TO & 0x1) != ((TO & 0x2) >> 1))
5292
TO = (TO & 0x1) ? TO + 1 : TO - 1;
5293
// We swap the fourth and fifth bit of TO if they are not same.
5294
if ((TO & 0x8) != ((TO & 0x10) >> 1))
5295
TO = (TO & 0x8) ? TO + 8 : TO - 8;
5296
Ops[0] = getI32Imm(TO, dl);
5297
Ops[1] = N->getOperand(3);
5298
Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5299
}
5300
OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5301
MDIndex = 5;
5302
} else {
5303
OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5304
MDIndex = 3;
5305
}
5306
5307
if (N->getNumOperands() > MDIndex) {
5308
SDValue MDV = N->getOperand(MDIndex);
5309
const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5310
assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5311
assert((isa<MDString>(MD->getOperand(0)) &&
5312
cast<MDString>(MD->getOperand(0))->getString() ==
5313
"ppc-trap-reason") &&
5314
"Unsupported annotation data type!");
5315
for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5316
assert(isa<MDString>(MD->getOperand(i)) &&
5317
"Invalid data type for annotation ppc-trap-reason!");
5318
OpsWithMD.push_back(
5319
getI32Imm(std::stoi(cast<MDString>(
5320
MD->getOperand(i))->getString().str()), dl));
5321
}
5322
}
5323
OpsWithMD.push_back(N->getOperand(0)); // chain
5324
CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5325
return;
5326
}
5327
5328
case ISD::INTRINSIC_WO_CHAIN: {
5329
// We emit the PPC::FSELS instruction here because of type conflicts with
5330
// the comparison operand. The FSELS instruction is defined to use an 8-byte
5331
// comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5332
// value for the comparison. When selecting through a .td file, a type
5333
// error is raised. Must check this first so we never break on the
5334
// !Subtarget->isISA3_1() check.
5335
auto IntID = N->getConstantOperandVal(0);
5336
if (IntID == Intrinsic::ppc_fsels) {
5337
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5338
CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5339
return;
5340
}
5341
5342
if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5343
auto Pred = N->getConstantOperandVal(1);
5344
unsigned Opcode =
5345
IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5346
unsigned SubReg = 0;
5347
unsigned ShiftVal = 0;
5348
bool Reverse = false;
5349
switch (Pred) {
5350
case 0:
5351
SubReg = PPC::sub_eq;
5352
ShiftVal = 1;
5353
break;
5354
case 1:
5355
SubReg = PPC::sub_eq;
5356
ShiftVal = 1;
5357
Reverse = true;
5358
break;
5359
case 2:
5360
SubReg = PPC::sub_lt;
5361
ShiftVal = 3;
5362
break;
5363
case 3:
5364
SubReg = PPC::sub_lt;
5365
ShiftVal = 3;
5366
Reverse = true;
5367
break;
5368
case 4:
5369
SubReg = PPC::sub_gt;
5370
ShiftVal = 2;
5371
break;
5372
case 5:
5373
SubReg = PPC::sub_gt;
5374
ShiftVal = 2;
5375
Reverse = true;
5376
break;
5377
case 6:
5378
SubReg = PPC::sub_un;
5379
break;
5380
case 7:
5381
SubReg = PPC::sub_un;
5382
Reverse = true;
5383
break;
5384
}
5385
5386
EVT VTs[] = {MVT::v16i8, MVT::Glue};
5387
SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5388
CurDAG->getTargetConstant(0, dl, MVT::i32)};
5389
SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5390
SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5391
// On Power10, we can use SETBC[R]. On prior architectures, we have to use
5392
// MFOCRF and shift/negate the value.
5393
if (Subtarget->isISA3_1()) {
5394
SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5395
SDValue CRBit = SDValue(
5396
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5397
CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5398
0);
5399
CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5400
CRBit);
5401
} else {
5402
SDValue Move =
5403
SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5404
BCDOp.getValue(1)),
5405
0);
5406
SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5407
getI32Imm(31, dl), getI32Imm(31, dl)};
5408
if (!Reverse)
5409
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5410
else {
5411
SDValue Shift = SDValue(
5412
CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5413
CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5414
}
5415
}
5416
return;
5417
}
5418
5419
if (!Subtarget->isISA3_1())
5420
break;
5421
unsigned Opcode = 0;
5422
switch (IntID) {
5423
default:
5424
break;
5425
case Intrinsic::ppc_altivec_vstribr_p:
5426
Opcode = PPC::VSTRIBR_rec;
5427
break;
5428
case Intrinsic::ppc_altivec_vstribl_p:
5429
Opcode = PPC::VSTRIBL_rec;
5430
break;
5431
case Intrinsic::ppc_altivec_vstrihr_p:
5432
Opcode = PPC::VSTRIHR_rec;
5433
break;
5434
case Intrinsic::ppc_altivec_vstrihl_p:
5435
Opcode = PPC::VSTRIHL_rec;
5436
break;
5437
}
5438
if (!Opcode)
5439
break;
5440
5441
// Generate the appropriate vector string isolate intrinsic to match.
5442
EVT VTs[] = {MVT::v16i8, MVT::Glue};
5443
SDValue VecStrOp =
5444
SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5445
// Vector string isolate instructions update the EQ bit of CR6.
5446
// Generate a SETBC instruction to extract the bit and place it in a GPR.
5447
SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5448
SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5449
SDValue CRBit = SDValue(
5450
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5451
CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5452
0);
5453
CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5454
return;
5455
}
5456
5457
case ISD::SETCC:
5458
case ISD::STRICT_FSETCC:
5459
case ISD::STRICT_FSETCCS:
5460
if (trySETCC(N))
5461
return;
5462
break;
5463
// These nodes will be transformed into GETtlsADDR32 node, which
5464
// later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5465
case PPCISD::ADDI_TLSLD_L_ADDR:
5466
case PPCISD::ADDI_TLSGD_L_ADDR: {
5467
const Module *Mod = MF->getFunction().getParent();
5468
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5469
!Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5470
Mod->getPICLevel() == PICLevel::SmallPIC)
5471
break;
5472
// Attach global base pointer on GETtlsADDR32 node in order to
5473
// generate secure plt code for TLS symbols.
5474
getGlobalBaseReg();
5475
} break;
5476
case PPCISD::CALL: {
5477
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5478
!TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
5479
!Subtarget->isTargetELF())
5480
break;
5481
5482
SDValue Op = N->getOperand(1);
5483
5484
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5485
if (GA->getTargetFlags() == PPCII::MO_PLT)
5486
getGlobalBaseReg();
5487
}
5488
else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5489
if (ES->getTargetFlags() == PPCII::MO_PLT)
5490
getGlobalBaseReg();
5491
}
5492
}
5493
break;
5494
5495
case PPCISD::GlobalBaseReg:
5496
ReplaceNode(N, getGlobalBaseReg());
5497
return;
5498
5499
case ISD::FrameIndex:
5500
selectFrameIndex(N, N);
5501
return;
5502
5503
case PPCISD::MFOCRF: {
5504
SDValue InGlue = N->getOperand(1);
5505
ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5506
N->getOperand(0), InGlue));
5507
return;
5508
}
5509
5510
case PPCISD::READ_TIME_BASE:
5511
ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5512
MVT::Other, N->getOperand(0)));
5513
return;
5514
5515
case PPCISD::SRA_ADDZE: {
5516
SDValue N0 = N->getOperand(0);
5517
SDValue ShiftAmt =
5518
CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5519
getConstantIntValue(), dl,
5520
N->getValueType(0));
5521
if (N->getValueType(0) == MVT::i64) {
5522
SDNode *Op =
5523
CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5524
N0, ShiftAmt);
5525
CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5526
SDValue(Op, 1));
5527
return;
5528
} else {
5529
assert(N->getValueType(0) == MVT::i32 &&
5530
"Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5531
SDNode *Op =
5532
CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5533
N0, ShiftAmt);
5534
CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5535
SDValue(Op, 1));
5536
return;
5537
}
5538
}
5539
5540
case ISD::STORE: {
5541
// Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5542
// X-form stores.
5543
StoreSDNode *ST = cast<StoreSDNode>(N);
5544
if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5545
ST->getAddressingMode() != ISD::PRE_INC)
5546
if (tryTLSXFormStore(ST))
5547
return;
5548
break;
5549
}
5550
case ISD::LOAD: {
5551
// Handle preincrement loads.
5552
LoadSDNode *LD = cast<LoadSDNode>(N);
5553
EVT LoadedVT = LD->getMemoryVT();
5554
5555
// Normal loads are handled by code generated from the .td file.
5556
if (LD->getAddressingMode() != ISD::PRE_INC) {
5557
// Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5558
// X-form loads.
5559
if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5560
if (tryTLSXFormLoad(LD))
5561
return;
5562
break;
5563
}
5564
5565
SDValue Offset = LD->getOffset();
5566
if (Offset.getOpcode() == ISD::TargetConstant ||
5567
Offset.getOpcode() == ISD::TargetGlobalAddress) {
5568
5569
unsigned Opcode;
5570
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5571
if (LD->getValueType(0) != MVT::i64) {
5572
// Handle PPC32 integer and normal FP loads.
5573
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5574
switch (LoadedVT.getSimpleVT().SimpleTy) {
5575
default: llvm_unreachable("Invalid PPC load type!");
5576
case MVT::f64: Opcode = PPC::LFDU; break;
5577
case MVT::f32: Opcode = PPC::LFSU; break;
5578
case MVT::i32: Opcode = PPC::LWZU; break;
5579
case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5580
case MVT::i1:
5581
case MVT::i8: Opcode = PPC::LBZU; break;
5582
}
5583
} else {
5584
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5585
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5586
switch (LoadedVT.getSimpleVT().SimpleTy) {
5587
default: llvm_unreachable("Invalid PPC load type!");
5588
case MVT::i64: Opcode = PPC::LDU; break;
5589
case MVT::i32: Opcode = PPC::LWZU8; break;
5590
case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5591
case MVT::i1:
5592
case MVT::i8: Opcode = PPC::LBZU8; break;
5593
}
5594
}
5595
5596
SDValue Chain = LD->getChain();
5597
SDValue Base = LD->getBasePtr();
5598
SDValue Ops[] = { Offset, Base, Chain };
5599
SDNode *MN = CurDAG->getMachineNode(
5600
Opcode, dl, LD->getValueType(0),
5601
PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5602
transferMemOperands(N, MN);
5603
ReplaceNode(N, MN);
5604
return;
5605
} else {
5606
unsigned Opcode;
5607
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5608
if (LD->getValueType(0) != MVT::i64) {
5609
// Handle PPC32 integer and normal FP loads.
5610
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5611
switch (LoadedVT.getSimpleVT().SimpleTy) {
5612
default: llvm_unreachable("Invalid PPC load type!");
5613
case MVT::f64: Opcode = PPC::LFDUX; break;
5614
case MVT::f32: Opcode = PPC::LFSUX; break;
5615
case MVT::i32: Opcode = PPC::LWZUX; break;
5616
case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5617
case MVT::i1:
5618
case MVT::i8: Opcode = PPC::LBZUX; break;
5619
}
5620
} else {
5621
assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5622
assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5623
"Invalid sext update load");
5624
switch (LoadedVT.getSimpleVT().SimpleTy) {
5625
default: llvm_unreachable("Invalid PPC load type!");
5626
case MVT::i64: Opcode = PPC::LDUX; break;
5627
case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5628
case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5629
case MVT::i1:
5630
case MVT::i8: Opcode = PPC::LBZUX8; break;
5631
}
5632
}
5633
5634
SDValue Chain = LD->getChain();
5635
SDValue Base = LD->getBasePtr();
5636
SDValue Ops[] = { Base, Offset, Chain };
5637
SDNode *MN = CurDAG->getMachineNode(
5638
Opcode, dl, LD->getValueType(0),
5639
PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5640
transferMemOperands(N, MN);
5641
ReplaceNode(N, MN);
5642
return;
5643
}
5644
}
5645
5646
case ISD::AND:
5647
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5648
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5649
tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5650
tryAsPairOfRLDICL(N))
5651
return;
5652
5653
// Other cases are autogenerated.
5654
break;
5655
case ISD::OR: {
5656
if (N->getValueType(0) == MVT::i32)
5657
if (tryBitfieldInsert(N))
5658
return;
5659
5660
int16_t Imm;
5661
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5662
isIntS16Immediate(N->getOperand(1), Imm)) {
5663
KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5664
5665
// If this is equivalent to an add, then we can fold it with the
5666
// FrameIndex calculation.
5667
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5668
selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5669
return;
5670
}
5671
}
5672
5673
// If this is 'or' against an imm with consecutive ones and both sides zero,
5674
// try to emit rldimi
5675
if (tryAsSingleRLDIMI(N))
5676
return;
5677
5678
// OR with a 32-bit immediate can be handled by ori + oris
5679
// without creating an immediate in a GPR.
5680
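// E.g. OR with 0x12345678 becomes ori with 0x5678 followed by oris with
// 0x1234 on the same register.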
uint64_t Imm64 = 0;
5681
bool IsPPC64 = Subtarget->isPPC64();
5682
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5683
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
5684
// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5685
uint64_t ImmHi = Imm64 >> 16;
5686
uint64_t ImmLo = Imm64 & 0xFFFF;
5687
if (ImmHi != 0 && ImmLo != 0) {
5688
SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5689
N->getOperand(0),
5690
getI16Imm(ImmLo, dl));
5691
SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5692
CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5693
return;
5694
}
5695
}
5696
5697
// Other cases are autogenerated.
5698
break;
5699
}
5700
case ISD::XOR: {
5701
// XOR with a 32-bit immediate can be handled by xori + xoris
5702
// without creating an immediate in a GPR.
5703
uint64_t Imm64 = 0;
5704
bool IsPPC64 = Subtarget->isPPC64();
5705
if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5706
(Imm64 & ~0xFFFFFFFFuLL) == 0) {
5707
// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5708
uint64_t ImmHi = Imm64 >> 16;
5709
uint64_t ImmLo = Imm64 & 0xFFFF;
5710
if (ImmHi != 0 && ImmLo != 0) {
5711
SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5712
N->getOperand(0),
5713
getI16Imm(ImmLo, dl));
5714
SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5715
CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5716
return;
5717
}
5718
}
5719
5720
break;
5721
}
5722
case ISD::ADD: {
5723
int16_t Imm;
5724
if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5725
isIntS16Immediate(N->getOperand(1), Imm)) {
5726
selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5727
return;
5728
}
5729
5730
break;
5731
}
5732
case ISD::SHL: {
5733
unsigned Imm, SH, MB, ME;
5734
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5735
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5736
SDValue Ops[] = { N->getOperand(0).getOperand(0),
5737
getI32Imm(SH, dl), getI32Imm(MB, dl),
5738
getI32Imm(ME, dl) };
5739
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5740
return;
5741
}
5742
5743
// Other cases are autogenerated.
5744
break;
5745
}
5746
case ISD::SRL: {
5747
unsigned Imm, SH, MB, ME;
5748
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5749
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5750
SDValue Ops[] = { N->getOperand(0).getOperand(0),
5751
getI32Imm(SH, dl), getI32Imm(MB, dl),
5752
getI32Imm(ME, dl) };
5753
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5754
return;
5755
}
5756
5757
// Other cases are autogenerated.
5758
break;
5759
}
5760
case ISD::MUL: {
5761
SDValue Op1 = N->getOperand(1);
5762
if (Op1.getOpcode() != ISD::Constant ||
5763
(Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5764
break;
5765
5766
// If the multiplier fits int16, we can handle it with mulli.
5767
int64_t Imm = Op1->getAsZExtVal();
5768
unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5769
if (isInt<16>(Imm) || !Shift)
5770
break;
5771
5772
// If the shifted value fits int16, we can do this transformation:
5773
// (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5774
// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5775
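// For example (illustrative), a multiply by 96 = 3 << 5 becomes mulli by 3
// followed by an rldicr/rlwinm that rotates left by 5 and clears the
// wrapped-around bits, i.e. a left shift by 5.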
uint64_t ImmSh = Imm >> Shift;
5776
if (!isInt<16>(ImmSh))
5777
break;
5778
5779
uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5780
if (Op1.getValueType() == MVT::i64) {
5781
SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5782
SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5783
N->getOperand(0), SDImm);
5784
5785
SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5786
getI32Imm(63 - Shift, dl)};
5787
CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5788
return;
5789
} else {
5790
SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5791
SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5792
N->getOperand(0), SDImm);
5793
5794
SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5795
getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5796
CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5797
return;
5798
}
5799
break;
5800
}
5801
// FIXME: Remove this once the ANDI glue bug is fixed:
5802
case PPCISD::ANDI_rec_1_EQ_BIT:
5803
case PPCISD::ANDI_rec_1_GT_BIT: {
5804
if (!ANDIGlueBug)
5805
break;
5806
5807
EVT InVT = N->getOperand(0).getValueType();
5808
assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5809
"Invalid input type for ANDI_rec_1_EQ_BIT");
5810
5811
unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5812
SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5813
N->getOperand(0),
5814
CurDAG->getTargetConstant(1, dl, InVT)),
5815
0);
5816
SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5817
SDValue SRIdxVal = CurDAG->getTargetConstant(
5818
N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5819
dl, MVT::i32);
5820
5821
CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5822
SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5823
return;
5824
}
5825
case ISD::SELECT_CC: {
5826
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5827
EVT PtrVT =
5828
CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
5829
bool isPPC64 = (PtrVT == MVT::i64);
5830
5831
// If this is a select of i1 operands, we'll pattern match it.
5832
if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5833
break;
5834
5835
if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5836
bool NeedSwapOps = false;
5837
bool IsUnCmp = false;
5838
if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5839
SDValue LHS = N->getOperand(0);
5840
SDValue RHS = N->getOperand(1);
5841
if (NeedSwapOps)
5842
std::swap(LHS, RHS);
5843
5844
// Make use of SelectCC to generate the comparison that sets the CR bits. For
// equality comparisons with one literal operand, SelectCC may avoid
// materializing the whole literal and instead check it with xoris first, in
// which case the resulting comparison cannot exactly represent the GT/LT
// relationship. To avoid this, we specify SETGT/SETUGT here instead of SETEQ.
SDValue GenCC =
5851
SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5852
CurDAG->SelectNodeTo(
5853
N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5854
N->getValueType(0), GenCC);
5855
NumP9Setb++;
5856
return;
5857
}
5858
}
5859
5860
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5861
if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5862
isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5863
CC == ISD::SETNE &&
5864
// FIXME: Implement this optzn for PPC64.
5865
N->getValueType(0) == MVT::i32) {
5866
SDNode *Tmp =
5867
CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5868
N->getOperand(0), getI32Imm(~0U, dl));
5869
CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5870
N->getOperand(0), SDValue(Tmp, 1));
5871
return;
5872
}
5873
5874
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5875
5876
if (N->getValueType(0) == MVT::i1) {
5877
// An i1 select is: (c & t) | (!c & f).
5878
bool Inv;
5879
unsigned Idx = getCRIdxForSetCC(CC, Inv);
5880
5881
unsigned SRI;
5882
switch (Idx) {
5883
default: llvm_unreachable("Invalid CC index");
5884
case 0: SRI = PPC::sub_lt; break;
5885
case 1: SRI = PPC::sub_gt; break;
5886
case 2: SRI = PPC::sub_eq; break;
5887
case 3: SRI = PPC::sub_un; break;
5888
}
5889
5890
SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5891
5892
SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5893
CCBit, CCBit), 0);
5894
SDValue C = Inv ? NotCCBit : CCBit,
5895
NotC = Inv ? CCBit : NotCCBit;
5896
5897
SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5898
C, N->getOperand(2)), 0);
5899
SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5900
NotC, N->getOperand(3)), 0);
5901
5902
CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5903
return;
5904
}
5905
5906
unsigned BROpc =
5907
getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5908
5909
unsigned SelectCCOp;
5910
if (N->getValueType(0) == MVT::i32)
5911
SelectCCOp = PPC::SELECT_CC_I4;
5912
else if (N->getValueType(0) == MVT::i64)
5913
SelectCCOp = PPC::SELECT_CC_I8;
5914
else if (N->getValueType(0) == MVT::f32) {
5915
if (Subtarget->hasP8Vector())
5916
SelectCCOp = PPC::SELECT_CC_VSSRC;
5917
else if (Subtarget->hasSPE())
5918
SelectCCOp = PPC::SELECT_CC_SPE4;
5919
else
5920
SelectCCOp = PPC::SELECT_CC_F4;
5921
} else if (N->getValueType(0) == MVT::f64) {
5922
if (Subtarget->hasVSX())
5923
SelectCCOp = PPC::SELECT_CC_VSFRC;
5924
else if (Subtarget->hasSPE())
5925
SelectCCOp = PPC::SELECT_CC_SPE;
5926
else
5927
SelectCCOp = PPC::SELECT_CC_F8;
5928
} else if (N->getValueType(0) == MVT::f128)
5929
SelectCCOp = PPC::SELECT_CC_F16;
5930
else if (Subtarget->hasSPE())
5931
SelectCCOp = PPC::SELECT_CC_SPE;
5932
else if (N->getValueType(0) == MVT::v2f64 ||
5933
N->getValueType(0) == MVT::v2i64)
5934
SelectCCOp = PPC::SELECT_CC_VSRC;
5935
else
5936
SelectCCOp = PPC::SELECT_CC_VRRC;
5937
5938
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5939
getI32Imm(BROpc, dl) };
5940
CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5941
return;
5942
}
5943
case ISD::VECTOR_SHUFFLE:
5944
if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5945
N->getValueType(0) == MVT::v2i64)) {
5946
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5947
5948
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5949
Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5950
unsigned DM[2];
5951
5952
for (int i = 0; i < 2; ++i)
5953
if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5954
DM[i] = 0;
5955
else
5956
DM[i] = 1;
5957
5958
if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5959
Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5960
isa<LoadSDNode>(Op1.getOperand(0))) {
5961
LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5962
SDValue Base, Offset;
5963
5964
if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5965
(LD->getMemoryVT() == MVT::f64 ||
5966
LD->getMemoryVT() == MVT::i64) &&
5967
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5968
SDValue Chain = LD->getChain();
5969
SDValue Ops[] = { Base, Offset, Chain };
5970
MachineMemOperand *MemOp = LD->getMemOperand();
5971
SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5972
N->getValueType(0), Ops);
5973
CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5974
return;
5975
}
5976
}
5977
5978
// For little endian, we must swap the input operands and adjust
5979
// the mask elements (reverse and invert them).
5980
if (Subtarget->isLittleEndian()) {
5981
std::swap(Op1, Op2);
5982
unsigned tmp = DM[0];
5983
DM[0] = 1 - DM[1];
5984
DM[1] = 1 - tmp;
5985
}
5986
5987
SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
5988
MVT::i32);
5989
SDValue Ops[] = { Op1, Op2, DMV };
5990
CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
5991
return;
5992
}
5993
5994
break;
5995
case PPCISD::BDNZ:
5996
case PPCISD::BDZ: {
5997
bool IsPPC64 = Subtarget->isPPC64();
5998
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
5999
CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
6000
? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6001
: (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6002
MVT::Other, Ops);
6003
return;
6004
}
6005
case PPCISD::COND_BRANCH: {
6006
// Op #0 is the Chain.
6007
// Op #1 is the PPC::PRED_* number.
6008
// Op #2 is the CR#
6009
// Op #3 is the Dest MBB
6010
// Op #4 is the Flag.
6011
// Prevent PPC::PRED_* from being selected into LI.
6012
unsigned PCC = N->getConstantOperandVal(1);
6013
if (EnableBranchHint)
6014
PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6015
6016
SDValue Pred = getI32Imm(PCC, dl);
6017
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6018
N->getOperand(0), N->getOperand(4) };
6019
CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6020
return;
6021
}
6022
case ISD::BR_CC: {
6023
if (tryFoldSWTestBRCC(N))
6024
return;
6025
if (trySelectLoopCountIntrinsic(N))
6026
return;
6027
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6028
unsigned PCC =
6029
getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6030
6031
if (N->getOperand(2).getValueType() == MVT::i1) {
6032
unsigned Opc;
6033
bool Swap;
6034
switch (PCC) {
6035
default: llvm_unreachable("Unexpected Boolean-operand predicate");
6036
case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6037
case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6038
case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6039
case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6040
case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6041
case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6042
}
6043
6044
// A signed comparison of i1 values produces the opposite result to an
6045
// unsigned one if the condition code includes less-than or greater-than.
6046
// This is because 1 is the most negative signed i1 number and the most
6047
// positive unsigned i1 number. The CR-logical operations used for such
6048
// comparisons are non-commutative so for signed comparisons vs. unsigned
6049
// ones, the input operands just need to be swapped.
6050
if (ISD::isSignedIntSetCC(CC))
6051
Swap = !Swap;
6052
6053
SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6054
N->getOperand(Swap ? 3 : 2),
6055
N->getOperand(Swap ? 2 : 3)), 0);
6056
CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6057
N->getOperand(0));
6058
return;
6059
}
6060
6061
if (EnableBranchHint)
6062
PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6063
6064
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6065
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6066
N->getOperand(4), N->getOperand(0) };
6067
CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6068
return;
6069
}
6070
case ISD::BRIND: {
6071
// FIXME: Should custom lower this.
6072
SDValue Chain = N->getOperand(0);
6073
SDValue Target = N->getOperand(1);
6074
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6075
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6076
Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6077
Chain), 0);
6078
CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6079
return;
6080
}
6081
case PPCISD::TOC_ENTRY: {
6082
const bool isPPC64 = Subtarget->isPPC64();
6083
const bool isELFABI = Subtarget->isSVR4ABI();
6084
const bool isAIXABI = Subtarget->isAIXABI();
6085
6086
// PowerPC supports only the small, medium and large code models.
6087
const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6088
6089
assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6090
"PowerPC doesn't support tiny or kernel code models.");
6091
6092
if (isAIXABI && CModel == CodeModel::Medium)
6093
report_fatal_error("Medium code model is not supported on AIX.");
6094
6095
// For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6096
// this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6097
// small code model, we need to check for a toc-data attribute.
6098
if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6099
break;
6100
6101
auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6102
EVT OperandTy) {
6103
SDValue GA = TocEntry->getOperand(0);
6104
SDValue TocBase = TocEntry->getOperand(1);
6105
SDNode *MN = nullptr;
6106
if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6107
// A toc-data access does not involve a load from the GOT, so there is no
6108
// need to keep memory operands.
6109
MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
6110
else {
6111
MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6112
transferMemOperands(TocEntry, MN);
6113
}
6114
ReplaceNode(TocEntry, MN);
6115
};
6116
6117
// Handle 32-bit small code model.
6118
if (!isPPC64 && CModel == CodeModel::Small) {
6119
// Transforms the ISD::TOC_ENTRY node to the passed-in opcode, either
6120
// PPC::ADDItoc or PPC::LWZtoc.
6121
if (isELFABI) {
6122
assert(TM.isPositionIndependent() &&
6123
"32-bit ELF can only have TOC entries in position independent"
6124
" code.");
6125
// 32-bit ELF always uses a small code model toc access.
6126
replaceWith(PPC::LWZtoc, N, MVT::i32);
6127
return;
6128
}
6129
6130
assert(isAIXABI && "ELF ABI already handled");
6131
6132
if (hasTocDataAttr(N->getOperand(0))) {
6133
replaceWith(PPC::ADDItoc, N, MVT::i32);
6134
return;
6135
}
6136
6137
replaceWith(PPC::LWZtoc, N, MVT::i32);
6138
return;
6139
}
6140
6141
if (isPPC64 && CModel == CodeModel::Small) {
6142
assert(isAIXABI && "ELF ABI handled in common SelectCode");
6143
6144
if (hasTocDataAttr(N->getOperand(0))) {
6145
replaceWith(PPC::ADDItoc8, N, MVT::i64);
6146
return;
6147
}
6148
// Break if it doesn't have the toc-data attribute, and proceed with the
6149
// common SelectCode path.
6150
break;
6151
}
6152
6153
assert(CModel != CodeModel::Small && "All small code models handled.");
6154
6155
assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6156
" ELF/AIX or 32-bit AIX in the following.");
6157
6158
// Transforms the ISD::TOC_ENTRY node for the 32-bit AIX large code model,
6159
// 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code models, for
6160
// code that does not contain TOC data symbols. We generate two instructions as
6161
// described below. The first source operand is a symbol reference. If it
6162
// must be referenced via the TOC according to Subtarget, we generate:
6163
// [32-bit AIX]
6164
// LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6165
// [64-bit ELF/AIX]
6166
// LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6167
// Otherwise for medium code model ELF we generate:
6168
// ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6169
6170
// And finally for AIX with toc-data we generate:
6171
// [32-bit AIX]
6172
// ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6173
// [64-bit AIX]
6174
// ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6175
6176
SDValue GA = N->getOperand(0);
6177
SDValue TOCbase = N->getOperand(1);
6178
6179
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
6180
SDNode *Tmp = CurDAG->getMachineNode(
6181
isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6182
6183
// On AIX, if the symbol has the toc-data attribute it will be defined
6184
// in the TOC entry, so we use an ADDItocL/ADDItocL8.
6185
if (isAIXABI && hasTocDataAttr(GA)) {
6186
ReplaceNode(
6187
N, CurDAG->getMachineNode(isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6188
dl, VT, SDValue(Tmp, 0), GA));
6189
return;
6190
}
6191
6192
if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6193
// If it is accessed as got-indirect, we need an extra LWZ/LD to load
6194
// the address.
6195
SDNode *MN = CurDAG->getMachineNode(
6196
isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6197
6198
transferMemOperands(N, MN);
6199
ReplaceNode(N, MN);
6200
return;
6201
}
6202
6203
assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6204
// Build the address relative to the TOC-pointer.
6205
ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6206
SDValue(Tmp, 0), GA));
6207
return;
6208
}
6209
case PPCISD::PPC32_PICGOT:
6210
// Generate a PIC-safe GOT reference.
6211
assert(Subtarget->is32BitELFABI() &&
6212
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6213
CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6214
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6215
MVT::i32);
6216
return;
6217
6218
case PPCISD::VADD_SPLAT: {
6219
// This expands into one of three sequences, depending on whether
6220
// the first operand is odd or even, positive or negative.
6221
assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6222
isa<ConstantSDNode>(N->getOperand(1)) &&
6223
"Invalid operand on VADD_SPLAT!");
6224
6225
int Elt = N->getConstantOperandVal(0);
6226
int EltSize = N->getConstantOperandVal(1);
6227
unsigned Opc1, Opc2, Opc3;
6228
EVT VT;
6229
6230
if (EltSize == 1) {
6231
Opc1 = PPC::VSPLTISB;
6232
Opc2 = PPC::VADDUBM;
6233
Opc3 = PPC::VSUBUBM;
6234
VT = MVT::v16i8;
6235
} else if (EltSize == 2) {
6236
Opc1 = PPC::VSPLTISH;
6237
Opc2 = PPC::VADDUHM;
6238
Opc3 = PPC::VSUBUHM;
6239
VT = MVT::v8i16;
6240
} else {
6241
assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6242
Opc1 = PPC::VSPLTISW;
6243
Opc2 = PPC::VADDUWM;
6244
Opc3 = PPC::VSUBUWM;
6245
VT = MVT::v4i32;
6246
}
6247
6248
if ((Elt & 1) == 0) {
6249
// Elt is even, in the range [-32,-18] + [16,30].
6250
//
6251
// Convert: VADD_SPLAT elt, size
6252
// Into: tmp = VSPLTIS[BHW] elt
6253
// VADDU[BHW]M tmp, tmp
6254
// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6255
SDValue EltVal = getI32Imm(Elt >> 1, dl);
6256
SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6257
SDValue TmpVal = SDValue(Tmp, 0);
6258
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6259
return;
6260
} else if (Elt > 0) {
6261
// Elt is odd and positive, in the range [17,31].
6262
//
6263
// Convert: VADD_SPLAT elt, size
6264
// Into: tmp1 = VSPLTIS[BHW] elt-16
6265
// tmp2 = VSPLTIS[BHW] -16
6266
// VSUBU[BHW]M tmp1, tmp2
6267
SDValue EltVal = getI32Imm(Elt - 16, dl);
6268
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6269
EltVal = getI32Imm(-16, dl);
6270
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6271
ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6272
SDValue(Tmp2, 0)));
6273
return;
6274
} else {
6275
// Elt is odd and negative, in the range [-31,-17].
6276
//
6277
// Convert: VADD_SPLAT elt, size
6278
// Into: tmp1 = VSPLTIS[BHW] elt+16
6279
// tmp2 = VSPLTIS[BHW] -16
6280
// VADDU[BHW]M tmp1, tmp2
6281
SDValue EltVal = getI32Imm(Elt + 16, dl);
6282
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6283
EltVal = getI32Imm(-16, dl);
6284
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6285
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6286
SDValue(Tmp2, 0)));
6287
return;
6288
}
6289
}
6290
case PPCISD::LD_SPLAT: {
6291
// Here we want to handle a splat load of type v16i8 or v8i16 when there is
6292
// no direct move, since we don't need to go through the stack in that case.
6293
// If the target has direct move, the .td file should give the best selection.
6294
if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6295
break;
6296
6297
EVT Type = N->getValueType(0);
6298
if (Type != MVT::v16i8 && Type != MVT::v8i16)
6299
break;
6300
6301
// If the alignment of the load is 16 or bigger, we don't need the permuted
6302
// mask to get the required value. The value must be element 0 on a
6303
// big-endian target, or element 7/15 on a little-endian target, of the
6304
// result register of the lvx instruction.
6305
// Let the .td file select the instruction.
6306
if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6307
isOffsetMultipleOf(N, 16))
6308
break;
6309
6310
SDValue ZeroReg =
6311
CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6312
Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
6313
unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6314
// v16i8 LD_SPLAT addr
6315
// ======>
6316
// Mask = LVSR/LVSL 0, addr
6317
// LoadLow = LVX 0, addr
6318
// Perm = VPERM LoadLow, LoadLow, Mask
6319
// Splat = VSPLTB 15/0, Perm
6320
//
6321
// v8i16 LD_SPLAT addr
6322
// ======>
6323
// Mask = LVSR/LVSL 0, addr
6324
// LoadLow = LVX 0, addr
6325
// LoadHigh = LVX (LI, 1), addr
6326
// Perm = VPERM LoadLow, LoadHigh, Mask
6327
// Splat = VSPLTH 7/0, Perm
6328
unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6329
unsigned SplatElemIndex =
6330
Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6331
6332
SDNode *Mask = CurDAG->getMachineNode(
6333
Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6334
N->getOperand(1));
6335
6336
SDNode *LoadLow =
6337
CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6338
{ZeroReg, N->getOperand(1), N->getOperand(0)});
6339
6340
SDNode *LoadHigh = LoadLow;
6341
if (Type == MVT::v8i16) {
6342
LoadHigh = CurDAG->getMachineNode(
6343
PPC::LVX, dl, MVT::v16i8, MVT::Other,
6344
{SDValue(CurDAG->getMachineNode(
6345
LIOpcode, dl, MVT::i32,
6346
CurDAG->getTargetConstant(1, dl, MVT::i8)),
6347
0),
6348
N->getOperand(1), SDValue(LoadLow, 1)});
6349
}
6350
6351
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6352
transferMemOperands(N, LoadHigh);
6353
6354
SDNode *Perm =
6355
CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6356
SDValue(LoadHigh, 0), SDValue(Mask, 0));
6357
CurDAG->SelectNodeTo(N, SplatOp, Type,
6358
CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6359
SDValue(Perm, 0));
6360
return;
6361
}
6362
}
6363
6364
SelectCode(N);
6365
}
6366
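// Illustrative only; not used by the selector. A scalar check of the
// arithmetic behind the VADD_SPLAT expansion in Select() above: every splat
// value handled there is rebuilt from immediates that fit the 5-bit signed
// range [-16,15] accepted by VSPLTIS[BHW]. The helper name below is
// hypothetical and exists only for this sketch.
namespace {
constexpr bool fitsVSplatImm(int V) { return V >= -16 && V <= 15; }
// Even elt: elt = half + half, where half = elt >> 1 (written /2 here).
static_assert(fitsVSplatImm(30 / 2) && 30 / 2 + 30 / 2 == 30,
              "even positive case");
static_assert(fitsVSplatImm(-32 / 2) && -32 / 2 + -32 / 2 == -32,
              "even negative case");
// Odd positive elt: elt = (elt - 16) - (-16).
static_assert(fitsVSplatImm(31 - 16) && (31 - 16) - (-16) == 31,
              "odd positive case");
// Odd negative elt: elt = (elt + 16) + (-16).
static_assert(fitsVSplatImm(-31 + 16) && (-31 + 16) + (-16) == -31,
              "odd negative case");
} // end anonymous namespace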
6367
// If the target supports the cmpb instruction, do the idiom recognition here.
6368
// We don't do this as a DAG combine because we don't want to do it as nodes
6369
// are being combined (we might miss part of the eventual idiom). We also
6370
// don't want to do it during instruction selection proper, because we want
6371
// to reuse the logic for lowering the masking operations that is already
6372
// part of the instruction selector.
6373
SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6374
SDLoc dl(N);
6375
6376
assert(N->getOpcode() == ISD::OR &&
6377
"Only OR nodes are supported for CMPB");
6378
6379
SDValue Res;
6380
if (!Subtarget->hasCMPB())
6381
return Res;
6382
6383
if (N->getValueType(0) != MVT::i32 &&
6384
N->getValueType(0) != MVT::i64)
6385
return Res;
6386
6387
EVT VT = N->getValueType(0);
6388
6389
SDValue RHS, LHS;
6390
bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6391
uint64_t Mask = 0, Alt = 0;
6392
6393
auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6394
uint64_t &Mask, uint64_t &Alt,
6395
SDValue &LHS, SDValue &RHS) {
6396
if (O.getOpcode() != ISD::SELECT_CC)
6397
return false;
6398
ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6399
6400
if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6401
!isa<ConstantSDNode>(O.getOperand(3)))
6402
return false;
6403
6404
uint64_t PM = O.getConstantOperandVal(2);
6405
uint64_t PAlt = O.getConstantOperandVal(3);
6406
for (b = 0; b < 8; ++b) {
6407
uint64_t Mask = UINT64_C(0xFF) << (8*b);
6408
if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6409
break;
6410
}
6411
6412
if (b == 8)
6413
return false;
6414
Mask |= PM;
6415
Alt |= PAlt;
6416
6417
if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6418
O.getConstantOperandVal(1) != 0) {
6419
SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6420
if (Op0.getOpcode() == ISD::TRUNCATE)
6421
Op0 = Op0.getOperand(0);
6422
if (Op1.getOpcode() == ISD::TRUNCATE)
6423
Op1 = Op1.getOperand(0);
6424
6425
if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6426
Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6427
isa<ConstantSDNode>(Op0.getOperand(1))) {
6428
6429
unsigned Bits = Op0.getValueSizeInBits();
6430
if (b != Bits/8-1)
6431
return false;
6432
if (Op0.getConstantOperandVal(1) != Bits-8)
6433
return false;
6434
6435
LHS = Op0.getOperand(0);
6436
RHS = Op1.getOperand(0);
6437
return true;
6438
}
6439
6440
// When we have small integers (i16 to be specific), the form present
6441
// post-legalization uses SETULT in the SELECT_CC for the
6442
// higher-order byte, depending on the fact that the
6443
// even-higher-order bytes are known to all be zero, for example:
6444
// select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6445
// (so when the second byte is the same, because all higher-order
6446
// bits from bytes 3 and 4 are known to be zero, the result of the
6447
// xor can be at most 255)
6448
if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6449
isa<ConstantSDNode>(O.getOperand(1))) {
6450
6451
uint64_t ULim = O.getConstantOperandVal(1);
6452
if (ULim != (UINT64_C(1) << b*8))
6453
return false;
6454
6455
// Now we need to make sure that the upper bytes are known to be
6456
// zero.
6457
unsigned Bits = Op0.getValueSizeInBits();
6458
if (!CurDAG->MaskedValueIsZero(
6459
Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6460
return false;
6461
6462
LHS = Op0.getOperand(0);
6463
RHS = Op0.getOperand(1);
6464
return true;
6465
}
6466
6467
return false;
6468
}
6469
6470
if (CC != ISD::SETEQ)
6471
return false;
6472
6473
SDValue Op = O.getOperand(0);
6474
if (Op.getOpcode() == ISD::AND) {
6475
if (!isa<ConstantSDNode>(Op.getOperand(1)))
6476
return false;
6477
if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6478
return false;
6479
6480
SDValue XOR = Op.getOperand(0);
6481
if (XOR.getOpcode() == ISD::TRUNCATE)
6482
XOR = XOR.getOperand(0);
6483
if (XOR.getOpcode() != ISD::XOR)
6484
return false;
6485
6486
LHS = XOR.getOperand(0);
6487
RHS = XOR.getOperand(1);
6488
return true;
6489
} else if (Op.getOpcode() == ISD::SRL) {
6490
if (!isa<ConstantSDNode>(Op.getOperand(1)))
6491
return false;
6492
unsigned Bits = Op.getValueSizeInBits();
6493
if (b != Bits/8-1)
6494
return false;
6495
if (Op.getConstantOperandVal(1) != Bits-8)
6496
return false;
6497
6498
SDValue XOR = Op.getOperand(0);
6499
if (XOR.getOpcode() == ISD::TRUNCATE)
6500
XOR = XOR.getOperand(0);
6501
if (XOR.getOpcode() != ISD::XOR)
6502
return false;
6503
6504
LHS = XOR.getOperand(0);
6505
RHS = XOR.getOperand(1);
6506
return true;
6507
}
6508
6509
return false;
6510
};
6511
6512
SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
6513
while (!Queue.empty()) {
6514
SDValue V = Queue.pop_back_val();
6515
6516
for (const SDValue &O : V.getNode()->ops()) {
6517
unsigned b = 0;
6518
uint64_t M = 0, A = 0;
6519
SDValue OLHS, ORHS;
6520
if (O.getOpcode() == ISD::OR) {
6521
Queue.push_back(O);
6522
} else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6523
if (!LHS) {
6524
LHS = OLHS;
6525
RHS = ORHS;
6526
BytesFound[b] = true;
6527
Mask |= M;
6528
Alt |= A;
6529
} else if ((LHS == ORHS && RHS == OLHS) ||
6530
(RHS == ORHS && LHS == OLHS)) {
6531
BytesFound[b] = true;
6532
Mask |= M;
6533
Alt |= A;
6534
} else {
6535
return Res;
6536
}
6537
} else {
6538
return Res;
6539
}
6540
}
6541
}
6542
6543
unsigned LastB = 0, BCnt = 0;
6544
for (unsigned i = 0; i < 8; ++i)
6545
if (BytesFound[LastB]) {
6546
++BCnt;
6547
LastB = i;
6548
}
6549
6550
if (!LastB || BCnt < 2)
6551
return Res;
6552
6553
// Because we'll be zero-extending the output anyway if we don't have a specific
6554
// value for each input byte (via the Mask), we can 'anyext' the inputs.
6555
if (LHS.getValueType() != VT) {
6556
LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6557
RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6558
}
6559
6560
Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6561
6562
bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6563
if (NonTrivialMask && !Alt) {
6564
// Res = Mask & CMPB
6565
Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6566
CurDAG->getConstant(Mask, dl, VT));
6567
} else if (Alt) {
6568
// Res = (CMPB & Mask) | (~CMPB & Alt)
6569
// Which, as suggested here:
6570
// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6571
// can be written as:
6572
// Res = Alt ^ ((Alt ^ Mask) & CMPB)
6573
// useful because the (Alt ^ Mask) can be pre-computed.
6574
Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6575
CurDAG->getConstant(Mask ^ Alt, dl, VT));
6576
Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6577
CurDAG->getConstant(Alt, dl, VT));
6578
}
6579
6580
return Res;
6581
}
6582
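// Illustrative only; not used by the selector. A scalar model of what
// combineToCMPB() above targets: cmpb sets each byte of the result to 0xFF
// where the corresponding bytes of the two inputs are equal and to 0x00
// where they differ, and the masked-merge identity blends the Mask and Alt
// bytes with a single pre-computable constant. The helper names are
// hypothetical and exist only for this sketch.
namespace {
constexpr uint64_t cmpbModel(uint64_t LHS, uint64_t RHS) {
  uint64_t Res = 0;
  for (unsigned B = 0; B < 8; ++B) {
    uint64_t ByteMask = UINT64_C(0xFF) << (8 * B);
    if ((LHS & ByteMask) == (RHS & ByteMask))
      Res |= ByteMask;
  }
  return Res;
}
constexpr uint64_t maskedMergeModel(uint64_t CMPB, uint64_t Mask,
                                    uint64_t Alt) {
  // Equivalent to (CMPB & Mask) | (~CMPB & Alt); (Alt ^ Mask) is a constant.
  return Alt ^ ((Alt ^ Mask) & CMPB);
}
static_assert(cmpbModel(0x1122334455667788, 0x1122994455AA7788) ==
                  UINT64_C(0xFFFF00FFFF00FFFF),
              "bytes 2 and 5 differ, all others match");
static_assert(maskedMergeModel(UINT64_C(0xFF00FF00FF00FF00),
                               0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0) ==
                  UINT64_C(0x0FF00FF00FF00FF0),
              "Mask bytes where CMPB is 0xFF, Alt bytes elsewhere");
} // end anonymous namespace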
6583
// When CR bit registers are enabled, an extension of an i1 variable to an i32
6584
// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6585
// involves materializing a constant 0 or a 1 or both. If the result of the
6586
// extension is then operated upon by some operator that can be constant
6587
// folded with a constant 0 or 1, and that constant can be materialized using
6588
// only one instruction (like a zero or one), then we should fold those
6589
// operations into the select.
6590
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6591
if (!Subtarget->useCRBits())
6592
return;
6593
6594
if (N->getOpcode() != ISD::ZERO_EXTEND &&
6595
N->getOpcode() != ISD::SIGN_EXTEND &&
6596
N->getOpcode() != ISD::ANY_EXTEND)
6597
return;
6598
6599
if (N->getOperand(0).getValueType() != MVT::i1)
6600
return;
6601
6602
if (!N->hasOneUse())
6603
return;
6604
6605
SDLoc dl(N);
6606
EVT VT = N->getValueType(0);
6607
SDValue Cond = N->getOperand(0);
6608
SDValue ConstTrue =
6609
CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6610
SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6611
6612
do {
6613
SDNode *User = *N->use_begin();
6614
if (User->getNumOperands() != 2)
6615
break;
6616
6617
auto TryFold = [this, N, User, dl](SDValue Val) {
6618
SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6619
SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6620
SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6621
6622
return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6623
User->getValueType(0), {O0, O1});
6624
};
6625
6626
// FIXME: When the semantics of the interaction between select and undef
6627
// are clearly defined, it may turn out to be unnecessary to break here.
6628
SDValue TrueRes = TryFold(ConstTrue);
6629
if (!TrueRes || TrueRes.isUndef())
6630
break;
6631
SDValue FalseRes = TryFold(ConstFalse);
6632
if (!FalseRes || FalseRes.isUndef())
6633
break;
6634
6635
// For us to materialize these using one instruction, we must be able to
6636
// represent them as signed 16-bit integers.
6637
uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6638
if (!isInt<16>(True) || !isInt<16>(False))
6639
break;
6640
6641
// We can replace User with a new SELECT node, and try again to see if we
6642
// can fold the select with its user.
6643
Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6644
N = User;
6645
ConstTrue = TrueRes;
6646
ConstFalse = FalseRes;
6647
} while (N->hasOneUse());
6648
}
6649
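// Illustrative only; not used by the selector. The scalar shape of the fold
// performed by foldBoolExts() above: an i1 extension feeding a user that can
// be constant folded with 0/1 (or 0/-1) collapses into a single select
// between the two folded constants, each of which must fit a signed 16-bit
// immediate. The helper name is hypothetical and exists only for this sketch.
namespace {
constexpr int foldBoolExtModel(bool Cond) {
  // Before the fold: (Cond ? 1 : 0) + 7.  After the fold: Cond ? 8 : 7.
  return Cond ? 8 : 7;
}
static_assert(foldBoolExtModel(true) == (1 + 7) &&
                  foldBoolExtModel(false) == (0 + 7),
              "zext(i1) + 7 folds to select(i1, 8, 7)");
} // end anonymous namespace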
6650
void PPCDAGToDAGISel::PreprocessISelDAG() {
6651
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6652
6653
bool MadeChange = false;
6654
while (Position != CurDAG->allnodes_begin()) {
6655
SDNode *N = &*--Position;
6656
if (N->use_empty())
6657
continue;
6658
6659
SDValue Res;
6660
switch (N->getOpcode()) {
6661
default: break;
6662
case ISD::OR:
6663
Res = combineToCMPB(N);
6664
break;
6665
}
6666
6667
if (!Res)
6668
foldBoolExts(Res, N);
6669
6670
if (Res) {
6671
LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6672
LLVM_DEBUG(N->dump(CurDAG));
6673
LLVM_DEBUG(dbgs() << "\nNew: ");
6674
LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6675
LLVM_DEBUG(dbgs() << "\n");
6676
6677
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6678
MadeChange = true;
6679
}
6680
}
6681
6682
if (MadeChange)
6683
CurDAG->RemoveDeadNodes();
6684
}
6685
6686
/// PostprocessISelDAG - Perform some late peephole optimizations
6687
/// on the DAG representation.
6688
void PPCDAGToDAGISel::PostprocessISelDAG() {
6689
// Skip peepholes at -O0.
6690
if (TM.getOptLevel() == CodeGenOptLevel::None)
6691
return;
6692
6693
PeepholePPC64();
6694
PeepholeCROps();
6695
PeepholePPC64ZExt();
6696
}
6697
6698
// Check if all users of this node will become isel where the second operand
6699
// is the constant zero. If this is so, and if we can negate the condition,
6700
// then we can flip the true and false operands. This will allow the zero to
6701
// be folded with the isel so that we don't need to materialize a register
6702
// containing zero.
6703
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6704
for (const SDNode *User : N->uses()) {
6705
if (!User->isMachineOpcode())
6706
return false;
6707
if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6708
User->getMachineOpcode() != PPC::SELECT_I8)
6709
return false;
6710
6711
SDNode *Op1 = User->getOperand(1).getNode();
6712
SDNode *Op2 = User->getOperand(2).getNode();
6713
// If we have a degenerate select with two equal operands, swapping will
6714
// not do anything, and we may run into an infinite loop.
6715
if (Op1 == Op2)
6716
return false;
6717
6718
if (!Op2->isMachineOpcode())
6719
return false;
6720
6721
if (Op2->getMachineOpcode() != PPC::LI &&
6722
Op2->getMachineOpcode() != PPC::LI8)
6723
return false;
6724
6725
if (!isNullConstant(Op2->getOperand(0)))
6726
return false;
6727
}
6728
6729
return true;
6730
}
6731
6732
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6733
SmallVector<SDNode *, 4> ToReplace;
6734
for (SDNode *User : N->uses()) {
6735
assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6736
User->getMachineOpcode() == PPC::SELECT_I8) &&
6737
"Must have all select users");
6738
ToReplace.push_back(User);
6739
}
6740
6741
for (SDNode *User : ToReplace) {
6742
SDNode *ResNode =
6743
CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6744
User->getValueType(0), User->getOperand(0),
6745
User->getOperand(2),
6746
User->getOperand(1));
6747
6748
LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6749
LLVM_DEBUG(User->dump(CurDAG));
6750
LLVM_DEBUG(dbgs() << "\nNew: ");
6751
LLVM_DEBUG(ResNode->dump(CurDAG));
6752
LLVM_DEBUG(dbgs() << "\n");
6753
6754
ReplaceUses(User, ResNode);
6755
}
6756
}
6757
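// Illustrative only; not used by the selector. The identity exploited by
// AllUsersSelectZero() and SwapAllSelectUsers() above: when every user is a
// select whose second data operand is the constant zero, negating the
// condition and swapping the two data operands leaves the result unchanged
// while letting the zero be folded into the isel. The helper name is
// hypothetical and exists only for this sketch.
namespace {
constexpr bool selectZeroSwapModel(bool Cond, long X) {
  // select(Cond, X, 0) == select(!Cond, 0, X)
  return (Cond ? X : 0) == (!Cond ? 0 : X);
}
static_assert(selectZeroSwapModel(true, 42) && selectZeroSwapModel(false, 42),
              "swapping operands of a negated select preserves the value");
} // end anonymous namespace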
6758
void PPCDAGToDAGISel::PeepholeCROps() {
6759
bool IsModified;
6760
do {
6761
IsModified = false;
6762
for (SDNode &Node : CurDAG->allnodes()) {
6763
MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6764
if (!MachineNode || MachineNode->use_empty())
6765
continue;
6766
SDNode *ResNode = MachineNode;
6767
6768
bool Op1Set = false, Op1Unset = false,
6769
Op1Not = false,
6770
Op2Set = false, Op2Unset = false,
6771
Op2Not = false;
6772
6773
unsigned Opcode = MachineNode->getMachineOpcode();
6774
switch (Opcode) {
6775
default: break;
6776
case PPC::CRAND:
6777
case PPC::CRNAND:
6778
case PPC::CROR:
6779
case PPC::CRXOR:
6780
case PPC::CRNOR:
6781
case PPC::CREQV:
6782
case PPC::CRANDC:
6783
case PPC::CRORC: {
6784
SDValue Op = MachineNode->getOperand(1);
6785
if (Op.isMachineOpcode()) {
6786
if (Op.getMachineOpcode() == PPC::CRSET)
6787
Op2Set = true;
6788
else if (Op.getMachineOpcode() == PPC::CRUNSET)
6789
Op2Unset = true;
6790
else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6791
Op.getOperand(0) == Op.getOperand(1)) ||
6792
Op.getMachineOpcode() == PPC::CRNOT)
6793
Op2Not = true;
6794
}
6795
[[fallthrough]];
6796
}
6797
case PPC::BC:
6798
case PPC::BCn:
6799
case PPC::SELECT_I4:
6800
case PPC::SELECT_I8:
6801
case PPC::SELECT_F4:
6802
case PPC::SELECT_F8:
6803
case PPC::SELECT_SPE:
6804
case PPC::SELECT_SPE4:
6805
case PPC::SELECT_VRRC:
6806
case PPC::SELECT_VSFRC:
6807
case PPC::SELECT_VSSRC:
6808
case PPC::SELECT_VSRC: {
6809
SDValue Op = MachineNode->getOperand(0);
6810
if (Op.isMachineOpcode()) {
6811
if (Op.getMachineOpcode() == PPC::CRSET)
6812
Op1Set = true;
6813
else if (Op.getMachineOpcode() == PPC::CRUNSET)
6814
Op1Unset = true;
6815
else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6816
Op.getOperand(0) == Op.getOperand(1)) ||
6817
Op.getMachineOpcode() == PPC::CRNOT)
6818
Op1Not = true;
6819
}
6820
}
6821
break;
6822
}
6823
6824
bool SelectSwap = false;
6825
switch (Opcode) {
6826
default: break;
6827
case PPC::CRAND:
6828
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6829
// x & x = x
6830
ResNode = MachineNode->getOperand(0).getNode();
6831
else if (Op1Set)
6832
// 1 & y = y
6833
ResNode = MachineNode->getOperand(1).getNode();
6834
else if (Op2Set)
6835
// x & 1 = x
6836
ResNode = MachineNode->getOperand(0).getNode();
6837
else if (Op1Unset || Op2Unset)
6838
// x & 0 = 0 & y = 0
6839
ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6840
MVT::i1);
6841
else if (Op1Not)
6842
// ~x & y = andc(y, x)
6843
ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6844
MVT::i1, MachineNode->getOperand(1),
6845
MachineNode->getOperand(0).
6846
getOperand(0));
6847
else if (Op2Not)
6848
// x & ~y = andc(x, y)
6849
ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6850
MVT::i1, MachineNode->getOperand(0),
6851
MachineNode->getOperand(1).
6852
getOperand(0));
6853
else if (AllUsersSelectZero(MachineNode)) {
6854
ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6855
MVT::i1, MachineNode->getOperand(0),
6856
MachineNode->getOperand(1));
6857
SelectSwap = true;
6858
}
6859
break;
6860
case PPC::CRNAND:
6861
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6862
// nand(x, x) -> nor(x, x)
6863
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6864
MVT::i1, MachineNode->getOperand(0),
6865
MachineNode->getOperand(0));
6866
else if (Op1Set)
6867
// nand(1, y) -> nor(y, y)
6868
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6869
MVT::i1, MachineNode->getOperand(1),
6870
MachineNode->getOperand(1));
6871
else if (Op2Set)
6872
// nand(x, 1) -> nor(x, x)
6873
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6874
MVT::i1, MachineNode->getOperand(0),
6875
MachineNode->getOperand(0));
6876
else if (Op1Unset || Op2Unset)
6877
// nand(x, 0) = nand(0, y) = 1
6878
ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6879
MVT::i1);
6880
else if (Op1Not)
6881
// nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6882
ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6883
MVT::i1, MachineNode->getOperand(0).
6884
getOperand(0),
6885
MachineNode->getOperand(1));
6886
else if (Op2Not)
6887
// nand(x, ~y) = ~x | y = orc(y, x)
6888
ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6889
MVT::i1, MachineNode->getOperand(1).
6890
getOperand(0),
6891
MachineNode->getOperand(0));
6892
else if (AllUsersSelectZero(MachineNode)) {
6893
ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6894
MVT::i1, MachineNode->getOperand(0),
6895
MachineNode->getOperand(1));
6896
SelectSwap = true;
6897
}
6898
break;
6899
case PPC::CROR:
6900
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6901
// x | x = x
6902
ResNode = MachineNode->getOperand(0).getNode();
6903
else if (Op1Set || Op2Set)
6904
// x | 1 = 1 | y = 1
6905
ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6906
MVT::i1);
6907
else if (Op1Unset)
6908
// 0 | y = y
6909
ResNode = MachineNode->getOperand(1).getNode();
6910
else if (Op2Unset)
6911
// x | 0 = x
6912
ResNode = MachineNode->getOperand(0).getNode();
6913
else if (Op1Not)
6914
// ~x | y = orc(y, x)
6915
ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6916
MVT::i1, MachineNode->getOperand(1),
6917
MachineNode->getOperand(0).
6918
getOperand(0));
6919
else if (Op2Not)
6920
// x | ~y = orc(x, y)
6921
ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6922
MVT::i1, MachineNode->getOperand(0),
6923
MachineNode->getOperand(1).
6924
getOperand(0));
6925
else if (AllUsersSelectZero(MachineNode)) {
6926
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6927
MVT::i1, MachineNode->getOperand(0),
6928
MachineNode->getOperand(1));
6929
SelectSwap = true;
6930
}
6931
break;
6932
case PPC::CRXOR:
6933
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6934
// xor(x, x) = 0
6935
ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6936
MVT::i1);
6937
else if (Op1Set)
6938
// xor(1, y) -> nor(y, y)
6939
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6940
MVT::i1, MachineNode->getOperand(1),
6941
MachineNode->getOperand(1));
6942
else if (Op2Set)
6943
// xor(x, 1) -> nor(x, x)
6944
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6945
MVT::i1, MachineNode->getOperand(0),
6946
MachineNode->getOperand(0));
6947
else if (Op1Unset)
6948
// xor(0, y) = y
6949
ResNode = MachineNode->getOperand(1).getNode();
6950
else if (Op2Unset)
6951
// xor(x, 0) = x
6952
ResNode = MachineNode->getOperand(0).getNode();
6953
else if (Op1Not)
6954
// xor(~x, y) = eqv(x, y)
6955
ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6956
MVT::i1, MachineNode->getOperand(0).
6957
getOperand(0),
6958
MachineNode->getOperand(1));
6959
else if (Op2Not)
6960
// xor(x, ~y) = eqv(x, y)
6961
ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6962
MVT::i1, MachineNode->getOperand(0),
6963
MachineNode->getOperand(1).
6964
getOperand(0));
6965
else if (AllUsersSelectZero(MachineNode)) {
6966
ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6967
MVT::i1, MachineNode->getOperand(0),
6968
MachineNode->getOperand(1));
6969
SelectSwap = true;
6970
}
6971
break;
6972
case PPC::CRNOR:
6973
if (Op1Set || Op2Set)
6974
// nor(1, y) -> 0
6975
ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6976
MVT::i1);
6977
else if (Op1Unset)
6978
// nor(0, y) = ~y -> nor(y, y)
6979
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6980
MVT::i1, MachineNode->getOperand(1),
6981
MachineNode->getOperand(1));
6982
else if (Op2Unset)
6983
// nor(x, 0) = ~x
6984
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6985
MVT::i1, MachineNode->getOperand(0),
6986
MachineNode->getOperand(0));
6987
else if (Op1Not)
6988
// nor(~x, y) = andc(x, y)
6989
ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6990
MVT::i1, MachineNode->getOperand(0).
6991
getOperand(0),
6992
MachineNode->getOperand(1));
6993
else if (Op2Not)
6994
// nor(x, ~y) = andc(y, x)
6995
ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6996
MVT::i1, MachineNode->getOperand(1).
6997
getOperand(0),
6998
MachineNode->getOperand(0));
6999
else if (AllUsersSelectZero(MachineNode)) {
7000
ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7001
MVT::i1, MachineNode->getOperand(0),
7002
MachineNode->getOperand(1));
7003
SelectSwap = true;
7004
}
7005
break;
7006
case PPC::CREQV:
7007
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7008
// eqv(x, x) = 1
7009
ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7010
MVT::i1);
7011
else if (Op1Set)
7012
// eqv(1, y) = y
7013
ResNode = MachineNode->getOperand(1).getNode();
7014
else if (Op2Set)
7015
// eqv(x, 1) = x
7016
ResNode = MachineNode->getOperand(0).getNode();
7017
else if (Op1Unset)
7018
// eqv(0, y) = ~y -> nor(y, y)
7019
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7020
MVT::i1, MachineNode->getOperand(1),
7021
MachineNode->getOperand(1));
7022
else if (Op2Unset)
7023
// eqv(x, 0) = ~x
7024
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7025
MVT::i1, MachineNode->getOperand(0),
7026
MachineNode->getOperand(0));
7027
else if (Op1Not)
7028
// eqv(~x, y) = xor(x, y)
7029
ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7030
MVT::i1, MachineNode->getOperand(0).
7031
getOperand(0),
7032
MachineNode->getOperand(1));
7033
else if (Op2Not)
7034
// eqv(x, ~y) = xor(x, y)
7035
ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7036
MVT::i1, MachineNode->getOperand(0),
7037
MachineNode->getOperand(1).
7038
getOperand(0));
7039
else if (AllUsersSelectZero(MachineNode)) {
7040
ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7041
MVT::i1, MachineNode->getOperand(0),
7042
MachineNode->getOperand(1));
7043
SelectSwap = true;
7044
}
7045
break;
7046
case PPC::CRANDC:
7047
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7048
// andc(x, x) = 0
7049
ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7050
MVT::i1);
7051
else if (Op1Set)
7052
// andc(1, y) = ~y
7053
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7054
MVT::i1, MachineNode->getOperand(1),
7055
MachineNode->getOperand(1));
7056
else if (Op1Unset || Op2Set)
7057
// andc(0, y) = andc(x, 1) = 0
7058
ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7059
MVT::i1);
7060
else if (Op2Unset)
7061
// andc(x, 0) = x
7062
ResNode = MachineNode->getOperand(0).getNode();
7063
else if (Op1Not)
7064
// andc(~x, y) = ~(x | y) = nor(x, y)
7065
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7066
MVT::i1, MachineNode->getOperand(0).
7067
getOperand(0),
7068
MachineNode->getOperand(1));
7069
else if (Op2Not)
7070
// andc(x, ~y) = x & y
7071
ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7072
MVT::i1, MachineNode->getOperand(0),
7073
MachineNode->getOperand(1).
7074
getOperand(0));
7075
else if (AllUsersSelectZero(MachineNode)) {
7076
ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7077
MVT::i1, MachineNode->getOperand(1),
7078
MachineNode->getOperand(0));
7079
SelectSwap = true;
7080
}
7081
break;
7082
case PPC::CRORC:
7083
if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7084
// orc(x, x) = 1
7085
ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7086
MVT::i1);
7087
else if (Op1Set || Op2Unset)
7088
// orc(1, y) = orc(x, 0) = 1
7089
ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7090
MVT::i1);
7091
else if (Op2Set)
7092
// orc(x, 1) = x
7093
ResNode = MachineNode->getOperand(0).getNode();
7094
else if (Op1Unset)
7095
// orc(0, y) = ~y
7096
ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7097
MVT::i1, MachineNode->getOperand(1),
7098
MachineNode->getOperand(1));
7099
else if (Op1Not)
7100
// orc(~x, y) = ~(x & y) = nand(x, y)
7101
ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7102
MVT::i1, MachineNode->getOperand(0).
7103
getOperand(0),
7104
MachineNode->getOperand(1));
7105
else if (Op2Not)
7106
// orc(x, ~y) = x | y
7107
ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7108
MVT::i1, MachineNode->getOperand(0),
7109
MachineNode->getOperand(1).
7110
getOperand(0));
7111
else if (AllUsersSelectZero(MachineNode)) {
7112
ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7113
MVT::i1, MachineNode->getOperand(1),
7114
MachineNode->getOperand(0));
7115
SelectSwap = true;
7116
}
7117
break;
7118
case PPC::SELECT_I4:
7119
case PPC::SELECT_I8:
7120
case PPC::SELECT_F4:
7121
case PPC::SELECT_F8:
7122
case PPC::SELECT_SPE:
7123
case PPC::SELECT_SPE4:
7124
case PPC::SELECT_VRRC:
7125
case PPC::SELECT_VSFRC:
7126
case PPC::SELECT_VSSRC:
7127
case PPC::SELECT_VSRC:
7128
if (Op1Set)
7129
ResNode = MachineNode->getOperand(1).getNode();
7130
else if (Op1Unset)
7131
ResNode = MachineNode->getOperand(2).getNode();
7132
else if (Op1Not)
7133
ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7134
SDLoc(MachineNode),
7135
MachineNode->getValueType(0),
7136
MachineNode->getOperand(0).
7137
getOperand(0),
7138
MachineNode->getOperand(2),
7139
MachineNode->getOperand(1));
7140
break;
7141
case PPC::BC:
7142
case PPC::BCn:
7143
if (Op1Not)
7144
ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7145
PPC::BC,
7146
SDLoc(MachineNode),
7147
MVT::Other,
7148
MachineNode->getOperand(0).
7149
getOperand(0),
7150
MachineNode->getOperand(1),
7151
MachineNode->getOperand(2));
7152
// FIXME: Handle Op1Set, Op1Unset here too.
7153
break;
7154
}
7155
7156
// If we're inverting this node because it is used only by selects that
7157
// we'd like to swap, then swap the selects before the node replacement.
7158
if (SelectSwap)
7159
SwapAllSelectUsers(MachineNode);
7160
7161
if (ResNode != MachineNode) {
7162
LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7163
LLVM_DEBUG(MachineNode->dump(CurDAG));
7164
LLVM_DEBUG(dbgs() << "\nNew: ");
7165
LLVM_DEBUG(ResNode->dump(CurDAG));
7166
LLVM_DEBUG(dbgs() << "\n");
7167
7168
ReplaceUses(MachineNode, ResNode);
7169
IsModified = true;
7170
}
7171
}
7172
if (IsModified)
7173
CurDAG->RemoveDeadNodes();
7174
} while (IsModified);
7175
}
7176
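// Illustrative only; not used by the selector. Compile-time checks of two of
// the 1-bit identities that PeepholeCROps() above uses to rewrite CR-logical
// nodes with negated operands. The helper names are hypothetical and exist
// only for this sketch.
namespace {
constexpr bool crNandModel(bool A, bool B) { return !(A && B); }
constexpr bool crNorModel(bool A, bool B) { return !(A || B); }
constexpr bool crOrcModel(bool A, bool B) { return A || !B; }
constexpr bool crAndcModel(bool A, bool B) { return A && !B; }
// nand(~x, y) == orc(x, y) for all x, y.
static_assert(crNandModel(!false, false) == crOrcModel(false, false) &&
                  crNandModel(!false, true) == crOrcModel(false, true) &&
                  crNandModel(!true, false) == crOrcModel(true, false) &&
                  crNandModel(!true, true) == crOrcModel(true, true),
              "nand(~x, y) == orc(x, y)");
// nor(~x, y) == andc(x, y) for all x, y.
static_assert(crNorModel(!false, false) == crAndcModel(false, false) &&
                  crNorModel(!false, true) == crAndcModel(false, true) &&
                  crNorModel(!true, false) == crAndcModel(true, false) &&
                  crNorModel(!true, true) == crAndcModel(true, true),
              "nor(~x, y) == andc(x, y)");
} // end anonymous namespace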
7177
// Gather the set of 32-bit operations that are known to have their
7178
// higher-order 32 bits zero, where ToPromote contains all such operations.
7179
static bool PeepholePPC64ZExtGather(SDValue Op32,
7180
SmallPtrSetImpl<SDNode *> &ToPromote) {
7181
if (!Op32.isMachineOpcode())
7182
return false;
7183
7184
// First, check for the "frontier" instructions (those that will clear the
7185
// higher-order 32 bits).
7186
7187
// For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7188
// around. If it does not, then these instructions will clear the
7189
// higher-order bits.
7190
if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7191
Op32.getMachineOpcode() == PPC::RLWNM) &&
7192
Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7193
ToPromote.insert(Op32.getNode());
7194
return true;
7195
}
7196
7197
// SLW and SRW always clear the higher-order bits.
7198
if (Op32.getMachineOpcode() == PPC::SLW ||
7199
Op32.getMachineOpcode() == PPC::SRW) {
7200
ToPromote.insert(Op32.getNode());
7201
return true;
7202
}
7203
7204
// For LI and LIS, we need the immediate to be positive (so that it is not
7205
// sign extended).
7206
if (Op32.getMachineOpcode() == PPC::LI ||
7207
Op32.getMachineOpcode() == PPC::LIS) {
7208
if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7209
return false;
7210
7211
ToPromote.insert(Op32.getNode());
7212
return true;
7213
}
7214
7215
// LHBRX and LWBRX always clear the higher-order bits.
7216
if (Op32.getMachineOpcode() == PPC::LHBRX ||
7217
Op32.getMachineOpcode() == PPC::LWBRX) {
7218
ToPromote.insert(Op32.getNode());
7219
return true;
7220
}
7221
7222
// CNT[LT]ZW always produces a value in [0,32], so the result is zero extended.
7223
if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7224
Op32.getMachineOpcode() == PPC::CNTTZW) {
7225
ToPromote.insert(Op32.getNode());
7226
return true;
7227
}
7228
7229
// Next, check for those instructions we can look through.
7230
7231
// If the mask does not wrap around, then the higher-order bits are
7232
// taken directly from the first operand.
7233
if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7234
Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7235
SmallPtrSet<SDNode *, 16> ToPromote1;
7236
if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7237
return false;
7238
7239
ToPromote.insert(Op32.getNode());
7240
ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7241
return true;
7242
}
7243
7244
// For OR, the higher-order bits are zero if that is true for both operands.
7245
// For SELECT_I4, the same is true (but the relevant operand numbers are
7246
// shifted by 1).
7247
if (Op32.getMachineOpcode() == PPC::OR ||
7248
Op32.getMachineOpcode() == PPC::SELECT_I4) {
7249
unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7250
SmallPtrSet<SDNode *, 16> ToPromote1;
7251
if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7252
return false;
7253
if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7254
return false;
7255
7256
ToPromote.insert(Op32.getNode());
7257
ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7258
return true;
7259
}
7260
7261
// For ORI and ORIS, we need the higher-order bits of the first operand to be
7262
// zero, and also for the constant to be positive (so that it is not sign
7263
// extended).
7264
if (Op32.getMachineOpcode() == PPC::ORI ||
7265
Op32.getMachineOpcode() == PPC::ORIS) {
7266
SmallPtrSet<SDNode *, 16> ToPromote1;
7267
if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7268
return false;
7269
if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7270
return false;
7271
7272
ToPromote.insert(Op32.getNode());
7273
ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7274
return true;
7275
}
7276
7277
// The higher-order bits of AND are zero if that is true for at least one of
7278
// the operands.
7279
if (Op32.getMachineOpcode() == PPC::AND) {
7280
SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7281
bool Op0OK =
7282
PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7283
bool Op1OK =
7284
PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7285
if (!Op0OK && !Op1OK)
7286
return false;
7287
7288
ToPromote.insert(Op32.getNode());
7289
7290
if (Op0OK)
7291
ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7292
7293
if (Op1OK)
7294
ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
7295
7296
return true;
7297
}
7298
7299
// For ANDI and ANDIS, the higher-order bits are zero if either that is true
7300
// of the first operand, or if the second operand is positive (so that it is
7301
// not sign extended).
7302
if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7303
Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7304
SmallPtrSet<SDNode *, 16> ToPromote1;
7305
bool Op0OK =
7306
PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7307
bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7308
if (!Op0OK && !Op1OK)
7309
return false;
7310
7311
ToPromote.insert(Op32.getNode());
7312
7313
if (Op0OK)
7314
ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7315
7316
return true;
7317
}
7318
7319
return false;
7320
}
7321
7322
void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7323
if (!Subtarget->isPPC64())
7324
return;
7325
7326
// When we zero-extend from i32 to i64, we use a pattern like this:
7327
// def : Pat<(i64 (zext i32:$in)),
7328
// (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7329
// 0, 32)>;
7330
// There are several 32-bit shift/rotate instructions, however, that will
7331
// clear the higher-order bits of their output, rendering the RLDICL
7332
// unnecessary. When that happens, we remove it here, and redefine the
7333
// relevant 32-bit operation to be a 64-bit operation.
7334
7335
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7336
7337
bool MadeChange = false;
7338
while (Position != CurDAG->allnodes_begin()) {
7339
SDNode *N = &*--Position;
7340
// Skip dead nodes and any non-machine opcodes.
7341
if (N->use_empty() || !N->isMachineOpcode())
7342
continue;
7343
7344
if (N->getMachineOpcode() != PPC::RLDICL)
7345
continue;
7346
7347
if (N->getConstantOperandVal(1) != 0 ||
7348
N->getConstantOperandVal(2) != 32)
7349
continue;
7350
7351
SDValue ISR = N->getOperand(0);
7352
if (!ISR.isMachineOpcode() ||
7353
ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7354
continue;
7355
7356
if (!ISR.hasOneUse())
7357
continue;
7358
7359
if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7360
continue;
7361
7362
SDValue IDef = ISR.getOperand(0);
7363
if (!IDef.isMachineOpcode() ||
7364
IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7365
continue;
7366
7367
// We now know that we're looking at a canonical i32 -> i64 zext. See if we
7368
// can get rid of it.
7369
7370
SDValue Op32 = ISR->getOperand(1);
7371
if (!Op32.isMachineOpcode())
7372
continue;
7373
7374
// There are some 32-bit instructions that always clear the high-order 32
7375
// bits, and there are also some instructions (like AND) that we can look
7376
// through.
7377
SmallPtrSet<SDNode *, 16> ToPromote;
7378
if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7379
continue;
7380
7381
// If the ToPromote set contains nodes that have uses outside of the set
7382
// (except for the original INSERT_SUBREG), then abort the transformation.
7383
bool OutsideUse = false;
7384
for (SDNode *PN : ToPromote) {
7385
for (SDNode *UN : PN->uses()) {
7386
if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7387
OutsideUse = true;
7388
break;
7389
}
7390
}
7391
7392
if (OutsideUse)
7393
break;
7394
}
7395
if (OutsideUse)
7396
continue;
7397
7398
MadeChange = true;
7399
7400
// We now know that this zero extension can be removed by promoting the
7401
// nodes in ToPromote to 64-bit operations, where for operations in the
7402
// frontier of the set, we need to insert INSERT_SUBREGs for their
7403
// operands.
7404
for (SDNode *PN : ToPromote) {
7405
unsigned NewOpcode;
7406
switch (PN->getMachineOpcode()) {
7407
default:
7408
llvm_unreachable("Don't know the 64-bit variant of this instruction");
7409
case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7410
case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7411
case PPC::SLW: NewOpcode = PPC::SLW8; break;
7412
case PPC::SRW: NewOpcode = PPC::SRW8; break;
7413
case PPC::LI: NewOpcode = PPC::LI8; break;
7414
case PPC::LIS: NewOpcode = PPC::LIS8; break;
7415
case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7416
case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7417
case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7418
case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7419
case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7420
case PPC::OR: NewOpcode = PPC::OR8; break;
7421
case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7422
case PPC::ORI: NewOpcode = PPC::ORI8; break;
7423
case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7424
case PPC::AND: NewOpcode = PPC::AND8; break;
7425
case PPC::ANDI_rec:
7426
NewOpcode = PPC::ANDI8_rec;
7427
break;
7428
case PPC::ANDIS_rec:
7429
NewOpcode = PPC::ANDIS8_rec;
7430
break;
7431
}
7432
7433
// Note: During the replacement process, the nodes will be in an
7434
// inconsistent state (some instructions will have operands with values
7435
// of the wrong type). Once done, however, everything should be right
7436
// again.
7437
7438
SmallVector<SDValue, 4> Ops;
7439
for (const SDValue &V : PN->ops()) {
7440
if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7441
!isa<ConstantSDNode>(V)) {
7442
SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7443
SDNode *ReplOp =
7444
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7445
ISR.getNode()->getVTList(), ReplOpOps);
7446
Ops.push_back(SDValue(ReplOp, 0));
7447
} else {
7448
Ops.push_back(V);
7449
}
7450
}
7451
7452
// Because all to-be-promoted nodes only have users that are other
7453
// promoted nodes (or the original INSERT_SUBREG), we can safely replace
7454
// the i32 result value type with i64.
7455
7456
SmallVector<EVT, 2> NewVTs;
7457
SDVTList VTs = PN->getVTList();
7458
for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7459
if (VTs.VTs[i] == MVT::i32)
7460
NewVTs.push_back(MVT::i64);
7461
else
7462
NewVTs.push_back(VTs.VTs[i]);
7463
7464
LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7465
LLVM_DEBUG(PN->dump(CurDAG));
7466
7467
CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7468
7469
LLVM_DEBUG(dbgs() << "\nNew: ");
7470
LLVM_DEBUG(PN->dump(CurDAG));
7471
LLVM_DEBUG(dbgs() << "\n");
7472
}
7473
7474
// Now we replace the original zero extend and its associated INSERT_SUBREG
7475
// with the value feeding the INSERT_SUBREG (which has now been promoted to
7476
// return an i64).
7477
7478
LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7479
LLVM_DEBUG(N->dump(CurDAG));
7480
LLVM_DEBUG(dbgs() << "\nNew: ");
7481
LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7482
LLVM_DEBUG(dbgs() << "\n");
7483
7484
ReplaceUses(N, Op32.getNode());
7485
}
7486
7487
if (MadeChange)
7488
CurDAG->RemoveDeadNodes();
7489
}
7490
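// Illustrative only; not used by the selector. The scalar fact behind
// PeepholePPC64ZExt() above: when the i32 value was produced by an operation
// that already leaves bits 32-63 clear (modelled here by a 32-bit value
// widened to 64 bits and logically shifted right), the explicit clearing of
// the upper 32 bits performed by the canonical zero-extension pattern is a
// no-op. The helper name is hypothetical and exists only for this sketch.
namespace {
constexpr bool zextAlreadyClearModel(uint32_t X, unsigned ShAmt) {
  uint64_t SrwLike = uint64_t(X) >> (ShAmt & 31);   // frontier op result
  uint64_t Rldicl = SrwLike & UINT64_C(0xFFFFFFFF); // what the zext would do
  return SrwLike == Rldicl;                         // always true
}
static_assert(zextAlreadyClearModel(0xFFFFFFFFu, 0) &&
                  zextAlreadyClearModel(0x80000000u, 1),
              "the RLDICL clearing bits 32-63 is redundant here");
} // end anonymous namespace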
7491
static bool isVSXSwap(SDValue N) {
7492
if (!N->isMachineOpcode())
7493
return false;
7494
unsigned Opc = N->getMachineOpcode();
7495
7496
// Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7497
// operand is 2.
7498
if (Opc == PPC::XXPERMDIs) {
7499
return isa<ConstantSDNode>(N->getOperand(1)) &&
7500
N->getConstantOperandVal(1) == 2;
7501
} else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7502
return N->getOperand(0) == N->getOperand(1) &&
7503
isa<ConstantSDNode>(N->getOperand(2)) &&
7504
N->getConstantOperandVal(2) == 2;
7505
}
7506
7507
return false;
7508
}
7509
7510
// TODO: Make this complete and replace with a table-gen bit.
7511
static bool isLaneInsensitive(SDValue N) {
7512
if (!N->isMachineOpcode())
7513
return false;
7514
unsigned Opc = N->getMachineOpcode();
7515
7516
switch (Opc) {
7517
default:
7518
return false;
7519
case PPC::VAVGSB:
7520
case PPC::VAVGUB:
7521
case PPC::VAVGSH:
7522
case PPC::VAVGUH:
7523
case PPC::VAVGSW:
7524
case PPC::VAVGUW:
7525
case PPC::VMAXFP:
7526
case PPC::VMAXSB:
7527
case PPC::VMAXUB:
7528
case PPC::VMAXSH:
7529
case PPC::VMAXUH:
7530
case PPC::VMAXSW:
7531
case PPC::VMAXUW:
7532
case PPC::VMINFP:
7533
case PPC::VMINSB:
7534
case PPC::VMINUB:
7535
case PPC::VMINSH:
7536
case PPC::VMINUH:
7537
case PPC::VMINSW:
7538
case PPC::VMINUW:
7539
case PPC::VADDFP:
7540
case PPC::VADDUBM:
7541
case PPC::VADDUHM:
7542
case PPC::VADDUWM:
7543
case PPC::VSUBFP:
7544
case PPC::VSUBUBM:
7545
case PPC::VSUBUHM:
7546
case PPC::VSUBUWM:
7547
case PPC::VAND:
7548
case PPC::VANDC:
7549
case PPC::VOR:
7550
case PPC::VORC:
7551
case PPC::VXOR:
7552
case PPC::VNOR:
7553
case PPC::VMULUWM:
7554
return true;
7555
}
7556
}
7557
7558
// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7559
// lane-insensitive.
7560
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7561
// Our desired xxswap might be the source of a COPY_TO_REGCLASS.
7562
// TODO: Can we turn this into a common helper on the DAG?
7563
auto SkipRCCopy = [](SDValue V) {
7564
while (V->isMachineOpcode() &&
7565
V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7566
// All values in the chain should have single use.
7567
if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
7568
return SDValue();
7569
V = V->getOperand(0);
7570
}
7571
return V.hasOneUse() ? V : SDValue();
7572
};
7573
7574
SDValue VecOp = SkipRCCopy(N->getOperand(0));
7575
if (!VecOp || !isLaneInsensitive(VecOp))
7576
return;
7577
7578
SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7579
RHS = SkipRCCopy(VecOp.getOperand(1));
7580
if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7581
return;
7582
7583
// These swaps may still have chain-uses here, count on dead code elimination
7584
// in following passes to remove them.
7585
DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7586
DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7587
DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7588
}
7589
7590
// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7591
static bool hasAIXSmallTLSAttr(SDValue Val) {
7592
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
7593
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7594
if (GV->hasAttribute("aix-small-tls"))
7595
return true;
7596
7597
return false;
7598
}
7599
7600
// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7601
// accesses?
7602
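// A sketch of the candidate accepted below (illustrative, not an exact node
// dump):
//   %addr = ADDI8 <thread pointer, e.g. $x13>, target-global @tlsvar
//           [MO_TPREL_FLAG or MO_TLSLD_FLAG]
// For the local-exec model the first operand must be the thread pointer
// register; in all cases the second operand must be the TLS variable's global
// address carrying one of the two flags above.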
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG,
                                                       SDValue ADDIToFold) {
  // Check if ADDIToFold (the ADDI that we want to fold into local-exec
  // accesses) is truly an ADDI.
  if (!ADDIToFold.isMachineOpcode() ||
      (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
    return false;

  // Folding is only allowed when the AIX small-local-[exec|dynamic] TLS target
  // attribute or the 'aix-small-tls' global variable attribute is present.
  const PPCSubtarget &Subtarget =
      DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
  SDValue TLSVarNode = ADDIToFold.getOperand(1);
  if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
        Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
    return false;

  // The second operand of the ADDIToFold should be the global TLS address
  // (the local-exec TLS variable). We only perform the folding if the TLS
  // variable is the second operand.
  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
  if (!GA)
    return false;

  if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
    // The first operand of the ADDIToFold should be the thread pointer;
    // the transformation is only performed when that is the case.
    SDValue TPRegNode = ADDIToFold.getOperand(0);
    RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
    if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
      return false;
  }

  // The local-[exec|dynamic] TLS variable should only have the
  // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
  // performed if the flag is not set.
  unsigned TargetFlags = GA->getTargetFlags();
  if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
        TargetFlags == PPCII::MO_TLSLD_FLAG))
    return false;

  // If all conditions are satisfied, the ADDI is valid for folding.
  return true;
}

// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
// another addi, fold this sequence into a single addi if possible. Before this
// optimization, the sequence appears as:
//   addi rN, r13, sym@[le|ld]
//   addi rM, rN, imm
// After this optimization, we can fold the two addi into a single one:
//   addi rM, r13, sym@[le|ld] + imm
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG) {
  if (N->getMachineOpcode() != PPC::ADDI8)
    return;

  // InitialADDI is the addi feeding into N (also an addi), and the addi that
  // we want optimized out.
  SDValue InitialADDI = N->getOperand(0);

  if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
    return;

  // The second operand of the InitialADDI should be the global TLS address
  // (the local-[exec|dynamic] TLS variable), with the
  // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
  // isEligibleToFoldADDIForFasterLocalAccesses().
  SDValue TLSVarNode = InitialADDI.getOperand(1);
  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
  assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
               "local-[exec|dynamic] accesses!");
  unsigned TargetFlags = GA->getTargetFlags();

  // The second operand of the addi that we want to preserve will be an
  // immediate. We add this immediate, together with the address of the TLS
  // variable found in InitialADDI, in order to preserve the correct TLS address
  // information during assembly printing. The offset is likely to be non-zero
  // when we end up in this case.
  int Offset = N->getConstantOperandVal(1);
  TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
                                           Offset, TargetFlags);

  (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
  if (InitialADDI.getNode()->use_empty())
    DAG->RemoveDeadNode(InitialADDI.getNode());
}

void PPCDAGToDAGISel::PeepholePPC64() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (isVSXSwap(SDValue(N, 0)))
      reduceVSXSwap(N, CurDAG);

    // This optimization is performed for non-TOC-based local-[exec|dynamic]
    // accesses.
    foldADDIForFasterLocalAccesses(N, CurDAG);

    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    bool RequiresMod4Offset = false;

    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      [[fallthrough]];
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      [[fallthrough]];
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }

    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N.
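    // For example (an illustrative sketch of the TOC-based case, written as
    // final assembly rather than MachineSDNodes):
    //   addis rT, r2, sym@toc@ha
    //   addi  rA, rT, sym@toc@l
    //   ld    rD, 0(rA)
    // becomes
    //   addis rT, r2, sym@toc@ha
    //   ld    rD, sym@toc@l(rT)
    // and the now-dead addi is removed at the end of this loop.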
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL8, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL8:
      // Skip the following peephole optimizations for ADDItocL8 on AIX, which
      // is used for toc-data access.
      if (Subtarget->isAIXABI())
        continue;
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
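    // Informal reasoning: the address is materialized as (sym@ha << 16) +
    // sym@l; as long as the folded-in offset is smaller than the known
    // alignment of sym, sym and sym + offset share the same @ha value, so only
    // the low-part instruction has to change.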
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset; we just need to also
        // update the offset (i.e. the symbol addend) on the addis.
        if (Base.getMachineOpcode() != PPC::ADDItocL8)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // Global addresses can be folded, but only if they are sufficiently
      // aligned.
      if (RequiresMod4Offset) {
        if (GlobalAddressSDNode *GA =
                dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
          const GlobalValue *GV = GA->getGlobal();
          Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
          if (Alignment < 4)
            continue;
        }
      }

      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        // This optimization is performed for non-TOC-based local-[exec|dynamic]
        // accesses.
        if (isEligibleToFoldADDIForFasterLocalAccesses(CurDAG, Base)) {
          // Add the non-zero offset information into the load or store
          // instruction to be used for non-TOC-based local-[exec|dynamic]
          // accesses.
          GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
          assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
                       "addi into local-[exec|dynamic] accesses!");
          ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                                   MVT::i64, Offset,
                                                   GA->getTargetFlags());
        } else
          continue;
      }
    }

    // We found an opportunity. Reverse the operands from the add
    // immediate and substitute them into the load or store. If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
      } else if (ConstantPoolSDNode *CP =
                     dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
                                                Offset, Flags);
      }
    }

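    // Rebuild the memory operands of N with the (possibly relocated) immediate
    // as the displacement and the add-immediate's own base register as the
    // base; a store additionally keeps its stored value first and its chain
    // last, while a load keeps only its chain last.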
    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new PPCDAGToDAGISelLegacy(TM, OptLevel);
}