GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove after supported by Tablegen-erated instruction selection.
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          Align Alignment) const;

  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const unsigned DstReg,
                          const TargetRegisterClass *DstRC,
                          const unsigned SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                       MachineFunction &MF) const;
  bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;

  // Emit an insert-subreg instruction and insert it before MachineInstr &I.
  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // Emit an extract-subreg instruction and insert it before MachineInstr &I.
  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

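// Pull in the tblgen-erated selectImpl() defined by the matcher tables. The
// same X86GenGlobalISel.inc file is included several times above, sliced by
// the GET_GLOBALISEL_* guard macros into declaration, temporaries, and
// implementation pieces.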
#define GET_GLOBALISEL_IMPL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
                                               const X86Subtarget &STI,
                                               const X86RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  if (RB.getID() == X86::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 8)
      return &X86::GR8RegClass;
    if (Ty.getSizeInBits() == 16)
      return &X86::GR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::GR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::GR64RegClass;
  }
  if (RB.getID() == X86::VECRRegBankID) {
    if (Ty.getSizeInBits() == 16)
      return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
    if (Ty.getSizeInBits() == 256)
      return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
    if (Ty.getSizeInBits() == 512)
      return &X86::VR512RegClass;
  }

  if (RB.getID() == X86::PSRRegBankID) {
    if (Ty.getSizeInBits() == 80)
      return &X86::RFP80RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::RFP64RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::RFP32RegClass;
  }

  llvm_unreachable("Unknown RegBank!");
}

const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
                                    MachineRegisterInfo &MRI) const {
  const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  return getRegClass(Ty, RegBank);
}

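// Map a GPR register class to the subregister index that selects its width
// inside a wider GPR, e.g. GR32 maps to sub_32bit (EAX inside RAX).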
static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  unsigned SubIdx = X86::NoSubRegister;
  if (RC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (RC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (RC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  }

  return SubIdx;
}

static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
  assert(Reg.isPhysical());
  if (X86::GR64RegClass.contains(Reg))
    return &X86::GR64RegClass;
  if (X86::GR32RegClass.contains(Reg))
    return &X86::GR32RegClass;
  if (X86::GR16RegClass.contains(Reg))
    return &X86::GR16RegClass;
  if (X86::GR8RegClass.contains(Reg))
    return &X86::GR8RegClass;

  llvm_unreachable("Unknown RegClass for PhysReg!");
}

// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
bool X86InstructionSelector::selectDebugInstr(MachineInstr &I,
                                              MachineRegisterInfo &MRI) const {
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClass(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

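// COPYs to or from physical registers may need a width fixup, because ABI
// lowering can pair a virtual register with a physical register of a
// different size: a narrower virtual source is widened with SUBREG_TO_REG
// (an implicit anyext), and a wider physical source is read through the
// subregister that matches the destination class.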
// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstReg.isPhysical()) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!SrcReg.isPhysical() || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not exactly match.
          (SrcReg.isPhysical() &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      SrcReg.isPhysical()) {
    // Change the physical register to perform the truncate.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or its defs.
  // Copies do not have constraints.
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode)) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    if (I.isDebugInstr())
      return selectDebugInstr(I, MRI);

    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_USUBO:
    return selectUAddSub(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectMulDivRem(I, MRI, MF);
  case TargetOpcode::G_SELECT:
    return selectSelect(I, MRI, MF);
  }

  return false;
}

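// Pick the concrete X86 move opcode for a G_LOAD/G_STORE from the type, the
// register bank, the subtarget features, and the access alignment: scalar
// GPR accesses map to MOVrm/MOVmr, scalar FP to (V)MOVSS/(V)MOVSD, x87 to
// LD_Fp/ST_Fp, and vectors to MOVAPS variants when aligned or MOVUPS when
// not. Returns Opc unchanged if no instruction fits.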
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                                const RegisterBank &RB,
                                                unsigned Opc,
                                                Align Alignment) const {
  bool Isload = (Opc == TargetOpcode::G_LOAD);
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (Ty == LLT::scalar(8)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV8rm : X86::MOV8mr;
  } else if (Ty == LLT::scalar(16)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV16rm : X86::MOV16mr;
  } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV32rm : X86::MOV32mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
                       HasAVX    ? X86::VMOVSSrm_alt :
                                   X86::MOVSSrm_alt)
                    : (HasAVX512 ? X86::VMOVSSZmr :
                       HasAVX    ? X86::VMOVSSmr :
                                   X86::MOVSSmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV64rm : X86::MOV64mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
                       HasAVX    ? X86::VMOVSDrm_alt :
                                   X86::MOVSDrm_alt)
                    : (HasAVX512 ? X86::VMOVSDZmr :
                       HasAVX    ? X86::VMOVSDmr :
                                   X86::MOVSDmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
  } else if (Ty == LLT::scalar(80)) {
    return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
  } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
    if (Alignment >= Align(16))
      return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVAPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
                    : (HasVLX ? X86::VMOVAPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVAPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVUPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
                    : (HasVLX ? X86::VMOVUPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVUPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
    if (Alignment >= Align(32))
      return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
                              : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                          : X86::VMOVAPSYrm)
                    : (HasVLX ? X86::VMOVAPSZ256mr
                              : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                          : X86::VMOVAPSYmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
                              : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                          : X86::VMOVUPSYrm)
                    : (HasVLX ? X86::VMOVUPSZ256mr
                              : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                          : X86::VMOVUPSYmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
    if (Alignment >= Align(64))
      return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
  return Opc;
}

// Fill in an address from the given instruction.
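// For example, (G_PTR_ADD base, G_CONSTANT imm) with an offset that fits in
// a signed 32-bit displacement becomes base+disp, so no separate add is
// emitted; a G_FRAME_INDEX becomes a frame-index base operand.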
static void X86SelectAddress(const MachineInstr &I,
                             const MachineRegisterInfo &MRI,
                             X86AddressMode &AM) {
  assert(I.getOperand(0).isReg() && "unsupported operand.");
  assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
         "unsupported type.");

  if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
    if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
      int64_t Imm = *COff;
      if (isInt<32>(Imm)) { // Check for displacement overflow.
        AM.Disp = static_cast<int32_t>(Imm);
        AM.Base.Reg = I.getOperand(1).getReg();
        return;
      }
    }
  } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    AM.Base.FrameIndex = I.getOperand(1).getIndex();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    return;
  }

  // Default behavior.
  AM.Base.Reg = I.getOperand(0).getReg();
}

bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "Only G_STORE and G_LOAD are expected for selection");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact the appropriate MMO
    // is already on the instruction we're mutating, and thus we don't need to
    // make any changes. So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
  if (NewOpc == Opc)
    return false;

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  const MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());

  if (Ptr->getOpcode() == TargetOpcode::G_CONSTANT_POOL) {
    assert(Opc == TargetOpcode::G_LOAD &&
           "Only G_LOAD from constant pool is expected");
    // TODO: Need a separate move for Large model
    if (TM.getCodeModel() == CodeModel::Large)
      return false;

    unsigned char OpFlag = STI.classifyLocalReference(nullptr);
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_GOTOFF)
      PICBase = TII.getGlobalBaseReg(&MF);
    else if (STI.is64Bit())
      PICBase = X86::RIP;

    I.removeOperand(1);
    addConstantPoolReference(MIB, Ptr->getOperand(1).getIndex(), PICBase,
                             OpFlag);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  X86AddressMode AM;
  X86SelectAddress(*Ptr, MRI, AM);
  if (Opc == TargetOpcode::G_LOAD) {
    I.removeOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
    I.removeOperand(1);
    I.removeOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  I.addImplicitDefUseOperands(MF);
  return Constrained;
}

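// Both frame indices and pointer arithmetic are selected to LEA, which
// computes an effective address without reading memory or touching EFLAGS.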
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  if (Ty == LLT::pointer(0, 64))
    return X86::LEA64r;
  else if (Ty == LLT::pointer(0, 32))
    return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  else
    llvm_unreachable("Can't get LEA opcode. Unsupported type.");
}

bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to calculate frame index and GEP
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    addOffset(MIB, 0);
  } else {
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp);        // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal()) {
    return false; // TODO: we don't support TLS yet.
  }

  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return false;

  X86AddressMode AM;
  AM.GV = GV;
  AM.GVOpFlags = STI.classifyGlobalReference(GV);

  // TODO: The ABI requires an extra load. Not supported yet.
  if (isGlobalStubReference(AM.GVOpFlags))
    return false;

  // TODO: This reference is relative to the PIC base. Not supported yet.
  if (isGlobalRelativeToPICBase(AM.GVOpFlags))
    return false;

  if (STI.isPICStyleRIPRel()) {
    // Use rip-relative addressing.
    assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    AM.Base.Reg = X86::RIP;
  }

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  I.removeOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

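// Materialize a G_CONSTANT with the MOVri form matching its width; a 64-bit
// value that fits in a sign-extended 32-bit immediate uses the shorter
// MOV64ri32 encoding.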
bool X86InstructionSelector::selectConstant(MachineInstr &I,
                                            MachineRegisterInfo &MRI,
                                            MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
    return false;

  uint64_t Val = 0;
  if (I.getOperand(1).isCImm()) {
    Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    Val = I.getOperand(1).getImm();
  } else
    llvm_unreachable("Unsupported operand type.");

  unsigned NewOpc;
  switch (Ty.getSizeInBits()) {
  case 8:
    NewOpc = X86::MOV8ri;
    break;
  case 16:
    NewOpc = X86::MOV16ri;
    break;
  case 32:
    NewOpc = X86::MOV32ri;
    break;
  case 64:
    // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used
    if (isInt<32>(Val))
      NewOpc = X86::MOV64ri32;
    else
      NewOpc = X86::MOV64ri;
    break;
  default:
    llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  }

  I.setDesc(TII.get(NewOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// Helper function for selectTruncOrPtrToInt and selectAnyext.
// Returns true if DstRC lives in a scalar floating-point register class and
// SrcRC lives in a 128-bit vector class.
static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC) {
  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
}

bool X86InstructionSelector::selectTurnIntoCOPY(
    MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
    const TargetRegisterClass *DstRC, const unsigned SrcReg,
    const TargetRegisterClass *SrcRC) const {

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If this is a truncation of a value that lives in a vector class and goes
  // into a floating-point class, just replace it with a copy, as we are able
  // to select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}

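// Widening G_ZEXT patterns are handled by tablegen (see the asserts below);
// this routine only handles an s1 source, which is materialized by masking
// the low bit with AND after widening the value with INSERT_SUBREG when the
// destination is wider than 8 bits.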
bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
         "8=>16 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
         "8=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
         "16=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
         "32=>64 Zext is handled by tablegen");

  if (SrcTy != LLT::scalar(1))
    return false;

  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri32;
  else
    return false;

  Register DefReg = SrcReg;
  if (DstTy != LLT::scalar(8)) {
    Register ImpDefReg =
        MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
        .addReg(ImpDefReg)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If this is an ANY_EXT of a value that lives in a floating-point class
  // and goes into a vector class, just replace it with a copy, as we are
  // able to select it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCmp(MachineInstr &I,
                                       MachineRegisterInfo &MRI,
                                       MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate());

  Register LHS = I.getOperand(2).getReg();
  Register RHS = I.getOperand(3).getReg();

  if (SwapArgs)
    std::swap(LHS, RHS);

  unsigned OpCmp;
  LLT Ty = MRI.getType(LHS);

  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMP8rr;
    break;
  case 16:
    OpCmp = X86::CMP16rr;
    break;
  case 32:
    OpCmp = X86::CMP32rr;
    break;
  case 64:
    OpCmp = X86::CMP64rr;
    break;
  }

  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LHS)
           .addReg(RHS);

  MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                   TII.get(X86::SETCCr),
                                   I.getOperand(0).getReg()).addImm(CC);

  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

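// For G_FCMP, FCMP_OEQ and FCMP_UNE need two flag reads: UCOMIS* reports
// "unordered" through PF, so OEQ is SETE AND SETNP and UNE is SETNE OR SETP,
// as encoded in SETFOpcTable below. All other predicates map to a single
// SETcc.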
bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = X86::UCOMISDrr;
    break;
  }

  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1)
                              .addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2)
                              .addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               ResultReg).addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

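// Carry-propagating add/sub: a carry-in (G_UADDE/G_USUBE) is re-materialized
// into EFLAGS with a COPY so ADC/SBB can consume it, and the carry-out is
// copied back out of EFLAGS afterwards.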
bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE ||
          I.getOpcode() == TargetOpcode::G_UADDO ||
          I.getOpcode() == TargetOpcode::G_USUBE ||
          I.getOpcode() == TargetOpcode::G_USUBO) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE ||
               I.getOpcode() == TargetOpcode::G_USUBO;
  bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE ||
                    I.getOpcode() == TargetOpcode::G_USUBE;

  const LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types");

  // TODO: Handle immediate argument variants?
  unsigned OpADC, OpADD, OpSBB, OpSUB;
  switch (DstTy.getSizeInBits()) {
  case 8:
    OpADC = X86::ADC8rr;
    OpADD = X86::ADD8rr;
    OpSBB = X86::SBB8rr;
    OpSUB = X86::SUB8rr;
    break;
  case 16:
    OpADC = X86::ADC16rr;
    OpADD = X86::ADD16rr;
    OpSBB = X86::SBB16rr;
    OpSUB = X86::SUB16rr;
    break;
  case 32:
    OpADC = X86::ADC32rr;
    OpADD = X86::ADD32rr;
    OpSBB = X86::SBB32rr;
    OpSUB = X86::SUB32rr;
    break;
  case 64:
    OpADC = X86::ADC64rr;
    OpADD = X86::ADD64rr;
    OpSBB = X86::SBB64rr;
    OpSUB = X86::SUB64rr;
    break;
  default:
    llvm_unreachable("selectUAddSub unsupported type.");
  }

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);

  unsigned Opcode = IsSub ? OpSUB : OpADD;

  // G_UADDE/G_USUBE - find CarryIn def instruction.
  if (HasCarryIn) {
    Register CarryInReg = I.getOperand(4).getReg();
    MachineInstr *Def = MRI.getVRegDef(CarryInReg);
    while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
      CarryInReg = Def->getOperand(1).getReg();
      Def = MRI.getVRegDef(CarryInReg);
    }

    // TODO - handle more CF generating instructions
    if (Def->getOpcode() == TargetOpcode::G_UADDE ||
        Def->getOpcode() == TargetOpcode::G_UADDO ||
        Def->getOpcode() == TargetOpcode::G_USUBE ||
        Def->getOpcode() == TargetOpcode::G_USUBO) {
      // Carry was set by a previous ADD/SUB.
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY),
              X86::EFLAGS)
          .addReg(CarryInReg);

      if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI))
        return false;

      Opcode = IsSub ? OpSBB : OpADC;
    } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
      // The carry is constant; only a carry of 0 is supported.
      if (*val != 0)
        return false;

      Opcode = IsSub ? OpSUB : OpADD;
    } else
      return false;
  }

  MachineInstr &Inst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not extract subvector.

  if (Index == 0) {
    // Replace by extract subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rr));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

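// G_INSERT of a narrower vector: an insert at index 0 into an IMPLICIT_DEF
// collapses to a plain subregister copy; otherwise a VINSERTF variant is
// used, with the immediate expressed in units of the inserted vector width.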
bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not insert subvector.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace by subreg copy.
    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rr));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate.
  Index = Index / InsertRegTy.getSizeInBits();

  I.getOperand(3).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

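// G_UNMERGE_VALUES is decomposed into one G_EXTRACT per result, each of
// which is then selected recursively through select().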
bool X86InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
         "unexpected instruction");

  // Split to extracts.
  unsigned NumDefs = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumDefs).getReg();
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
    MachineInstr &ExtrInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
             .addReg(SrcReg)
             .addImm(Idx * DefSize);

    if (!select(ExtrInst))
      return false;
  }

  I.eraseFromParent();
  return true;
}

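// G_MERGE_VALUES / G_CONCAT_VECTORS are decomposed into a subregister copy
// for the first source followed by a chain of G_INSERTs, selected
// recursively, and a final COPY into the destination.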
bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split to inserts.
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg0 = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg0);
  unsigned SrcSize = SrcTy.getSizeInBits();

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  // For the first src use insertSubReg.
  Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  MRI.setRegBank(DefReg, RegBank);
  if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
    return false;

  for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
    Register Tmp = MRI.createGenericVirtualRegister(DstTy);
    MRI.setRegBank(Tmp, RegBank);

    MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::G_INSERT), Tmp)
                                    .addReg(DefReg)
                                    .addReg(I.getOperand(Idx).getReg())
                                    .addImm((Idx - 1) * SrcSize);

    DefReg = Tmp;

    if (!select(InsertInst))
      return false;
  }

  MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::COPY), DstReg)
                                .addReg(DefReg);

  if (!select(CopyInst))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  MachineInstr &TestInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
           .addReg(CondReg)
           .addImm(1);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
      .addMBB(DestMBB).addImm(X86::COND_NE);

  constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

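// Floating-point constants are materialized as constant-pool loads. The
// addressing depends on the code model: a 64-bit absolute address under the
// large model, RIP-relative on x86-64 small, and a plain absolute reference
// on 32-bit targets (PIC bases are not supported yet, see below).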
bool X86InstructionSelector::materializeFP(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
         "unexpected instruction");

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  // Create the load from the constant pool.
  const ConstantFP *CFP = I.getOperand(1).getFPImm();
  const auto &DL = MF.getDataLayout();
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  const DebugLoc &DbgLoc = I.getDebugLoc();

  unsigned Opc =
      getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

  unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
  MachineInstr *LoadInst = nullptr;
  unsigned char OpFlag = STI.classifyLocalReference(nullptr);

  if (CM == CodeModel::Large && STI.is64Bit()) {
    // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
    // they cannot be folded into immediate fields.

    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when globals fit in our immediate field.
    // This is true for X86-32 always and X86-64 when in -mcmodel=small mode.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
      // In DAGISel, the code that initializes it is generated by the CGBR
      // pass.
      return false; // TODO support the mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
1605
MachineRegisterInfo &MRI,
1606
MachineFunction &MF) const {
1607
// The implementation of this function is adapted from X86FastISel.
1608
assert((I.getOpcode() == TargetOpcode::G_MUL ||
1609
I.getOpcode() == TargetOpcode::G_SMULH ||
1610
I.getOpcode() == TargetOpcode::G_UMULH ||
1611
I.getOpcode() == TargetOpcode::G_SDIV ||
1612
I.getOpcode() == TargetOpcode::G_SREM ||
1613
I.getOpcode() == TargetOpcode::G_UDIV ||
1614
I.getOpcode() == TargetOpcode::G_UREM) &&
1615
"unexpected instruction");
1616
1617
const Register DstReg = I.getOperand(0).getReg();
1618
const Register Op1Reg = I.getOperand(1).getReg();
1619
const Register Op2Reg = I.getOperand(2).getReg();
1620
1621
const LLT RegTy = MRI.getType(DstReg);
1622
assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
1623
"Arguments and return value types must match");
1624
1625
const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
1626
if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
1627
return false;
1628
1629
const static unsigned NumTypes = 4; // i8, i16, i32, i64
1630
const static unsigned NumOps = 7; // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulh
1631
const static bool S = true; // IsSigned
1632
const static bool U = false; // !IsSigned
1633
const static unsigned Copy = TargetOpcode::COPY;
1634
1635
// For the X86 IDIV instruction, in most cases the dividend
1636
// (numerator) must be in a specific register pair highreg:lowreg,
1637
// producing the quotient in lowreg and the remainder in highreg.
1638
// For most data types, to set up the instruction, the dividend is
1639
// copied into lowreg, and lowreg is sign-extended into highreg. The
1640
// exception is i8, where the dividend is defined as a single register rather
1641
// than a register pair, and we therefore directly sign-extend the dividend
1642
// into lowreg, instead of copying, and ignore the highreg.
1643
  const static struct MulDivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct MulDivRemResult {
      unsigned OpMulDivRem;  // The specific MUL/DIV opcode to use.
      unsigned OpSignExtend; // Opcode for sign-extending lowreg into
                             // highreg, or copying a zero into highreg.
      unsigned OpCopy;       // Opcode for copying dividend into lowreg, or
                             // zero/sign-extending into lowreg for i8.
      unsigned ResultReg;    // Register containing the desired result.
      bool IsOpSigned;       // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
           {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U},  // UMulH
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},     // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},     // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U},  // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U},  // URem
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
           {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U},  // UMulH
       }},                                                 // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},     // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},     // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U},  // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U},  // URem
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
           {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U},  // UMulH
       }},                                                  // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},     // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},     // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U},  // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U},  // URem
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
           {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U},  // UMulH
       }},                                                  // i64
  };

  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
    return El.SizeInBits == RegTy.getSizeInBits();
  });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected mul/div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  case TargetOpcode::G_MUL:
    OpIndex = 4;
    break;
  case TargetOpcode::G_SMULH:
    OpIndex = 5;
    break;
  case TargetOpcode::G_UMULH:
    OpIndex = 6;
    break;
  }

  const MulDivRemEntry &TypeEntry = *OpEntryIt;
  const MulDivRemEntry::MulDivRemResult &OpEntry =
      TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);

  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
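    // CWD/CDQ/CQO implicitly sign-extend AX/EAX/RAX into DX/EDX/RDX, so the
    // signed case needs no explicit operands; the unsigned case materializes
    // a zero into the high register instead.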
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }

  // Generate the DIV/IDIV/MUL/IMUL instruction.
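  // Only the divisor/multiplier is an explicit operand; the other input and
  // both results are implicit in the fixed registers from the table above.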
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
      .addReg(Op2Reg);

  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(ResultSuperReg, 0, X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.ResultReg);
  }
  I.eraseFromParent();

  return true;
}

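// Select G_SELECT as a TEST of the condition followed by a conditional move
// (or a CMOV_GR* pseudo on subtargets without a suitable CMOV).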
bool X86InstructionSelector::selectSelect(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  GSelect &Sel = cast<GSelect>(I);
  unsigned DstReg = Sel.getReg(0);
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr))
      .addReg(Sel.getCondReg())
      .addReg(Sel.getCondReg());
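
  // TEST cond, cond sets ZF exactly when the condition register is zero; the
  // CMOV emitted below uses COND_E to select the false operand in that case.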
  unsigned OpCmp;
  LLT Ty = MRI.getType(DstReg);
  switch (Ty.getSizeInBits()) {
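  // There is no 8-bit CMOV instruction, and CMOV may be unavailable on older
  // subtargets; the CMOV_GR* pseudos are expanded to branch sequences later.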
  default:
    return false;
  case 8:
    OpCmp = X86::CMOV_GR8;
    break;
  case 16:
    OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
    break;
  case 32:
    OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
    break;
  case 64:
    assert(STI.is64Bit() && STI.canUseCMOV());
    OpCmp = X86::CMOV64rr;
    break;
  }
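  // With COND_E, the result keeps the true operand when the condition is
  // non-zero and takes the false operand when ZF is set (condition == 0).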
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
      .addReg(Sel.getTrueReg())
      .addReg(Sel.getFalseReg())
      .addImm(X86::COND_E);

  const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
    return false;
  }

  Sel.eraseFromParent();
  return true;
}

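// Factory entry point: constructs the GlobalISel instruction selector for the
// given target machine and subtarget.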
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   const X86Subtarget &Subtarget,
                                   const X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}