GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
1
//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "MCTargetDesc/X86BaseInfo.h"
10
#include "MCTargetDesc/X86EncodingOptimization.h"
11
#include "MCTargetDesc/X86FixupKinds.h"
12
#include "llvm/ADT/StringSwitch.h"
13
#include "llvm/BinaryFormat/ELF.h"
14
#include "llvm/BinaryFormat/MachO.h"
15
#include "llvm/MC/MCAsmBackend.h"
16
#include "llvm/MC/MCAssembler.h"
17
#include "llvm/MC/MCCodeEmitter.h"
18
#include "llvm/MC/MCContext.h"
19
#include "llvm/MC/MCDwarf.h"
20
#include "llvm/MC/MCELFObjectWriter.h"
21
#include "llvm/MC/MCELFStreamer.h"
22
#include "llvm/MC/MCExpr.h"
23
#include "llvm/MC/MCFixupKindInfo.h"
24
#include "llvm/MC/MCInst.h"
25
#include "llvm/MC/MCInstrInfo.h"
26
#include "llvm/MC/MCMachObjectWriter.h"
27
#include "llvm/MC/MCObjectStreamer.h"
28
#include "llvm/MC/MCObjectWriter.h"
29
#include "llvm/MC/MCRegisterInfo.h"
30
#include "llvm/MC/MCSectionMachO.h"
31
#include "llvm/MC/MCSubtargetInfo.h"
32
#include "llvm/MC/MCValue.h"
33
#include "llvm/MC/TargetRegistry.h"
34
#include "llvm/Support/CommandLine.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/raw_ostream.h"
37
38
using namespace llvm;
39
40
namespace {
41
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42
class X86AlignBranchKind {
43
private:
44
uint8_t AlignBranchKind = 0;
45
46
public:
47
void operator=(const std::string &Val) {
48
if (Val.empty())
49
return;
50
SmallVector<StringRef, 6> BranchTypes;
51
StringRef(Val).split(BranchTypes, '+', -1, false);
52
for (auto BranchType : BranchTypes) {
53
if (BranchType == "fused")
54
addKind(X86::AlignBranchFused);
55
else if (BranchType == "jcc")
56
addKind(X86::AlignBranchJcc);
57
else if (BranchType == "jmp")
58
addKind(X86::AlignBranchJmp);
59
else if (BranchType == "call")
60
addKind(X86::AlignBranchCall);
61
else if (BranchType == "ret")
62
addKind(X86::AlignBranchRet);
63
else if (BranchType == "indirect")
64
addKind(X86::AlignBranchIndirect);
65
else {
66
errs() << "invalid argument " << BranchType.str()
67
<< " to -x86-align-branch=; each element must be one of: fused, "
68
"jcc, jmp, call, ret, indirect.(plus separated)\n";
69
}
70
}
71
}
72
73
operator uint8_t() const { return AlignBranchKind; }
74
void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75
};
76
77
X86AlignBranchKind X86AlignBranchKindLoc;
78
79
cl::opt<unsigned> X86AlignBranchBoundary(
80
"x86-align-branch-boundary", cl::init(0),
81
cl::desc(
82
"Control how the assembler should align branches with NOP. If the "
83
"boundary's size is not 0, it should be a power of 2 and no less "
84
"than 32. Branches will be aligned to prevent from being across or "
85
"against the boundary of specified size. The default value 0 does not "
86
"align branches."));
87
88
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89
"x86-align-branch",
90
cl::desc(
91
"Specify types of branches to align (plus separated list of types):"
92
"\njcc indicates conditional jumps"
93
"\nfused indicates fused conditional jumps"
94
"\njmp indicates direct unconditional jumps"
95
"\ncall indicates direct and indirect calls"
96
"\nret indicates rets"
97
"\nindirect indicates indirect unconditional jumps"),
98
cl::location(X86AlignBranchKindLoc));
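//
// For example (illustrative), "-x86-align-branch-boundary=32
// -x86-align-branch=fused+jcc+jmp" pads with NOPs so that fused, conditional,
// and unconditional jumps neither cross nor end against a 32-byte boundary.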
99
100
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101
"x86-branches-within-32B-boundaries", cl::init(false),
102
cl::desc(
103
"Align selected instructions to mitigate negative performance impact "
104
"of Intel's micro code update for errata skx102. May break "
105
"assumptions about labels corresponding to particular instructions, "
106
"and should be used with caution."));
107
108
cl::opt<unsigned> X86PadMaxPrefixSize(
109
"x86-pad-max-prefix-size", cl::init(0),
110
cl::desc("Maximum number of prefixes to use for padding"));
111
112
cl::opt<bool> X86PadForAlign(
113
"x86-pad-for-align", cl::init(false), cl::Hidden,
114
cl::desc("Pad previous instructions to implement align directives"));
115
116
cl::opt<bool> X86PadForBranchAlign(
117
"x86-pad-for-branch-align", cl::init(true), cl::Hidden,
118
cl::desc("Pad previous instructions to implement branch alignment"));
119
120
class X86AsmBackend : public MCAsmBackend {
121
const MCSubtargetInfo &STI;
122
std::unique_ptr<const MCInstrInfo> MCII;
123
X86AlignBranchKind AlignBranchType;
124
Align AlignBoundary;
125
unsigned TargetPrefixMax = 0;
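// State carried between emitted instructions; emitInstructionBegin/End and
// canPadInst use it to decide whether NOPs or prefixes can safely be
// inserted before the next instruction.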
126
127
MCInst PrevInst;
128
unsigned PrevInstOpcode = 0;
129
MCBoundaryAlignFragment *PendingBA = nullptr;
130
std::pair<MCFragment *, size_t> PrevInstPosition;
131
bool IsRightAfterData = false;
132
133
uint8_t determinePaddingPrefix(const MCInst &Inst) const;
134
bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
135
bool needAlign(const MCInst &Inst) const;
136
bool canPadBranches(MCObjectStreamer &OS) const;
137
bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
138
139
public:
140
X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
141
: MCAsmBackend(llvm::endianness::little), STI(STI),
142
MCII(T.createMCInstrInfo()) {
143
if (X86AlignBranchWithin32BBoundaries) {
144
// At the moment, this defaults to aligning fused branches, unconditional
145
// jumps, and (unfused) conditional jumps with nops. Both the
146
// instructions aligned and the alignment method (nop vs prefix) may
147
// change in the future.
148
AlignBoundary = assumeAligned(32);
149
AlignBranchType.addKind(X86::AlignBranchFused);
150
AlignBranchType.addKind(X86::AlignBranchJcc);
151
AlignBranchType.addKind(X86::AlignBranchJmp);
152
}
153
// Allow overriding defaults set by main flag
154
if (X86AlignBranchBoundary.getNumOccurrences())
155
AlignBoundary = assumeAligned(X86AlignBranchBoundary);
156
if (X86AlignBranch.getNumOccurrences())
157
AlignBranchType = X86AlignBranchKindLoc;
158
if (X86PadMaxPrefixSize.getNumOccurrences())
159
TargetPrefixMax = X86PadMaxPrefixSize;
160
}
161
162
bool allowAutoPadding() const override;
163
bool allowEnhancedRelaxation() const override;
164
void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
165
const MCSubtargetInfo &STI);
166
void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
167
168
unsigned getNumFixupKinds() const override {
169
return X86::NumTargetFixupKinds;
170
}
171
172
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
173
174
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
175
176
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
177
const MCValue &Target,
178
const MCSubtargetInfo *STI) override;
179
180
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
181
const MCValue &Target, MutableArrayRef<char> Data,
182
uint64_t Value, bool IsResolved,
183
const MCSubtargetInfo *STI) const override;
184
185
bool mayNeedRelaxation(const MCInst &Inst,
186
const MCSubtargetInfo &STI) const override;
187
188
bool fixupNeedsRelaxation(const MCFixup &Fixup,
189
uint64_t Value) const override;
190
191
void relaxInstruction(MCInst &Inst,
192
const MCSubtargetInfo &STI) const override;
193
194
bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
195
MCCodeEmitter &Emitter,
196
unsigned &RemainingSize) const;
197
198
bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
199
unsigned &RemainingSize) const;
200
201
bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
202
unsigned &RemainingSize) const;
203
204
void finishLayout(const MCAssembler &Asm) const override;
205
206
unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
207
208
bool writeNopData(raw_ostream &OS, uint64_t Count,
209
const MCSubtargetInfo *STI) const override;
210
};
211
} // end anonymous namespace
212
213
static bool isRelaxableBranch(unsigned Opcode) {
214
return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
215
}
216
217
static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218
bool Is16BitMode = false) {
219
switch (Opcode) {
220
default:
221
llvm_unreachable("invalid opcode for branch");
222
case X86::JCC_1:
223
return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224
case X86::JMP_1:
225
return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226
}
227
}
228
229
static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230
unsigned Opcode = MI.getOpcode();
231
return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232
: X86::getOpcodeForLongImmediateForm(Opcode);
233
}
234
235
static X86::CondCode getCondFromBranch(const MCInst &MI,
236
const MCInstrInfo &MCII) {
237
unsigned Opcode = MI.getOpcode();
238
switch (Opcode) {
239
default:
240
return X86::COND_INVALID;
241
case X86::JCC_1: {
242
const MCInstrDesc &Desc = MCII.get(Opcode);
243
return static_cast<X86::CondCode>(
244
MI.getOperand(Desc.getNumOperands() - 1).getImm());
245
}
246
}
247
}
248
249
static X86::SecondMacroFusionInstKind
250
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251
X86::CondCode CC = getCondFromBranch(MI, MCII);
252
return classifySecondCondCodeInMacroFusion(CC);
253
}
254
255
/// Check if the instruction uses RIP relative addressing.
256
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257
unsigned Opcode = MI.getOpcode();
258
const MCInstrDesc &Desc = MCII.get(Opcode);
259
uint64_t TSFlags = Desc.TSFlags;
260
unsigned CurOp = X86II::getOperandBias(Desc);
261
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262
if (MemoryOperand < 0)
263
return false;
264
unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265
unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
266
return (BaseReg == X86::RIP);
267
}
268
269
/// Check if the instruction is a prefix.
270
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
271
return X86II::isPrefix(MCII.get(Opcode).TSFlags);
272
}
273
274
/// Check if the instruction is valid as the first instruction in macro fusion.
275
static bool isFirstMacroFusibleInst(const MCInst &Inst,
276
const MCInstrInfo &MCII) {
277
// An Intel instruction with RIP relative addressing is not macro fusible.
278
if (isRIPRelative(Inst, MCII))
279
return false;
280
X86::FirstMacroFusionInstKind FIK =
281
X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
282
return FIK != X86::FirstMacroFusionInstKind::Invalid;
283
}
284
285
/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
286
/// get better performance in some cases. Here, we determine which prefix is
287
/// the most suitable.
288
///
289
/// If the instruction has a segment override prefix, use the existing one.
290
/// If the target is 64-bit, use CS.
291
/// If the target is 32-bit,
292
/// - If the instruction has an ESP/EBP base register, use SS.
293
/// - Otherwise use DS.
294
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295
assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
296
"Prefixes can be added only in 32-bit or 64-bit mode.");
297
const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
298
uint64_t TSFlags = Desc.TSFlags;
299
300
// Determine where the memory operand starts, if present.
301
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302
if (MemoryOperand != -1)
303
MemoryOperand += X86II::getOperandBias(Desc);
304
305
unsigned SegmentReg = 0;
306
if (MemoryOperand >= 0) {
307
// Check for explicit segment override on memory operand.
308
SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
309
}
310
311
switch (TSFlags & X86II::FormMask) {
312
default:
313
break;
314
case X86II::RawFrmDstSrc: {
315
// Check segment override opcode prefix as needed (not for %ds).
316
if (Inst.getOperand(2).getReg() != X86::DS)
317
SegmentReg = Inst.getOperand(2).getReg();
318
break;
319
}
320
case X86II::RawFrmSrc: {
321
// Check segment override opcode prefix as needed (not for %ds).
322
if (Inst.getOperand(1).getReg() != X86::DS)
323
SegmentReg = Inst.getOperand(1).getReg();
324
break;
325
}
326
case X86II::RawFrmMemOffs: {
327
// Check segment override opcode prefix as needed.
328
SegmentReg = Inst.getOperand(1).getReg();
329
break;
330
}
331
}
332
333
if (SegmentReg != 0)
334
return X86::getSegmentOverridePrefixForReg(SegmentReg);
335
336
if (STI.hasFeature(X86::Is64Bit))
337
return X86::CS_Encoding;
338
339
if (MemoryOperand >= 0) {
340
unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341
unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
342
if (BaseReg == X86::ESP || BaseReg == X86::EBP)
343
return X86::SS_Encoding;
344
}
345
return X86::DS_Encoding;
346
}
347
348
/// Check if the two instructions will be macro-fused on the target cpu.
349
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350
const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
351
if (!InstDesc.isConditionalBranch())
352
return false;
353
if (!isFirstMacroFusibleInst(Cmp, *MCII))
354
return false;
355
const X86::FirstMacroFusionInstKind CmpKind =
356
X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
357
const X86::SecondMacroFusionInstKind BranchKind =
358
classifySecondInstInMacroFusion(Jcc, *MCII);
359
return X86::isMacroFused(CmpKind, BranchKind);
360
}
361
362
/// Check if the instruction has a variant symbol operand.
363
static bool hasVariantSymbol(const MCInst &MI) {
364
for (auto &Operand : MI) {
365
if (!Operand.isExpr())
366
continue;
367
const MCExpr &Expr = *Operand.getExpr();
368
if (Expr.getKind() == MCExpr::SymbolRef &&
369
cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
370
return true;
371
}
372
return false;
373
}
374
375
bool X86AsmBackend::allowAutoPadding() const {
376
return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
377
}
378
379
bool X86AsmBackend::allowEnhancedRelaxation() const {
380
return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
381
}
382
383
/// X86 has certain instructions which enable interrupts exactly one
384
/// instruction *after* the instruction which stores to SS. Return true if the
385
/// given instruction may have such an interrupt delay slot.
386
static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
387
switch (InstOpcode) {
388
case X86::POPSS16:
389
case X86::POPSS32:
390
case X86::STI:
391
return true;
392
393
case X86::MOV16sr:
394
case X86::MOV32sr:
395
case X86::MOV64sr:
396
case X86::MOV16sm:
397
// In fact, this is only the case if the first operand is SS. However, as
398
// segment moves occur extremely rarely, this is just a minor pessimization.
399
return true;
400
}
401
return false;
402
}
403
404
/// Check if the instruction to be emitted is right after any data.
405
static bool
406
isRightAfterData(MCFragment *CurrentFragment,
407
const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408
MCFragment *F = CurrentFragment;
409
// Since data is always emitted into a DataFragment, our check strategy is
410
// simple here.
411
// - If the fragment is a DataFragment
412
// - If it's empty (section start or data after align), return false.
413
// - If it's not the fragment where the previous instruction is,
414
// returns true.
415
// - If it's the fragment holding the previous instruction but its
416
// size changed since the previous instruction was emitted into
417
// it, returns true.
418
// - Otherwise returns false.
419
// - If the fragment is not a DataFragment, returns false.
420
if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
421
return DF->getContents().size() &&
422
(DF != PrevInstPosition.first ||
423
DF->getContents().size() != PrevInstPosition.second);
424
425
return false;
426
}
427
428
/// \returns the fragment size if it has instructions, otherwise returns 0.
429
static size_t getSizeForInstFragment(const MCFragment *F) {
430
if (!F || !F->hasInstructions())
431
return 0;
432
// MCEncodedFragmentWithContents being templated makes this tricky.
433
switch (F->getKind()) {
434
default:
435
llvm_unreachable("Unknown fragment with instructions!");
436
case MCFragment::FT_Data:
437
return cast<MCDataFragment>(*F).getContents().size();
438
case MCFragment::FT_Relaxable:
439
return cast<MCRelaxableFragment>(*F).getContents().size();
440
case MCFragment::FT_CompactEncodedInst:
441
return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
442
}
443
}
444
445
/// Return true if we can insert NOP or prefixes automatically before
446
/// the instruction to be emitted.
447
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
448
if (hasVariantSymbol(Inst))
449
// The linker may rewrite the instruction with a variant symbol operand (e.g.
450
// TLSCALL).
451
return false;
452
453
if (mayHaveInterruptDelaySlot(PrevInstOpcode))
454
// If this instruction follows an interrupt enabling instruction with a one
455
// instruction delay, inserting a nop would change behavior.
456
return false;
457
458
if (isPrefix(PrevInstOpcode, *MCII))
459
// If this instruction follows a prefix, inserting a nop/prefix would change
460
// semantics.
461
return false;
462
463
if (isPrefix(Inst.getOpcode(), *MCII))
464
// If this instruction is a prefix, inserting a prefix would change
465
// semantic.
466
return false;
467
468
if (IsRightAfterData)
469
// If this instruction follows any data, there is no clear
470
// instruction boundary, so inserting a nop/prefix would change semantics.
471
return false;
472
473
return true;
474
}
475
476
bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
477
if (!OS.getAllowAutoPadding())
478
return false;
479
assert(allowAutoPadding() && "incorrect initialization!");
480
481
// We only pad in the text section.
482
if (!OS.getCurrentSectionOnly()->isText())
483
return false;
484
485
// TODO: We currently don't deal with bundle cases.
486
if (OS.getAssembler().isBundlingEnabled())
487
return false;
488
489
// Branches only need to be aligned in 32-bit or 64-bit mode.
490
if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
491
return false;
492
493
return true;
494
}
495
496
/// Check if the instruction needs to be aligned.
497
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
498
const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
499
return (Desc.isConditionalBranch() &&
500
(AlignBranchType & X86::AlignBranchJcc)) ||
501
(Desc.isUnconditionalBranch() &&
502
(AlignBranchType & X86::AlignBranchJmp)) ||
503
(Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
504
(Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
505
(Desc.isIndirectBranch() &&
506
(AlignBranchType & X86::AlignBranchIndirect));
507
}
508
509
/// Insert BoundaryAlignFragment before instructions to align branches.
510
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
511
const MCInst &Inst, const MCSubtargetInfo &STI) {
512
// Used by canPadInst. Done here, because in emitInstructionEnd, the current
513
// fragment will have changed.
514
IsRightAfterData =
515
isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);
516
517
if (!canPadBranches(OS))
518
return;
519
520
// NB: PrevInst is only valid if canPadBranches is true.
521
if (!isMacroFused(PrevInst, Inst))
522
// Macro fusion doesn't actually happen, so clear the pending fragment.
523
PendingBA = nullptr;
524
525
// When branch padding is enabled (basically the skx102 erratum => unlikely),
526
// we call canPadInst (not cheap) twice. However, in the common case, we can
527
// avoid unnecessary calls to that, as this is otherwise only used for
528
// relaxable fragments.
529
if (!canPadInst(Inst, OS))
530
return;
531
532
if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
533
// Macro fusion actually happens and there is no other fragment inserted
534
// after the previous instruction.
535
//
536
// Do nothing here since we already inserted a BoundaryAlign fragment when
537
// we met the first instruction in the fused pair and we'll tie them
538
// together in emitInstructionEnd.
539
//
540
// Note: When there is at least one fragment, such as MCAlignFragment,
541
// inserted after the previous instruction, e.g.
542
//
543
// \code
544
// cmp %rax %rcx
545
// .align 16
546
// je .Label0
547
// \endcode
548
//
549
// We will treat the JCC as an unfused branch although it may be fused
550
// with the CMP.
551
return;
552
}
553
554
if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
555
isFirstMacroFusibleInst(Inst, *MCII))) {
556
// If we meet an unfused branch or the first instruction in a fusible pair,
557
// insert a BoundaryAlign fragment.
558
PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
559
AlignBoundary, STI);
560
OS.insert(PendingBA);
561
}
562
}
563
564
/// Set the last fragment to be aligned for the BoundaryAlignFragment.
565
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
566
const MCInst &Inst) {
567
MCFragment *CF = OS.getCurrentFragment();
568
if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
569
F->setAllowAutoPadding(canPadInst(Inst, OS));
570
571
// Update PrevInstOpcode here, since canPadInst() reads it.
572
PrevInstOpcode = Inst.getOpcode();
573
PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
574
575
if (!canPadBranches(OS))
576
return;
577
578
// PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
579
PrevInst = Inst;
580
581
if (!needAlign(Inst) || !PendingBA)
582
return;
583
584
// Tie the aligned instructions into a pending BoundaryAlign.
585
PendingBA->setLastFragment(CF);
586
PendingBA = nullptr;
587
588
// We need to ensure that further data isn't added to the current
589
// DataFragment, so that we can get the size of instructions later in
590
// MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
591
// DataFragment.
592
if (isa_and_nonnull<MCDataFragment>(CF))
593
OS.insert(OS.getContext().allocFragment<MCDataFragment>());
594
595
// Update the maximum alignment on the current section if necessary.
596
MCSection *Sec = OS.getCurrentSectionOnly();
597
Sec->ensureMinAlignment(AlignBoundary);
598
}
599
600
std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
601
if (STI.getTargetTriple().isOSBinFormatELF()) {
602
unsigned Type;
603
if (STI.getTargetTriple().getArch() == Triple::x86_64) {
604
Type = llvm::StringSwitch<unsigned>(Name)
605
#define ELF_RELOC(X, Y) .Case(#X, Y)
606
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
607
#undef ELF_RELOC
608
.Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
609
.Case("BFD_RELOC_8", ELF::R_X86_64_8)
610
.Case("BFD_RELOC_16", ELF::R_X86_64_16)
611
.Case("BFD_RELOC_32", ELF::R_X86_64_32)
612
.Case("BFD_RELOC_64", ELF::R_X86_64_64)
613
.Default(-1u);
614
} else {
615
Type = llvm::StringSwitch<unsigned>(Name)
616
#define ELF_RELOC(X, Y) .Case(#X, Y)
617
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
618
#undef ELF_RELOC
619
.Case("BFD_RELOC_NONE", ELF::R_386_NONE)
620
.Case("BFD_RELOC_8", ELF::R_386_8)
621
.Case("BFD_RELOC_16", ELF::R_386_16)
622
.Case("BFD_RELOC_32", ELF::R_386_32)
623
.Default(-1u);
624
}
625
if (Type == -1u)
626
return std::nullopt;
627
return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
628
}
629
return MCAsmBackend::getFixupKind(Name);
630
}
631
632
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
633
const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
634
{"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
635
{"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
636
{"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
637
{"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
638
{"reloc_signed_4byte", 0, 32, 0},
639
{"reloc_signed_4byte_relax", 0, 32, 0},
640
{"reloc_global_offset_table", 0, 32, 0},
641
{"reloc_global_offset_table8", 0, 64, 0},
642
{"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
643
};
644
645
// Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
646
// do not require any extra processing.
647
if (Kind >= FirstLiteralRelocationKind)
648
return MCAsmBackend::getFixupKindInfo(FK_NONE);
649
650
if (Kind < FirstTargetFixupKind)
651
return MCAsmBackend::getFixupKindInfo(Kind);
652
653
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
654
"Invalid kind!");
655
assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
656
return Infos[Kind - FirstTargetFixupKind];
657
}
658
659
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
660
const MCFixup &Fixup, const MCValue &,
661
const MCSubtargetInfo *STI) {
662
return Fixup.getKind() >= FirstLiteralRelocationKind;
663
}
664
665
static unsigned getFixupKindSize(unsigned Kind) {
666
switch (Kind) {
667
default:
668
llvm_unreachable("invalid fixup kind!");
669
case FK_NONE:
670
return 0;
671
case FK_PCRel_1:
672
case FK_SecRel_1:
673
case FK_Data_1:
674
return 1;
675
case FK_PCRel_2:
676
case FK_SecRel_2:
677
case FK_Data_2:
678
return 2;
679
case FK_PCRel_4:
680
case X86::reloc_riprel_4byte:
681
case X86::reloc_riprel_4byte_relax:
682
case X86::reloc_riprel_4byte_relax_rex:
683
case X86::reloc_riprel_4byte_movq_load:
684
case X86::reloc_signed_4byte:
685
case X86::reloc_signed_4byte_relax:
686
case X86::reloc_global_offset_table:
687
case X86::reloc_branch_4byte_pcrel:
688
case FK_SecRel_4:
689
case FK_Data_4:
690
return 4;
691
case FK_PCRel_8:
692
case FK_SecRel_8:
693
case FK_Data_8:
694
case X86::reloc_global_offset_table8:
695
return 8;
696
}
697
}
698
699
void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
700
const MCValue &Target,
701
MutableArrayRef<char> Data,
702
uint64_t Value, bool IsResolved,
703
const MCSubtargetInfo *STI) const {
704
unsigned Kind = Fixup.getKind();
705
if (Kind >= FirstLiteralRelocationKind)
706
return;
707
unsigned Size = getFixupKindSize(Kind);
708
709
assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
710
711
int64_t SignedValue = static_cast<int64_t>(Value);
712
if ((Target.isAbsolute() || IsResolved) &&
713
getFixupKindInfo(Fixup.getKind()).Flags &
714
MCFixupKindInfo::FKF_IsPCRel) {
715
// Check that the PC-relative fixup fits into the fixup size.
716
if (Size > 0 && !isIntN(Size * 8, SignedValue))
717
Asm.getContext().reportError(
718
Fixup.getLoc(), "value of " + Twine(SignedValue) +
719
" is too large for field of " + Twine(Size) +
720
((Size == 1) ? " byte." : " bytes."));
721
} else {
722
// Check that the upper bits are either all zeros or all ones.
723
// Specifically ignore overflow/underflow as long as the leakage is
724
// limited to the lower bits. This is to remain compatible with
725
// other assemblers.
726
assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
727
"Value does not fit in the Fixup field");
728
}
729
730
for (unsigned i = 0; i != Size; ++i)
731
Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
732
}
733
734
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
735
const MCSubtargetInfo &STI) const {
736
unsigned Opcode = MI.getOpcode();
737
unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
738
return isRelaxableBranch(Opcode) ||
739
(X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
740
MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
741
}
742
743
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
744
uint64_t Value) const {
745
// Relax if the value is too big for a (signed) i8.
746
return !isInt<8>(Value);
747
}
748
749
// FIXME: Can tblgen help at all here to verify there aren't other instructions
750
// we can relax?
751
void X86AsmBackend::relaxInstruction(MCInst &Inst,
752
const MCSubtargetInfo &STI) const {
753
// The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
754
bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
755
unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
756
757
if (RelaxedOp == Inst.getOpcode()) {
758
SmallString<256> Tmp;
759
raw_svector_ostream OS(Tmp);
760
Inst.dump_pretty(OS);
761
OS << "\n";
762
report_fatal_error("unexpected instruction to relax: " + OS.str());
763
}
764
765
Inst.setOpcode(RelaxedOp);
766
}
767
768
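/// Attempt to consume some of \p RemainingSize by prepending redundant
/// segment-override prefixes (chosen by determinePaddingPrefix) to the
/// instruction in \p RF, as an alternative to emitting NOPs elsewhere.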
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
769
MCCodeEmitter &Emitter,
770
unsigned &RemainingSize) const {
771
if (!RF.getAllowAutoPadding())
772
return false;
773
// If the instruction isn't fully relaxed, shifting it around might require a
774
// larger value for one of the fixups than can be encoded. The outer loop
775
// will also catch this before moving to the next instruction, but we need to
776
// prevent padding this single instruction as well.
777
if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
778
return false;
779
780
const unsigned OldSize = RF.getContents().size();
781
if (OldSize == 15)
782
return false;
783
784
const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
785
const unsigned RemainingPrefixSize = [&]() -> unsigned {
786
SmallString<15> Code;
787
X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
788
assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
789
790
// TODO: It turns out we need a decent amount of plumbing for the target
791
// specific bits to determine the number of prefixes it's safe to add. Various
792
// targets (older chips mostly, but also Atom family) encounter decoder
793
// stalls with too many prefixes. For testing purposes, we set the value
794
// externally for the moment.
795
unsigned ExistingPrefixSize = Code.size();
796
if (TargetPrefixMax <= ExistingPrefixSize)
797
return 0;
798
return TargetPrefixMax - ExistingPrefixSize;
799
}();
800
const unsigned PrefixBytesToAdd =
801
std::min(MaxPossiblePad, RemainingPrefixSize);
802
if (PrefixBytesToAdd == 0)
803
return false;
804
805
const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
806
807
SmallString<256> Code;
808
Code.append(PrefixBytesToAdd, Prefix);
809
Code.append(RF.getContents().begin(), RF.getContents().end());
810
RF.getContents() = Code;
811
812
// Adjust the fixups for the change in offsets
813
for (auto &F : RF.getFixups()) {
814
F.setOffset(F.getOffset() + PrefixBytesToAdd);
815
}
816
817
RemainingSize -= PrefixBytesToAdd;
818
return true;
819
}
820
821
bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
822
MCCodeEmitter &Emitter,
823
unsigned &RemainingSize) const {
824
if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
825
// TODO: There are lots of other tricks we could apply for increasing
826
// encoding size without impacting performance.
827
return false;
828
829
MCInst Relaxed = RF.getInst();
830
relaxInstruction(Relaxed, *RF.getSubtargetInfo());
831
832
SmallVector<MCFixup, 4> Fixups;
833
SmallString<15> Code;
834
Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
835
const unsigned OldSize = RF.getContents().size();
836
const unsigned NewSize = Code.size();
837
assert(NewSize >= OldSize && "size decrease during relaxation?");
838
unsigned Delta = NewSize - OldSize;
839
if (Delta > RemainingSize)
840
return false;
841
RF.setInst(Relaxed);
842
RF.getContents() = Code;
843
RF.getFixups() = Fixups;
844
RemainingSize -= Delta;
845
return true;
846
}
847
848
bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
849
MCCodeEmitter &Emitter,
850
unsigned &RemainingSize) const {
851
bool Changed = false;
852
if (RemainingSize != 0)
853
Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
854
if (RemainingSize != 0)
855
Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
856
return Changed;
857
}
858
859
void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
860
// See if we can further relax some instructions to cut down on the number of
861
// nop bytes required for code alignment. The actual win is in reducing
862
// instruction count, not number of bytes. Modern X86-64 can easily end up
863
// decode limited. It is often better to reduce the number of instructions
864
// (i.e. eliminate nops) even at the cost of increasing the size and
865
// complexity of others.
866
if (!X86PadForAlign && !X86PadForBranchAlign)
867
return;
868
869
// The processed regions are delimited by LabeledFragments. -g may have more
870
// MCSymbols and therefore different relaxation results. X86PadForAlign is
871
// disabled by default to eliminate the -g vs non -g difference.
872
DenseSet<MCFragment *> LabeledFragments;
873
for (const MCSymbol &S : Asm.symbols())
874
LabeledFragments.insert(S.getFragment(false));
875
876
for (MCSection &Sec : Asm) {
877
if (!Sec.isText())
878
continue;
879
880
SmallVector<MCRelaxableFragment *, 4> Relaxable;
881
for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
882
MCFragment &F = *I;
883
884
if (LabeledFragments.count(&F))
885
Relaxable.clear();
886
887
if (F.getKind() == MCFragment::FT_Data ||
888
F.getKind() == MCFragment::FT_CompactEncodedInst)
889
// Skip and ignore
890
continue;
891
892
if (F.getKind() == MCFragment::FT_Relaxable) {
893
auto &RF = cast<MCRelaxableFragment>(*I);
894
Relaxable.push_back(&RF);
895
continue;
896
}
897
898
auto canHandle = [](MCFragment &F) -> bool {
899
switch (F.getKind()) {
900
default:
901
return false;
902
case MCFragment::FT_Align:
903
return X86PadForAlign;
904
case MCFragment::FT_BoundaryAlign:
905
return X86PadForBranchAlign;
906
}
907
};
908
// For any unhandled kind, assume we can't change layout.
909
if (!canHandle(F)) {
910
Relaxable.clear();
911
continue;
912
}
913
914
#ifndef NDEBUG
915
const uint64_t OrigOffset = Asm.getFragmentOffset(F);
916
#endif
917
const uint64_t OrigSize = Asm.computeFragmentSize(F);
918
919
// To keep the effects local, prefer to relax instructions closest to
920
// the align directive. This is purely about human understandability
921
// of the resulting code. If we later find a reason to expand
922
// particular instructions over others, we can adjust.
923
unsigned RemainingSize = OrigSize;
924
while (!Relaxable.empty() && RemainingSize != 0) {
925
auto &RF = *Relaxable.pop_back_val();
926
// Give the backend a chance to play any tricks it wishes to increase
927
// the encoding size of the given instruction. Target independent code
928
// will try further relaxation, but targets may play further tricks.
929
if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
930
Sec.setHasLayout(false);
931
932
// If we have an instruction which hasn't been fully relaxed, we can't
933
// skip past it and insert bytes before it. Changing its starting
934
// offset might require a larger negative offset than it can encode.
935
// We don't need to worry about larger positive offsets as none of the
936
// possible offsets between this and our align are visible, and the
937
// ones afterwards aren't changing.
938
if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
939
break;
940
}
941
Relaxable.clear();
942
943
// BoundaryAlign explicitly tracks its size (unlike align).
944
if (F.getKind() == MCFragment::FT_BoundaryAlign)
945
cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
946
947
#ifndef NDEBUG
948
const uint64_t FinalOffset = Asm.getFragmentOffset(F);
949
const uint64_t FinalSize = Asm.computeFragmentSize(F);
950
assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
951
"can't move start of next fragment!");
952
assert(FinalSize == RemainingSize && "inconsistent size computation?");
953
#endif
954
955
// If we're looking at a boundary align, make sure we don't try to pad
956
// its target instructions for some following directive. Doing so would
957
// break the alignment of the current boundary align.
958
if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
959
const MCFragment *LastFragment = BF->getLastFragment();
960
if (!LastFragment)
961
continue;
962
while (&*I != LastFragment)
963
++I;
964
}
965
}
966
}
967
968
// The layout is done. Mark every fragment as valid.
969
for (MCSection &Section : Asm) {
970
Asm.getFragmentOffset(*Section.curFragList()->Tail);
971
Asm.computeFragmentSize(*Section.curFragList()->Tail);
972
}
973
}
974
975
unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
976
if (STI.hasFeature(X86::Is16Bit))
977
return 4;
978
if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
979
return 1;
980
if (STI.hasFeature(X86::TuningFast7ByteNOP))
981
return 7;
982
if (STI.hasFeature(X86::TuningFast15ByteNOP))
983
return 15;
984
if (STI.hasFeature(X86::TuningFast11ByteNOP))
985
return 11;
986
// FIXME: handle 32-bit mode
987
// 15 bytes is the longest single NOP instruction, but 10 bytes is
988
// commonly the longest that can be efficiently decoded.
989
return 10;
990
}
991
992
/// Write a sequence of optimal nops to the output, covering \p Count
993
/// bytes.
994
/// \return - true on success, false on failure
995
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
996
const MCSubtargetInfo *STI) const {
997
static const char Nops32Bit[10][11] = {
998
// nop
999
"\x90",
1000
// xchg %ax,%ax
1001
"\x66\x90",
1002
// nopl (%[re]ax)
1003
"\x0f\x1f\x00",
1004
// nopl 0(%[re]ax)
1005
"\x0f\x1f\x40\x00",
1006
// nopl 0(%[re]ax,%[re]ax,1)
1007
"\x0f\x1f\x44\x00\x00",
1008
// nopw 0(%[re]ax,%[re]ax,1)
1009
"\x66\x0f\x1f\x44\x00\x00",
1010
// nopl 0L(%[re]ax)
1011
"\x0f\x1f\x80\x00\x00\x00\x00",
1012
// nopl 0L(%[re]ax,%[re]ax,1)
1013
"\x0f\x1f\x84\x00\x00\x00\x00\x00",
1014
// nopw 0L(%[re]ax,%[re]ax,1)
1015
"\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1016
// nopw %cs:0L(%[re]ax,%[re]ax,1)
1017
"\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018
};
1019
1020
// 16-bit mode uses different nop patterns than 32-bit.
1021
static const char Nops16Bit[4][11] = {
1022
// nop
1023
"\x90",
1024
// xchg %eax,%eax
1025
"\x66\x90",
1026
// lea 0(%si),%si
1027
"\x8d\x74\x00",
1028
// lea 0w(%si),%si
1029
"\x8d\xb4\x00\x00",
1030
};
1031
1032
const char(*Nops)[11] =
1033
STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1034
1035
uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1036
1037
// Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1038
// length.
1039
do {
1040
const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
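// Lengths greater than 10 are emitted as 0x66 operand-size prefixes
// followed by the 10-byte NOP from the table above.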
1041
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1042
for (uint8_t i = 0; i < Prefixes; i++)
1043
OS << '\x66';
1044
const uint8_t Rest = ThisNopLength - Prefixes;
1045
if (Rest != 0)
1046
OS.write(Nops[Rest - 1], Rest);
1047
Count -= ThisNopLength;
1048
} while (Count != 0);
1049
1050
return true;
1051
}
1052
1053
/* *** */
1054
1055
namespace {
1056
1057
class ELFX86AsmBackend : public X86AsmBackend {
1058
public:
1059
uint8_t OSABI;
1060
ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1061
: X86AsmBackend(T, STI), OSABI(OSABI) {}
1062
};
1063
1064
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1065
public:
1066
ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1067
const MCSubtargetInfo &STI)
1068
: ELFX86AsmBackend(T, OSABI, STI) {}
1069
1070
std::unique_ptr<MCObjectTargetWriter>
1071
createObjectTargetWriter() const override {
1072
return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1073
}
1074
};
1075
1076
class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1077
public:
1078
ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1079
const MCSubtargetInfo &STI)
1080
: ELFX86AsmBackend(T, OSABI, STI) {}
1081
1082
std::unique_ptr<MCObjectTargetWriter>
1083
createObjectTargetWriter() const override {
1084
return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085
ELF::EM_X86_64);
1086
}
1087
};
1088
1089
class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1090
public:
1091
ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1092
const MCSubtargetInfo &STI)
1093
: ELFX86AsmBackend(T, OSABI, STI) {}
1094
1095
std::unique_ptr<MCObjectTargetWriter>
1096
createObjectTargetWriter() const override {
1097
return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1098
ELF::EM_IAMCU);
1099
}
1100
};
1101
1102
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1103
public:
1104
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1105
const MCSubtargetInfo &STI)
1106
: ELFX86AsmBackend(T, OSABI, STI) {}
1107
1108
std::unique_ptr<MCObjectTargetWriter>
1109
createObjectTargetWriter() const override {
1110
return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1111
}
1112
};
1113
1114
class WindowsX86AsmBackend : public X86AsmBackend {
1115
bool Is64Bit;
1116
1117
public:
1118
WindowsX86AsmBackend(const Target &T, bool is64Bit,
1119
const MCSubtargetInfo &STI)
1120
: X86AsmBackend(T, STI)
1121
, Is64Bit(is64Bit) {
1122
}
1123
1124
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1125
return StringSwitch<std::optional<MCFixupKind>>(Name)
1126
.Case("dir32", FK_Data_4)
1127
.Case("secrel32", FK_SecRel_4)
1128
.Case("secidx", FK_SecRel_2)
1129
.Default(MCAsmBackend::getFixupKind(Name));
1130
}
1131
1132
std::unique_ptr<MCObjectTargetWriter>
1133
createObjectTargetWriter() const override {
1134
return createX86WinCOFFObjectWriter(Is64Bit);
1135
}
1136
};
1137
1138
namespace CU {
1139
1140
/// Compact unwind encoding values.
1141
enum CompactUnwindEncodings {
1142
/// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
1143
/// the return address, then [RE]SP is moved to [RE]BP.
1144
UNWIND_MODE_BP_FRAME = 0x01000000,
1145
1146
/// A frameless function with a small constant stack size.
1147
UNWIND_MODE_STACK_IMMD = 0x02000000,
1148
1149
/// A frameless function with a large constant stack size.
1150
UNWIND_MODE_STACK_IND = 0x03000000,
1151
1152
/// No compact unwind encoding is available.
1153
UNWIND_MODE_DWARF = 0x04000000,
1154
1155
/// Mask for encoding the frame registers.
1156
UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
1157
1158
/// Mask for encoding the frameless registers.
1159
UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1160
};
1161
1162
} // namespace CU
1163
1164
class DarwinX86AsmBackend : public X86AsmBackend {
1165
const MCRegisterInfo &MRI;
1166
1167
/// Number of registers that can be saved in a compact unwind encoding.
1168
enum { CU_NUM_SAVED_REGS = 6 };
1169
1170
mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1171
Triple TT;
1172
bool Is64Bit;
1173
1174
unsigned OffsetSize; ///< Offset of a "push" instruction.
1175
unsigned MoveInstrSize; ///< Size of a "move" instruction.
1176
unsigned StackDivide; ///< Amount to adjust stack size by.
1177
protected:
1178
/// Size of a "push" instruction for the given register.
1179
unsigned PushInstrSize(unsigned Reg) const {
1180
switch (Reg) {
1181
case X86::EBX:
1182
case X86::ECX:
1183
case X86::EDX:
1184
case X86::EDI:
1185
case X86::ESI:
1186
case X86::EBP:
1187
case X86::RBX:
1188
case X86::RBP:
1189
return 1;
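// Pushes of R12-R15 need a REX prefix and therefore take two bytes.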
1190
case X86::R12:
1191
case X86::R13:
1192
case X86::R14:
1193
case X86::R15:
1194
return 2;
1195
}
1196
return 1;
1197
}
1198
1199
private:
1200
/// Get the compact unwind number for a given register. The number
1201
/// corresponds to the enum lists in compact_unwind_encoding.h.
1202
int getCompactUnwindRegNum(unsigned Reg) const {
1203
static const MCPhysReg CU32BitRegs[7] = {
1204
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1205
};
1206
static const MCPhysReg CU64BitRegs[] = {
1207
X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1208
};
1209
const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1210
for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1211
if (*CURegs == Reg)
1212
return Idx;
1213
1214
return -1;
1215
}
1216
1217
/// Return the registers encoded for a compact encoding with a frame
1218
/// pointer.
1219
uint32_t encodeCompactUnwindRegistersWithFrame() const {
1220
// Encode the registers in the order they were saved --- 3-bits per
1221
// register. The list of saved registers is assumed to be in reverse
1222
// order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1223
uint32_t RegEnc = 0;
1224
for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1225
unsigned Reg = SavedRegs[i];
1226
if (Reg == 0) break;
1227
1228
int CURegNum = getCompactUnwindRegNum(Reg);
1229
if (CURegNum == -1) return ~0U;
1230
1231
// Encode the 3-bit register number in order, skipping over 3-bits for
1232
// each register.
1233
RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1234
}
1235
1236
assert((RegEnc & 0x3FFFF) == RegEnc &&
1237
"Invalid compact register encoding!");
1238
return RegEnc;
1239
}
1240
1241
/// Create the permutation encoding used with frameless stacks. It is
1242
/// passed the number of registers to be saved and an array of the registers
1243
/// saved.
1244
uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1245
// The saved registers are numbered from 1 to 6. In order to encode the
1246
// order in which they were saved, we re-number them according to their
1247
// place in the register order. The re-numbering is relative to the last
1248
// re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1249
// that order:
1250
//
1251
// Orig Re-Num
1252
// ---- ------
1253
// 6 6
1254
// 2 2
1255
// 4 3
1256
// 5 3
1257
//
1258
for (unsigned i = 0; i < RegCount; ++i) {
1259
int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1260
if (CUReg == -1) return ~0U;
1261
SavedRegs[i] = CUReg;
1262
}
1263
1264
// Reverse the list.
1265
std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1266
1267
uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1268
for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1269
unsigned Countless = 0;
1270
for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1271
if (SavedRegs[j] < SavedRegs[i])
1272
++Countless;
1273
1274
RenumRegs[i] = SavedRegs[i] - Countless - 1;
1275
}
1276
1277
// Take the renumbered values and encode them into a 10-bit number.
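// The multipliers (120, 24, 6, 2, 1 = 5!, 4!, 3!, 2!, 1!) make this a
// factorial-base (Lehmer-code style) encoding of the register permutation.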
1278
uint32_t permutationEncoding = 0;
1279
switch (RegCount) {
1280
case 6:
1281
permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1282
+ 6 * RenumRegs[2] + 2 * RenumRegs[3]
1283
+ RenumRegs[4];
1284
break;
1285
case 5:
1286
permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1287
+ 6 * RenumRegs[3] + 2 * RenumRegs[4]
1288
+ RenumRegs[5];
1289
break;
1290
case 4:
1291
permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
1292
+ 3 * RenumRegs[4] + RenumRegs[5];
1293
break;
1294
case 3:
1295
permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
1296
+ RenumRegs[5];
1297
break;
1298
case 2:
1299
permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
1300
break;
1301
case 1:
1302
permutationEncoding |= RenumRegs[5];
1303
break;
1304
}
1305
1306
assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1307
"Invalid compact register encoding!");
1308
return permutationEncoding;
1309
}
1310
1311
public:
1312
DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1313
const MCSubtargetInfo &STI)
1314
: X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1315
Is64Bit(TT.isArch64Bit()) {
1316
memset(SavedRegs, 0, sizeof(SavedRegs));
1317
OffsetSize = Is64Bit ? 8 : 4;
1318
MoveInstrSize = Is64Bit ? 3 : 2;
1319
StackDivide = Is64Bit ? 8 : 4;
1320
}
1321
1322
std::unique_ptr<MCObjectTargetWriter>
1323
createObjectTargetWriter() const override {
1324
uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1325
uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1326
return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1327
}
1328
1329
/// Implementation of algorithm to generate the compact unwind encoding
1330
/// for the CFI instructions.
1331
uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1332
const MCContext *Ctxt) const override {
1333
ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1334
if (Instrs.empty()) return 0;
1335
if (!isDarwinCanonicalPersonality(FI->Personality) &&
1336
!Ctxt->emitCompactUnwindNonCanonical())
1337
return CU::UNWIND_MODE_DWARF;
1338
1339
// Reset the saved registers.
1340
unsigned SavedRegIdx = 0;
1341
memset(SavedRegs, 0, sizeof(SavedRegs));
1342
1343
bool HasFP = false;
1344
1345
// Encode that we are using EBP/RBP as the frame pointer.
1346
uint64_t CompactUnwindEncoding = 0;
1347
1348
unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1349
unsigned InstrOffset = 0;
1350
unsigned StackAdjust = 0;
1351
uint64_t StackSize = 0;
1352
int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1353
1354
for (const MCCFIInstruction &Inst : Instrs) {
1355
switch (Inst.getOperation()) {
1356
default:
1357
// Any other CFI directives indicate a frame that we aren't prepared
1358
// to represent via compact unwind, so just bail out.
1359
return CU::UNWIND_MODE_DWARF;
1360
case MCCFIInstruction::OpDefCfaRegister: {
1361
// Defines a frame pointer. E.g.
1362
//
1363
// movq %rsp, %rbp
1364
// L0:
1365
// .cfi_def_cfa_register %rbp
1366
//
1367
HasFP = true;
1368
1369
// If the frame pointer is other than esp/rsp, we do not have a way to
1370
// generate a compact unwinding representation, so bail out.
1371
if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1372
(Is64Bit ? X86::RBP : X86::EBP))
1373
return CU::UNWIND_MODE_DWARF;
1374
1375
// Reset the counts.
1376
memset(SavedRegs, 0, sizeof(SavedRegs));
1377
StackAdjust = 0;
1378
SavedRegIdx = 0;
1379
MinAbsOffset = std::numeric_limits<int64_t>::max();
1380
InstrOffset += MoveInstrSize;
1381
break;
1382
}
1383
case MCCFIInstruction::OpDefCfaOffset: {
1384
// Defines a new offset for the CFA. E.g.
1385
//
1386
// With frame:
1387
//
1388
// pushq %rbp
1389
// L0:
1390
// .cfi_def_cfa_offset 16
1391
//
1392
// Without frame:
1393
//
1394
// subq $72, %rsp
1395
// L0:
1396
// .cfi_def_cfa_offset 80
1397
//
1398
StackSize = Inst.getOffset() / StackDivide;
1399
break;
1400
}
1401
case MCCFIInstruction::OpOffset: {
1402
// Defines a "push" of a callee-saved register. E.g.
1403
//
1404
// pushq %r15
1405
// pushq %r14
1406
// pushq %rbx
1407
// L0:
1408
// subq $120, %rsp
1409
// L1:
1410
// .cfi_offset %rbx, -40
1411
// .cfi_offset %r14, -32
1412
// .cfi_offset %r15, -24
1413
//
1414
if (SavedRegIdx == CU_NUM_SAVED_REGS)
1415
// If there are too many saved registers, we cannot use a compact
1416
// unwind encoding.
1417
return CU::UNWIND_MODE_DWARF;
1418
1419
unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1420
SavedRegs[SavedRegIdx++] = Reg;
1421
StackAdjust += OffsetSize;
1422
MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
1423
InstrOffset += PushInstrSize(Reg);
1424
break;
1425
}
1426
}
1427
}
1428
1429
StackAdjust /= StackDivide;
1430
1431
if (HasFP) {
1432
if ((StackAdjust & 0xFF) != StackAdjust)
1433
// Offset was too big for a compact unwind encoding.
1434
return CU::UNWIND_MODE_DWARF;
1435
1436
// We don't attempt to track a real StackAdjust, so if the saved registers
1437
// aren't adjacent to rbp we can't cope.
1438
if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1439
return CU::UNWIND_MODE_DWARF;
1440
1441
// Get the encoding of the saved registers when we have a frame pointer.
1442
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1443
if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1444
1445
CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1446
CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1447
CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1448
} else {
1449
SubtractInstrIdx += InstrOffset;
1450
++StackAdjust;
1451
1452
if ((StackSize & 0xFF) == StackSize) {
1453
// Frameless stack with a small stack size.
1454
CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1455
1456
// Encode the stack size.
1457
CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1458
} else {
1459
if ((StackAdjust & 0x7) != StackAdjust)
1460
// The extra stack adjustments are too big for us to handle.
1461
return CU::UNWIND_MODE_DWARF;
1462
1463
// Frameless stack with an offset too large for us to encode compactly.
1464
CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1465
1466
// Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1467
// instruction.
1468
CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1469
1470
// Encode any extra stack adjustments (done via push instructions).
1471
CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1472
}
1473
1474
// Encode the number of registers saved. (Reverse the list first.)
1475
std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1476
CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1477
1478
// Get the encoding of the saved registers when we don't have a frame
1479
// pointer.
1480
uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1481
if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1482
1483
// Encode the register encoding.
1484
CompactUnwindEncoding |=
1485
RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1486
}
1487
1488
return CompactUnwindEncoding;
1489
}
1490
};
1491
1492
} // end anonymous namespace
1493
1494
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1495
const MCSubtargetInfo &STI,
1496
const MCRegisterInfo &MRI,
1497
const MCTargetOptions &Options) {
1498
const Triple &TheTriple = STI.getTargetTriple();
1499
if (TheTriple.isOSBinFormatMachO())
1500
return new DarwinX86AsmBackend(T, MRI, STI);
1501
1502
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1503
return new WindowsX86AsmBackend(T, false, STI);
1504
1505
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1506
1507
if (TheTriple.isOSIAMCU())
1508
return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1509
1510
return new ELFX86_32AsmBackend(T, OSABI, STI);
1511
}
1512
1513
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1514
const MCSubtargetInfo &STI,
1515
const MCRegisterInfo &MRI,
1516
const MCTargetOptions &Options) {
1517
const Triple &TheTriple = STI.getTargetTriple();
1518
if (TheTriple.isOSBinFormatMachO())
1519
return new DarwinX86AsmBackend(T, MRI, STI);
1520
1521
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1522
return new WindowsX86AsmBackend(T, true, STI);
1523
1524
if (TheTriple.isUEFI()) {
1525
assert(TheTriple.isOSBinFormatCOFF() &&
1526
"Only COFF format is supported in UEFI environment.");
1527
return new WindowsX86AsmBackend(T, true, STI);
1528
}
1529
1530
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1531
1532
if (TheTriple.isX32())
1533
return new ELFX86_X32AsmBackend(T, OSABI, STI);
1534
return new ELFX86_64AsmBackend(T, OSABI, STI);
1535
}
1536
1537
namespace {
1538
class X86ELFStreamer : public MCELFStreamer {
1539
public:
1540
X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1541
std::unique_ptr<MCObjectWriter> OW,
1542
std::unique_ptr<MCCodeEmitter> Emitter)
1543
: MCELFStreamer(Context, std::move(TAB), std::move(OW),
1544
std::move(Emitter)) {}
1545
1546
void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1547
};
1548
} // end anonymous namespace
1549
1550
void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1551
const MCSubtargetInfo &STI) {
1552
auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1553
Backend.emitInstructionBegin(S, Inst, STI);
1554
S.MCObjectStreamer::emitInstruction(Inst, STI);
1555
Backend.emitInstructionEnd(S, Inst);
1556
}
1557
1558
void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1559
const MCSubtargetInfo &STI) {
1560
X86_MC::emitInstruction(*this, Inst, STI);
1561
}
1562
1563
MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1564
std::unique_ptr<MCAsmBackend> &&MAB,
1565
std::unique_ptr<MCObjectWriter> &&MOW,
1566
std::unique_ptr<MCCodeEmitter> &&MCE) {
1567
return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1568
std::move(MCE));
1569
}
1570
1571