// Path: contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect.(plus separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};
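// Illustrative note (not in the upstream file): assigning the string
// "fused+jcc" to an X86AlignBranchKind via operator= above yields the mask
// X86::AlignBranchFused | X86::AlignBranchJcc.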
X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
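// Example invocations exercising the flags above (hypothetical command lines;
// the exact driver spelling may differ between LLVM versions):
//   llvm-mc -filetype=obj -triple=x86_64 \
//           -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc foo.s
//   clang -c -mllvm -x86-branches-within-32B-boundaries foo.c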
class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  unsigned PrevInstOpcode = 0;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool IsRightAfterData = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI);
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup,
                            uint64_t Value) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(const MCAssembler &Asm) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}
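// Relaxation sketch for the branches above: a short conditional branch with
// a rel8 operand (2 bytes) is widened to the rel32 form (6 bytes) when its
// target turns out to be out of range, i.e. X86::JCC_1 -> X86::JCC_4.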
static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(Opcode).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}
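// A classic fusible pair, for orientation: "cmp %rcx, %rax" directly followed
// by "je .L" can macro-fuse, whereas a cmp with a RIP-relative memory operand
// is rejected by isFirstMacroFusibleInst above.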
/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get a better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}
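// Concrete outcome (sketch, assuming the standard segment prefix encodings):
// in 64-bit mode an instruction with no explicit segment is padded with 0x2e
// (%cs); in 32-bit mode "movl %eax, (%ebp)" would be padded with 0x36 (%ss)
// and "movl %eax, (%esi)" with 0x3e (%ds).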
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction may have such an interrupt delay slot.
static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
  switch (InstOpcode) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    // In fact, this is only the case if the first operand is SS. However, as
    // segment moves occur extremely rarely, this is just a minor pessimization.
    return true;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's empty (section start or data after align), return false.
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF->getContents().size() &&
           (DF != PrevInstPosition.first ||
            DF->getContents().size() != PrevInstPosition.second);

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with a variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (mayHaveInterruptDelaySlot(PrevInstOpcode))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
    return false;

  return true;
}
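// Two sequences canPadInst refuses to pad, as a sanity check: "sti; hlt"
// (a nop after sti would occupy the interrupt delay slot) and "rep; movsb"
// (a nop between the standalone prefix and movsb would detach the rep).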
bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in text section.
  if (!OS.getCurrentSectionOnly()->isText())
    return false;

  // TODO: we currently don't handle bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  // Used by canPadInst. Done here, because in emitInstructionEnd, the current
  // fragment will have changed.
  IsRightAfterData =
      isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);

  if (!canPadBranches(OS))
    return;

  // NB: PrevInst only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
    PendingBA = nullptr;

  // When branch padding is enabled (basically the skx102 erratum => unlikely),
  // we call canPadInst (not cheap) twice. However, in the common case, we can
  // avoid unnecessary calls to that, as this is otherwise only used for
  // relaxable fragments.
  if (!canPadInst(Inst, OS))
    return;

  if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
        AlignBoundary, STI);
    OS.insert(PendingBA);
  }
}
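// Layout sketch under "-x86-align-branch-boundary=32 -x86-align-branch=jcc":
// if a "je" would otherwise cross a 32-byte boundary, the
// MCBoundaryAlignFragment inserted above is sized during relaxation so that
// the branch no longer crosses (or ends against) the boundary.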
/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  MCFragment *CF = OS.getCurrentFragment();
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(canPadInst(Inst, OS));

  // Update PrevInstOpcode here, since canPadInst() reads it.
  PrevInstOpcode = Inst.getOpcode();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));

  if (!canPadBranches(OS))
    return;

  // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
  PrevInst = Inst;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(OS.getContext().allocFragment<MCDataFragment>());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}
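// Example assembly resolved by getFixupKind above (hypothetical input):
//   .reloc ., R_X86_64_NONE, foo     # ELF relocation name from the .def file
//   .reloc ., BFD_RELOC_32, bar      # GNU-as style alias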
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup, const MCValue &,
                                          const MCSubtargetInfo *STI) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
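// Worked example for the loop above: a 4-byte FK_Data_4 fixup at offset 3
// with Value 0x11223344 stores bytes 44 33 22 11 (little-endian) into
// Data[3..6].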
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}
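// Prefix padding sketch for the routine below: in 64-bit mode a 2-byte
// "addl %esi, %edi" (01 f7) padded by two bytes becomes 2e 2e 01 f7 (two
// %cs overrides, which are ignored on a register-only instruction), growing
// the fragment without adding an instruction.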
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine number of prefixes it's safe to add. Various
    // targets (older chips mostly, but also Atom family) encounter decoder
    // stalls with too many prefixes. For testing purposes, we set the value
    // externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}
void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Asm.getFragmentOffset(F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          Sec.setHasLayout(false);

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      // BoundaryAlign explicitly tracks its size (unlike align)
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Asm.getFragmentOffset(F);
      const uint64_t FinalSize = Asm.computeFragmentSize(F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (MCSection &Section : Asm) {
    Asm.getFragmentOffset(*Section.curFragList()->Tail);
    Asm.computeFragmentSize(*Section.curFragList()->Tail);
  }
}
unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}
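// writeNopData sketch (below): with MaxNopLength == 15, a request for 20
// bytes is emitted as one 15-byte nop (five 0x66 prefixes on the 10-byte
// form) followed by a 5-byte nop.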
/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char (*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};
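// Example .reloc names resolved by the COFF getFixupKind override above
// (hypothetical input): ".reloc ., secrel32, foo" or ".reloc ., dir32, bar".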
namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;     ///< Size of a stack slot saved by a "push".
  unsigned MoveInstrSize;  ///< Size of a "move" instruction.
  unsigned StackDivide;    ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }
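  // Illustrative packing for the loop above (assuming the 64-bit table): a
  // SavedRegs list beginning {RBX, R12, ...} has CU register numbers 1 and 2,
  // giving RegEnc == (1 << 0) | (2 << 3) == 0x11.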
  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }
  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint64_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    uint64_t StackSize = 0;
    int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int64_t>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }
      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}

namespace {
class X86ELFStreamer : public MCELFStreamer {
public:
  X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                 std::unique_ptr<MCObjectWriter> OW,
                 std::unique_ptr<MCCodeEmitter> Emitter)
      : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                      std::move(Emitter)) {}

  void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
};
} // end anonymous namespace

void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
                             const MCSubtargetInfo &STI) {
  auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
  Backend.emitInstructionBegin(S, Inst, STI);
  S.MCObjectStreamer::emitInstruction(Inst, STI);
  Backend.emitInstructionEnd(S, Inst);
}

void X86ELFStreamer::emitInstruction(const MCInst &Inst,
                                     const MCSubtargetInfo &STI) {
  X86_MC::emitInstruction(*this, Inst, STI);
}

MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
                                       std::unique_ptr<MCAsmBackend> &&MAB,
                                       std::unique_ptr<MCObjectWriter> &&MOW,
                                       std::unique_ptr<MCCodeEmitter> &&MCE) {
  return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
                            std::move(MCE));
}
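// Orientation note (not part of this file): the factory functions above are
// hooked up elsewhere via TargetRegistry, roughly:
//   TargetRegistry::RegisterMCAsmBackend(getTheX86_64Target(),
//                                        createX86_64AsmBackend);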