Path: blob/main/contrib/llvm-project/lld/ELF/ARMErrataFix.cpp
34878 views
//===- ARMErrataFix.cpp ---------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7// This file implements Section Patching for the purpose of working around the8// Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions9// can result in an incorrect instruction fetch or processor deadlock." The10// erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the11// Cortex-A8. A high level description of the patching technique is given in12// the opening comment of AArch64ErrataFix.cpp.13//===----------------------------------------------------------------------===//1415#include "ARMErrataFix.h"16#include "InputFiles.h"17#include "LinkerScript.h"18#include "OutputSections.h"19#include "Relocations.h"20#include "Symbols.h"21#include "SyntheticSections.h"22#include "Target.h"23#include "lld/Common/CommonLinkerContext.h"24#include "lld/Common/Strings.h"25#include "llvm/Support/Endian.h"26#include <algorithm>2728using namespace llvm;29using namespace llvm::ELF;30using namespace llvm::object;31using namespace llvm::support;32using namespace llvm::support::endian;33using namespace lld;34using namespace lld::elf;3536// The documented title for Erratum 657417 is:37// "A 32bit branch instruction that spans two 4K regions can result in an38// incorrect instruction fetch or processor deadlock". Graphically using a39// 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff40// xxxxxx000 // Memory region 1 start41// target:42// ...43// xxxxxxffe f7fe // First halfword of branch to target:44// xxxxxx000 // Memory region 2 start45// xxxxxx002 bfff // Second halfword of branch to target:46//47// The specific trigger conditions that can be detected at link time are:48// - There is a 32-bit Thumb-2 branch instruction with an address of the form49// xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the50// second 2 bytes are in region 2.51// - The branch instruction is one of BLX, BL, B.w BCC.w52// - The instruction preceding the branch is a 32-bit non-branch instruction.53// - The target of the branch is in region 1.54//55// The linker mitigation for the fix is to redirect any branch that meets the56// erratum conditions to a patch section containing a branch to the target.57//58// As adding patch sections may move branches onto region boundaries the patch59// must iterate until no more patches are added.60//61// Example, before:62// 00000FFA func: NOP.w // 32-bit Thumb function63// 00000FFE B.W func // 32-bit branch spanning 2 regions, dest in 1st.64// Example, after:65// 00000FFA func: NOP.w // 32-bit Thumb function66// 00000FFE B.w __CortexA8657417_00000FFE67// 00001002 2 - bytes padding68// 00001004 __CortexA8657417_00000FFE: B.w func6970class elf::Patch657417Section final : public SyntheticSection {71public:72Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM);7374void writeTo(uint8_t *buf) override;7576size_t getSize() const override { return 4; }7778// Get the virtual address of the branch instruction at patcheeOffset.79uint64_t getBranchAddr() const;8081static bool classof(const SectionBase *d) {82return d->kind() == InputSectionBase::Synthetic && d->name ==".text.patch";83}8485// The Section we are patching.86const InputSection *patchee;87// The offset of the instruction in the Patchee section we are patching.88uint64_t patcheeOffset;89// A label for the start of the Patch that we can use as a relocation target.90Symbol *patchSym;91// A decoding of the branch instruction at patcheeOffset.92uint32_t instr;93// True If the patch is to be written in ARM state, otherwise the patch will94// be written in Thumb state.95bool isARM;96};9798// Return true if the half-word, when taken as the first of a pair of halfwords99// is the first half of a 32-bit instruction.100// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition101// section A6.3: 32-bit Thumb instruction encoding102// | HW1 | HW2 |103// | 1 1 1 | op1 (2) | op2 (7) | x (4) |op| x (15) |104// With op1 == 0b00, a 16-bit instruction is encoded.105//106// We test only the first halfword, looking for op != 0b00.107static bool is32bitInstruction(uint16_t hw) {108return (hw & 0xe000) == 0xe000 && (hw & 0x1800) != 0x0000;109}110111// Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition112// section A6.3.4 Branches and miscellaneous control.113// | HW1 | HW2 |114// | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) |115// op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W)116// op1 == 0x1 | Branch (B.W)117// op1 == 1x0 | Branch with Link and Exchange (BLX.w)118// op1 == 1x1 | Branch with Link (BL.W)119120static bool isBcc(uint32_t instr) {121return (instr & 0xf800d000) == 0xf0008000 &&122(instr & 0x03800000) != 0x03800000;123}124125static bool isB(uint32_t instr) { return (instr & 0xf800d000) == 0xf0009000; }126127static bool isBLX(uint32_t instr) { return (instr & 0xf800d000) == 0xf000c000; }128129static bool isBL(uint32_t instr) { return (instr & 0xf800d000) == 0xf000d000; }130131static bool is32bitBranch(uint32_t instr) {132return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr);133}134135Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,136uint32_t instr, bool isARM)137: SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,138".text.patch"),139patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {140parent = p->getParent();141patchSym = addSyntheticLocal(142saver().save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,143isARM ? 0 : 1, getSize(), *this);144addSyntheticLocal(saver().save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);145}146147uint64_t Patch657417Section::getBranchAddr() const {148return patchee->getVA(patcheeOffset);149}150151// Given a branch instruction instr at sourceAddr work out its destination152// address. This is only used when the branch instruction has no relocation.153static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) {154uint8_t buf[4];155write16le(buf, instr >> 16);156write16le(buf + 2, instr & 0x0000ffff);157int64_t offset;158if (isBcc(instr))159offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19);160else if (isB(instr))161offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24);162else163offset = target->getImplicitAddend(buf, R_ARM_THM_CALL);164// A BLX instruction from Thumb to Arm may have an address that is165// not 4-byte aligned. As Arm instructions are always 4-byte aligned166// the instruction is calculated (from Arm ARM):167// targetAddress = Align(PC, 4) + imm32168// where169// Align(x, y) = y * (x Div y)170// which corresponds to alignDown.171if (isBLX(instr))172sourceAddr = alignDown(sourceAddr, 4);173return sourceAddr + offset + 4;174}175176void Patch657417Section::writeTo(uint8_t *buf) {177// The base instruction of the patch is always a 32-bit unconditional branch.178if (isARM)179write32le(buf, 0xea000000);180else181write32le(buf, 0x9000f000);182// If we have a relocation then apply it.183if (!relocs().empty()) {184target->relocateAlloc(*this, buf);185return;186}187188// If we don't have a relocation then we must calculate and write the offset189// ourselves.190// Get the destination offset from the addend in the branch instruction.191// We cannot use the instruction in the patchee section as this will have192// been altered to point to us!193uint64_t s = getThumbDestAddr(getBranchAddr(), instr);194// A BLX changes the state of the branch in the patch to Arm state, which195// has a PC Bias of 8, whereas in all other cases the branch is in Thumb196// state with a PC Bias of 4.197uint64_t pcBias = isBLX(instr) ? 8 : 4;198uint64_t p = getVA(pcBias);199target->relocateNoSym(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p);200}201202// Given a branch instruction spanning two 4KiB regions, at offset off from the203// start of isec, return true if the destination of the branch is within the204// first of the two 4Kib regions.205static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off,206uint32_t instr, const Relocation *r) {207uint64_t sourceAddr = isec->getVA(0) + off;208assert((sourceAddr & 0xfff) == 0xffe);209uint64_t destAddr;210// If there is a branch relocation at the same offset we must use this to211// find the destination address as the branch could be indirected via a thunk212// or the PLT.213if (r) {214uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA();215// Account for Thumb PC bias, usually cancelled to 0 by addend of -4.216destAddr = dst + r->addend + 4;217} else {218// If there is no relocation, we must have an intra-section branch219// We must extract the offset from the addend manually.220destAddr = getThumbDestAddr(sourceAddr, instr);221}222223return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000);224}225226// Return true if a branch can reach a patch section placed after isec.227// The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB.228static bool patchInRange(const InputSection *isec, uint64_t off,229uint32_t instr) {230231// We need the branch at source to reach a patch section placed immediately232// after isec. As there can be more than one patch in the patch section we233// add 0x100 as contingency to account for worst case of 1 branch every 4KiB234// for a 1 MiB range.235return target->inBranchRange(236isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off),237isec->getVA() + isec->getSize() + 0x100);238}239240struct ScanResult {241// Offset of branch within its InputSection.242uint64_t off;243// Cached decoding of the branch instruction.244uint32_t instr;245// Branch relocation at off. Will be nullptr if no relocation exists.246Relocation *rel;247};248249// Detect the erratum sequence, returning the offset of the branch instruction250// and a decoding of the branch. If the erratum sequence is not found then251// return an offset of 0 for the branch. 0 is a safe value to use for no patch252// as there must be at least one 32-bit non-branch instruction before the253// branch so the minimum offset for a patch is 4.254static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off,255uint64_t limit) {256uint64_t isecAddr = isec->getVA(0);257// Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We258// need to check for a 32-bit instruction immediately before a 32-bit branch259// at 0xffe modulo 0x1000.260off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr;261if (off >= limit || limit - off < 8) {262// Need at least 2 4-byte sized instructions to trigger erratum.263off = limit;264return {0, 0, nullptr};265}266267ScanResult scanRes = {0, 0, nullptr};268const uint8_t *buf = isec->content().begin();269// ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive270// little-endian halfwords.271const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off);272uint16_t hw11 = *instBuf++;273uint16_t hw12 = *instBuf++;274uint16_t hw21 = *instBuf++;275uint16_t hw22 = *instBuf++;276if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) {277uint32_t instr1 = (hw11 << 16) | hw12;278uint32_t instr2 = (hw21 << 16) | hw22;279if (!is32bitBranch(instr1) && is32bitBranch(instr2)) {280// Find a relocation for the branch if it exists. This will be used281// to determine the target.282uint64_t branchOff = off + 4;283auto relIt = llvm::find_if(isec->relocs(), [=](const Relocation &r) {284return r.offset == branchOff &&285(r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 ||286r.type == R_ARM_THM_CALL);287});288if (relIt != isec->relocs().end())289scanRes.rel = &(*relIt);290if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) {291if (patchInRange(isec, branchOff, instr2)) {292scanRes.off = branchOff;293scanRes.instr = instr2;294} else {295warn(toString(isec->file) +296": skipping cortex-a8 657417 erratum sequence, section " +297isec->name + " is too large to patch");298}299}300}301}302off += 0x1000;303return scanRes;304}305306void ARMErr657417Patcher::init() {307// The Arm ABI permits a mix of ARM, Thumb and Data in the same308// InputSection. We must only scan Thumb instructions to avoid false309// matches. We use the mapping symbols in the InputObjects to identify this310// data, caching the results in sectionMap so we don't have to recalculate311// it each pass.312313// The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe314// half open intervals [Symbol Value, Next Symbol Value) of code and data315// within sections. If there is no next symbol then the half open interval is316// [Symbol Value, End of section). The type, code or data, is determined by317// the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data.318auto isArmMapSymbol = [](const Symbol *s) {319return s->getName() == "$a" || s->getName().starts_with("$a.");320};321auto isThumbMapSymbol = [](const Symbol *s) {322return s->getName() == "$t" || s->getName().starts_with("$t.");323};324auto isDataMapSymbol = [](const Symbol *s) {325return s->getName() == "$d" || s->getName().starts_with("$d.");326};327328// Collect mapping symbols for every executable InputSection.329for (ELFFileBase *file : ctx.objectFiles) {330for (Symbol *s : file->getLocalSymbols()) {331auto *def = dyn_cast<Defined>(s);332if (!def)333continue;334if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) &&335!isDataMapSymbol(def))336continue;337if (auto *sec = dyn_cast_or_null<InputSection>(def->section))338if (sec->flags & SHF_EXECINSTR)339sectionMap[sec].push_back(def);340}341}342// For each InputSection make sure the mapping symbols are in sorted in343// ascending order and are in alternating Thumb, non-Thumb order.344for (auto &kv : sectionMap) {345std::vector<const Defined *> &mapSyms = kv.second;346llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {347return a->value < b->value;348});349mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(),350[=](const Defined *a, const Defined *b) {351return (isThumbMapSymbol(a) ==352isThumbMapSymbol(b));353}),354mapSyms.end());355// Always start with a Thumb Mapping Symbol356if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front()))357mapSyms.erase(mapSyms.begin());358}359initialized = true;360}361362void ARMErr657417Patcher::insertPatches(363InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) {364uint64_t spacing = 0x100000 - 0x7500;365uint64_t isecLimit;366uint64_t prevIsecLimit = isd.sections.front()->outSecOff;367uint64_t patchUpperBound = prevIsecLimit + spacing;368uint64_t outSecAddr = isd.sections.front()->getParent()->addr;369370// Set the outSecOff of patches to the place where we want to insert them.371// We use a similar strategy to initial thunk placement, using 1 MiB as the372// range of the Thumb-2 conditional branch with a contingency accounting for373// thunk generation.374auto patchIt = patches.begin();375auto patchEnd = patches.end();376for (const InputSection *isec : isd.sections) {377isecLimit = isec->outSecOff + isec->getSize();378if (isecLimit > patchUpperBound) {379for (; patchIt != patchEnd; ++patchIt) {380if ((*patchIt)->getBranchAddr() - outSecAddr >= prevIsecLimit)381break;382(*patchIt)->outSecOff = prevIsecLimit;383}384patchUpperBound = prevIsecLimit + spacing;385}386prevIsecLimit = isecLimit;387}388for (; patchIt != patchEnd; ++patchIt)389(*patchIt)->outSecOff = isecLimit;390391// Merge all patch sections. We use the outSecOff assigned above to392// determine the insertion point. This is ok as we only merge into an393// InputSectionDescription once per pass, and at the end of the pass394// assignAddresses() will recalculate all the outSecOff values.395SmallVector<InputSection *, 0> tmp;396tmp.reserve(isd.sections.size() + patches.size());397auto mergeCmp = [](const InputSection *a, const InputSection *b) {398if (a->outSecOff != b->outSecOff)399return a->outSecOff < b->outSecOff;400return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b);401};402std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(),403patches.end(), std::back_inserter(tmp), mergeCmp);404isd.sections = std::move(tmp);405}406407// Given a branch instruction described by ScanRes redirect it to a patch408// section containing an unconditional branch instruction to the target.409// Ensure that this patch section is 4-byte aligned so that the branch cannot410// span two 4 KiB regions. Place the patch section so that it is always after411// isec so the branch we are patching always goes forwards.412static void implementPatch(ScanResult sr, InputSection *isec,413std::vector<Patch657417Section *> &patches) {414415log("detected cortex-a8-657419 erratum sequence starting at " +416utohexstr(isec->getVA(sr.off)) + " in unpatched output.");417Patch657417Section *psec;418// We have two cases to deal with.419// Case 1. There is a relocation at patcheeOffset to a symbol. The420// unconditional branch in the patch must have a relocation so that any421// further redirection via the PLT or a Thunk happens as normal. At422// patcheeOffset we redirect the existing relocation to a Symbol defined at423// the start of the patch section.424//425// Case 2. There is no relocation at patcheeOffset. We are unlikely to have426// a symbol that we can use as a target for a relocation in the patch section.427// Luckily we know that the destination cannot be indirected via the PLT or428// a Thunk so we can just write the destination directly.429if (sr.rel) {430// Case 1. We have an existing relocation to redirect to patch and a431// Symbol target.432433// Create a branch relocation for the unconditional branch in the patch.434// This can be redirected via the PLT or Thunks.435RelType patchRelType = R_ARM_THM_JUMP24;436int64_t patchRelAddend = sr.rel->addend;437bool destIsARM = false;438if (isBL(sr.instr) || isBLX(sr.instr)) {439// The final target of the branch may be ARM or Thumb, if the target440// is ARM then we write the patch in ARM state to avoid a state change441// Thunk from the patch to the target.442uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA()443: sr.rel->sym->getVA();444destIsARM = (dstSymAddr & 1) == 0;445}446psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM);447if (destIsARM) {448// The patch will be in ARM state. Use an ARM relocation and account for449// the larger ARM PC-bias of 8 rather than Thumb's 4.450patchRelType = R_ARM_JUMP24;451patchRelAddend -= 4;452}453psec->addReloc(454Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym});455// Redirect the existing branch relocation to the patch.456sr.rel->expr = R_PC;457sr.rel->addend = -4;458sr.rel->sym = psec->patchSym;459} else {460// Case 2. We do not have a relocation to the patch. Add a relocation of the461// appropriate type to the patch at patcheeOffset.462463// The destination is ARM if we have a BLX.464psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr));465RelType type;466if (isBcc(sr.instr))467type = R_ARM_THM_JUMP19;468else if (isB(sr.instr))469type = R_ARM_THM_JUMP24;470else471type = R_ARM_THM_CALL;472isec->addReloc(Relocation{R_PC, type, sr.off, -4, psec->patchSym});473}474patches.push_back(psec);475}476477// Scan all the instructions in InputSectionDescription, for each instance of478// the erratum sequence create a Patch657417Section. We return the list of479// Patch657417Sections that need to be applied to the InputSectionDescription.480std::vector<Patch657417Section *>481ARMErr657417Patcher::patchInputSectionDescription(482InputSectionDescription &isd) {483std::vector<Patch657417Section *> patches;484for (InputSection *isec : isd.sections) {485// LLD doesn't use the erratum sequence in SyntheticSections.486if (isa<SyntheticSection>(isec))487continue;488// Use sectionMap to make sure we only scan Thumb code and not Arm or inline489// data. We have already sorted mapSyms in ascending order and removed490// consecutive mapping symbols of the same type. Our range of executable491// instructions to scan is therefore [thumbSym->value, nonThumbSym->value)492// or [thumbSym->value, section size).493std::vector<const Defined *> &mapSyms = sectionMap[isec];494495auto thumbSym = mapSyms.begin();496while (thumbSym != mapSyms.end()) {497auto nonThumbSym = std::next(thumbSym);498uint64_t off = (*thumbSym)->value;499uint64_t limit = nonThumbSym == mapSyms.end() ? isec->content().size()500: (*nonThumbSym)->value;501502while (off < limit) {503ScanResult sr = scanCortexA8Errata657417(isec, off, limit);504if (sr.off)505implementPatch(sr, isec, patches);506}507if (nonThumbSym == mapSyms.end())508break;509thumbSym = std::next(nonThumbSym);510}511}512return patches;513}514515bool ARMErr657417Patcher::createFixes() {516if (!initialized)517init();518519bool addressesChanged = false;520for (OutputSection *os : outputSections) {521if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))522continue;523for (SectionCommand *cmd : os->commands)524if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {525std::vector<Patch657417Section *> patches =526patchInputSectionDescription(*isd);527if (!patches.empty()) {528insertPatches(*isd, patches);529addressesChanged = true;530}531}532}533return addressesChanged;534}535536537