// Path: blob/main/contrib/llvm-project/llvm/lib/ObjCopy/MachO/MachOReader.cpp
// 35269 views
//===- MachOReader.cpp ------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "MachOReader.h"9#include "MachOObject.h"10#include "llvm/BinaryFormat/MachO.h"11#include "llvm/Object/MachO.h"12#include "llvm/Support/Errc.h"13#include "llvm/Support/SystemZ/zOSSupport.h"14#include <memory>1516using namespace llvm;17using namespace llvm::objcopy;18using namespace llvm::objcopy::macho;1920void MachOReader::readHeader(Object &O) const {21O.Header.Magic = MachOObj.getHeader().magic;22O.Header.CPUType = MachOObj.getHeader().cputype;23O.Header.CPUSubType = MachOObj.getHeader().cpusubtype;24O.Header.FileType = MachOObj.getHeader().filetype;25O.Header.NCmds = MachOObj.getHeader().ncmds;26O.Header.SizeOfCmds = MachOObj.getHeader().sizeofcmds;27O.Header.Flags = MachOObj.getHeader().flags;28}2930template <typename SectionType>31static Section constructSectionCommon(const SectionType &Sec, uint32_t Index) {32StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));33StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));34Section S(SegName, SectName);35S.Index = Index;36S.Addr = Sec.addr;37S.Size = Sec.size;38S.OriginalOffset = Sec.offset;39S.Align = Sec.align;40S.RelOff = Sec.reloff;41S.NReloc = Sec.nreloc;42S.Flags = Sec.flags;43S.Reserved1 = Sec.reserved1;44S.Reserved2 = Sec.reserved2;45S.Reserved3 = 0;46return S;47}4849Section constructSection(const MachO::section &Sec, uint32_t Index) {50return constructSectionCommon(Sec, Index);51}5253Section constructSection(const MachO::section_64 &Sec, uint32_t Index) {54Section S = constructSectionCommon(Sec, Index);55S.Reserved3 = Sec.reserved3;56return S;57}5859template <typename SectionType, typename 
SegmentType>60Expected<std::vector<std::unique_ptr<Section>>> static extractSections(61const object::MachOObjectFile::LoadCommandInfo &LoadCmd,62const object::MachOObjectFile &MachOObj, uint32_t &NextSectionIndex) {63std::vector<std::unique_ptr<Section>> Sections;64for (auto Curr = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +65sizeof(SegmentType)),66End = reinterpret_cast<const SectionType *>(LoadCmd.Ptr +67LoadCmd.C.cmdsize);68Curr < End; ++Curr) {69SectionType Sec;70memcpy((void *)&Sec, reinterpret_cast<const char *>(Curr),71sizeof(SectionType));7273if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)74MachO::swapStruct(Sec);7576Sections.push_back(77std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));7879Section &S = *Sections.back();8081Expected<object::SectionRef> SecRef =82MachOObj.getSection(NextSectionIndex++);83if (!SecRef)84return SecRef.takeError();8586Expected<ArrayRef<uint8_t>> Data =87MachOObj.getSectionContents(SecRef->getRawDataRefImpl());88if (!Data)89return Data.takeError();9091S.Content =92StringRef(reinterpret_cast<const char *>(Data->data()), Data->size());9394const uint32_t CPUType = MachOObj.getHeader().cputype;95S.Relocations.reserve(S.NReloc);96for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),97RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());98RI != RE; ++RI) {99RelocationInfo R;100R.Symbol = nullptr; // We'll fill this field later.101R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());102R.Scattered = MachOObj.isRelocationScattered(R.Info);103unsigned Type = MachOObj.getAnyRelocationType(R.Info);104// TODO Support CPU_TYPE_ARM.105R.IsAddend = !R.Scattered && (CPUType == MachO::CPU_TYPE_ARM64 &&106Type == MachO::ARM64_RELOC_ADDEND);107R.Extern = !R.Scattered && MachOObj.getPlainRelocationExternal(R.Info);108S.Relocations.push_back(R);109}110111assert(S.NReloc == S.Relocations.size() &&112"Incorrect number of relocations");113}114return std::move(Sections);115}116117Error 
MachOReader::readLoadCommands(Object &O) const {118// For MachO sections indices start from 1.119uint32_t NextSectionIndex = 1;120static constexpr char TextSegmentName[] = "__TEXT";121for (auto LoadCmd : MachOObj.load_commands()) {122LoadCommand LC;123switch (LoadCmd.C.cmd) {124case MachO::LC_CODE_SIGNATURE:125O.CodeSignatureCommandIndex = O.LoadCommands.size();126break;127case MachO::LC_SEGMENT:128// LoadCmd.Ptr might not be aligned temporarily as129// MachO::segment_command requires, but the segname char pointer do not130// have alignment restrictions.131if (StringRef(reinterpret_cast<const char *>(132LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==133TextSegmentName)134O.TextSegmentCommandIndex = O.LoadCommands.size();135136if (Expected<std::vector<std::unique_ptr<Section>>> Sections =137extractSections<MachO::section, MachO::segment_command>(138LoadCmd, MachOObj, NextSectionIndex))139LC.Sections = std::move(*Sections);140else141return Sections.takeError();142break;143case MachO::LC_SEGMENT_64:144// LoadCmd.Ptr might not be aligned temporarily as145// MachO::segment_command_64 requires, but the segname char pointer do146// not have alignment restrictions.147if (StringRef(reinterpret_cast<const char *>(148LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==149TextSegmentName)150O.TextSegmentCommandIndex = O.LoadCommands.size();151152if (Expected<std::vector<std::unique_ptr<Section>>> Sections =153extractSections<MachO::section_64, MachO::segment_command_64>(154LoadCmd, MachOObj, NextSectionIndex))155LC.Sections = std::move(*Sections);156else157return Sections.takeError();158break;159case MachO::LC_SYMTAB:160O.SymTabCommandIndex = O.LoadCommands.size();161break;162case MachO::LC_DYSYMTAB:163O.DySymTabCommandIndex = O.LoadCommands.size();164break;165case MachO::LC_DYLD_INFO:166case MachO::LC_DYLD_INFO_ONLY:167O.DyLdInfoCommandIndex = O.LoadCommands.size();168break;169case MachO::LC_DATA_IN_CODE:170O.DataInCodeCommandIndex = 
O.LoadCommands.size();171break;172case MachO::LC_LINKER_OPTIMIZATION_HINT:173O.LinkerOptimizationHintCommandIndex = O.LoadCommands.size();174break;175case MachO::LC_FUNCTION_STARTS:176O.FunctionStartsCommandIndex = O.LoadCommands.size();177break;178case MachO::LC_DYLIB_CODE_SIGN_DRS:179O.DylibCodeSignDRsIndex = O.LoadCommands.size();180break;181case MachO::LC_DYLD_EXPORTS_TRIE:182O.ExportsTrieCommandIndex = O.LoadCommands.size();183break;184case MachO::LC_DYLD_CHAINED_FIXUPS:185O.ChainedFixupsCommandIndex = O.LoadCommands.size();186break;187}188#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \189case MachO::LCName: \190memcpy((void *)&(LC.MachOLoadCommand.LCStruct##_data), LoadCmd.Ptr, \191sizeof(MachO::LCStruct)); \192if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) \193MachO::swapStruct(LC.MachOLoadCommand.LCStruct##_data); \194if (LoadCmd.C.cmdsize > sizeof(MachO::LCStruct)) \195LC.Payload = ArrayRef<uint8_t>( \196reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) + \197sizeof(MachO::LCStruct), \198LoadCmd.C.cmdsize - sizeof(MachO::LCStruct)); \199break;200201switch (LoadCmd.C.cmd) {202default:203memcpy((void *)&(LC.MachOLoadCommand.load_command_data), LoadCmd.Ptr,204sizeof(MachO::load_command));205if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost)206MachO::swapStruct(LC.MachOLoadCommand.load_command_data);207if (LoadCmd.C.cmdsize > sizeof(MachO::load_command))208LC.Payload = ArrayRef<uint8_t>(209reinterpret_cast<uint8_t *>(const_cast<char *>(LoadCmd.Ptr)) +210sizeof(MachO::load_command),211LoadCmd.C.cmdsize - sizeof(MachO::load_command));212break;213#include "llvm/BinaryFormat/MachO.def"214}215O.LoadCommands.push_back(std::move(LC));216}217return Error::success();218}219220template <typename nlist_t>221SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {222assert(nlist.n_strx < StrTable.size() &&223"n_strx exceeds the size of the string table");224SymbolEntry SE;225SE.Name = StringRef(StrTable.data() + 
nlist.n_strx).str();226SE.n_type = nlist.n_type;227SE.n_sect = nlist.n_sect;228SE.n_desc = nlist.n_desc;229SE.n_value = nlist.n_value;230return SE;231}232233void MachOReader::readSymbolTable(Object &O) const {234StringRef StrTable = MachOObj.getStringTableData();235for (auto Symbol : MachOObj.symbols()) {236SymbolEntry SE =237(MachOObj.is64Bit()238? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(239Symbol.getRawDataRefImpl()))240: constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(241Symbol.getRawDataRefImpl())));242243O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));244}245}246247void MachOReader::setSymbolInRelocationInfo(Object &O) const {248std::vector<const Section *> Sections;249for (auto &LC : O.LoadCommands)250for (std::unique_ptr<Section> &Sec : LC.Sections)251Sections.push_back(Sec.get());252253for (LoadCommand &LC : O.LoadCommands)254for (std::unique_ptr<Section> &Sec : LC.Sections)255for (auto &Reloc : Sec->Relocations)256if (!Reloc.Scattered && !Reloc.IsAddend) {257const uint32_t SymbolNum =258Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian());259if (Reloc.Extern) {260Reloc.Symbol = O.SymTable.getSymbolByIndex(SymbolNum);261} else {262// FIXME: Refactor error handling in MachOReader and report an error263// if we encounter an invalid relocation.264assert(SymbolNum >= 1 && SymbolNum <= Sections.size() &&265"Invalid section index.");266Reloc.Sec = Sections[SymbolNum - 1];267}268}269}270271void MachOReader::readRebaseInfo(Object &O) const {272O.Rebases.Opcodes = MachOObj.getDyldInfoRebaseOpcodes();273}274275void MachOReader::readBindInfo(Object &O) const {276O.Binds.Opcodes = MachOObj.getDyldInfoBindOpcodes();277}278279void MachOReader::readWeakBindInfo(Object &O) const {280O.WeakBinds.Opcodes = MachOObj.getDyldInfoWeakBindOpcodes();281}282283void MachOReader::readLazyBindInfo(Object &O) const {284O.LazyBinds.Opcodes = MachOObj.getDyldInfoLazyBindOpcodes();285}286287void MachOReader::readExportInfo(Object 
&O) const {288// This information can be in LC_DYLD_INFO or in LC_DYLD_EXPORTS_TRIE289ArrayRef<uint8_t> Trie = MachOObj.getDyldInfoExportsTrie();290if (Trie.empty())291Trie = MachOObj.getDyldExportsTrie();292O.Exports.Trie = Trie;293}294295void MachOReader::readLinkData(Object &O, std::optional<size_t> LCIndex,296LinkData &LD) const {297if (!LCIndex)298return;299const MachO::linkedit_data_command &LC =300O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;301LD.Data =302arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));303}304305void MachOReader::readDataInCodeData(Object &O) const {306return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);307}308309void MachOReader::readLinkerOptimizationHint(Object &O) const {310return readLinkData(O, O.LinkerOptimizationHintCommandIndex,311O.LinkerOptimizationHint);312}313314void MachOReader::readFunctionStartsData(Object &O) const {315return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);316}317318void MachOReader::readDylibCodeSignDRs(Object &O) const {319return readLinkData(O, O.DylibCodeSignDRsIndex, O.DylibCodeSignDRs);320}321322void MachOReader::readExportsTrie(Object &O) const {323return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);324}325326void MachOReader::readChainedFixups(Object &O) const {327return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);328}329330void MachOReader::readIndirectSymbolTable(Object &O) const {331MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();332constexpr uint32_t AbsOrLocalMask =333MachO::INDIRECT_SYMBOL_LOCAL | MachO::INDIRECT_SYMBOL_ABS;334for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i) {335uint32_t Index = MachOObj.getIndirectSymbolTableEntry(DySymTab, i);336if ((Index & AbsOrLocalMask) != 0)337O.IndirectSymTable.Symbols.emplace_back(Index, std::nullopt);338else339O.IndirectSymTable.Symbols.emplace_back(340Index, O.SymTable.getSymbolByIndex(Index));341}342}343344void 
MachOReader::readSwiftVersion(Object &O) const {345struct ObjCImageInfo {346uint32_t Version;347uint32_t Flags;348} ImageInfo;349350for (const LoadCommand &LC : O.LoadCommands)351for (const std::unique_ptr<Section> &Sec : LC.Sections)352if (Sec->Sectname == "__objc_imageinfo" &&353(Sec->Segname == "__DATA" || Sec->Segname == "__DATA_CONST" ||354Sec->Segname == "__DATA_DIRTY") &&355Sec->Content.size() >= sizeof(ObjCImageInfo)) {356memcpy(&ImageInfo, Sec->Content.data(), sizeof(ObjCImageInfo));357if (MachOObj.isLittleEndian() != sys::IsLittleEndianHost) {358sys::swapByteOrder(ImageInfo.Version);359sys::swapByteOrder(ImageInfo.Flags);360}361O.SwiftVersion = (ImageInfo.Flags >> 8) & 0xff;362return;363}364}365366Expected<std::unique_ptr<Object>> MachOReader::create() const {367auto Obj = std::make_unique<Object>();368readHeader(*Obj);369if (Error E = readLoadCommands(*Obj))370return std::move(E);371readSymbolTable(*Obj);372setSymbolInRelocationInfo(*Obj);373readRebaseInfo(*Obj);374readBindInfo(*Obj);375readWeakBindInfo(*Obj);376readLazyBindInfo(*Obj);377readExportInfo(*Obj);378readDataInCodeData(*Obj);379readLinkerOptimizationHint(*Obj);380readFunctionStartsData(*Obj);381readDylibCodeSignDRs(*Obj);382readExportsTrie(*Obj);383readChainedFixups(*Obj);384readIndirectSymbolTable(*Obj);385readSwiftVersion(*Obj);386return std::move(Obj);387}388389390