Path: blob/main/contrib/llvm-project/lld/MachO/ObjC.cpp
34878 views
//===- ObjC.cpp -----------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "ObjC.h"9#include "ConcatOutputSection.h"10#include "InputFiles.h"11#include "InputSection.h"12#include "Layout.h"13#include "OutputSegment.h"14#include "SyntheticSections.h"15#include "Target.h"1617#include "lld/Common/ErrorHandler.h"18#include "llvm/ADT/DenseMap.h"19#include "llvm/BinaryFormat/MachO.h"20#include "llvm/Bitcode/BitcodeReader.h"21#include "llvm/Support/TimeProfiler.h"2223using namespace llvm;24using namespace llvm::MachO;25using namespace lld;26using namespace lld::macho;2728template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {29using SectionHeader = typename LP::section;3031auto *hdr =32reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());33if (hdr->magic != LP::magic)34return false;3536if (const auto *c =37findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {38auto sectionHeaders = ArrayRef<SectionHeader>{39reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};40for (const SectionHeader &secHead : sectionHeaders) {41StringRef sectname(secHead.sectname,42strnlen(secHead.sectname, sizeof(secHead.sectname)));43StringRef segname(secHead.segname,44strnlen(secHead.segname, sizeof(secHead.segname)));45if ((segname == segment_names::data &&46sectname == section_names::objcCatList) ||47(segname == segment_names::text &&48sectname.starts_with(section_names::swift))) {49return true;50}51}52}53return false;54}5556static bool objectHasObjCSection(MemoryBufferRef mb) {57if (target->wordSize == 8)58return ::objectHasObjCSection<LP64>(mb);59else60return ::objectHasObjCSection<ILP32>(mb);61}6263bool macho::hasObjCSection(MemoryBufferRef mb) {64switch (identify_magic(mb.getBuffer())) {65case file_magic::macho_object:66return objectHasObjCSection(mb);67case file_magic::bitcode:68return check(isBitcodeContainingObjCCategory(mb));69default:70return false;71}72}7374namespace {7576#define FOR_EACH_CATEGORY_FIELD(DO) \77DO(Ptr, name) \78DO(Ptr, klass) \79DO(Ptr, instanceMethods) \80DO(Ptr, classMethods) \81DO(Ptr, protocols) \82DO(Ptr, instanceProps) \83DO(Ptr, classProps) \84DO(uint32_t, size)8586CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);8788#undef FOR_EACH_CATEGORY_FIELD8990#define FOR_EACH_CLASS_FIELD(DO) \91DO(Ptr, metaClass) \92DO(Ptr, superClass) \93DO(Ptr, methodCache) \94DO(Ptr, vtable) \95DO(Ptr, roData)9697CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);9899#undef FOR_EACH_CLASS_FIELD100101#define FOR_EACH_RO_CLASS_FIELD(DO) \102DO(uint32_t, flags) \103DO(uint32_t, instanceStart) \104DO(Ptr, instanceSize) \105DO(Ptr, ivarLayout) \106DO(Ptr, name) \107DO(Ptr, baseMethods) \108DO(Ptr, baseProtocols) \109DO(Ptr, ivars) \110DO(Ptr, weakIvarLayout) \111DO(Ptr, baseProperties)112113CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);114115#undef FOR_EACH_RO_CLASS_FIELD116117#define FOR_EACH_LIST_HEADER(DO) \118DO(uint32_t, structSize) \119DO(uint32_t, structCount)120121CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);122123#undef FOR_EACH_LIST_HEADER124125#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)126127CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);128129#undef FOR_EACH_PROTOCOL_LIST_HEADER130131#define FOR_EACH_METHOD(DO) \132DO(Ptr, name) \133DO(Ptr, type) \134DO(Ptr, impl)135136CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);137138#undef FOR_EACH_METHOD139140enum MethodContainerKind {141MCK_Class,142MCK_Category,143};144145struct MethodContainer {146MethodContainerKind kind;147const ConcatInputSection *isec;148};149150enum MethodKind {151MK_Instance,152MK_Static,153};154155struct ObjcClass {156DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;157DenseMap<CachedHashStringRef, MethodContainer> classMethods;158};159160} // namespace161162class ObjcCategoryChecker {163public:164ObjcCategoryChecker();165void parseCategory(const ConcatInputSection *catListIsec);166167private:168void parseClass(const Defined *classSym);169void parseMethods(const ConcatInputSection *methodsIsec,170const Symbol *methodContainer,171const ConcatInputSection *containerIsec,172MethodContainerKind, MethodKind);173174CategoryLayout catLayout;175ClassLayout classLayout;176ROClassLayout roClassLayout;177ListHeaderLayout listHeaderLayout;178MethodLayout methodLayout;179180DenseMap<const Symbol *, ObjcClass> classMap;181};182183ObjcCategoryChecker::ObjcCategoryChecker()184: catLayout(target->wordSize), classLayout(target->wordSize),185roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),186methodLayout(target->wordSize) {}187188// \p r must point to an offset within a CStringInputSection or a189// ConcatInputSection190static StringRef getReferentString(const Reloc &r) {191if (auto *isec = r.referent.dyn_cast<InputSection *>())192return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);193194auto *sym = cast<Defined>(r.referent.get<Symbol *>());195auto *symIsec = sym->isec();196auto symOffset = sym->value + r.addend;197198if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))199return s->getStringRefAtOffset(symOffset);200201if (isa<ConcatInputSection>(symIsec)) {202auto strData = symIsec->data.slice(symOffset);203const char *pszData = reinterpret_cast<const char *>(strData.data());204return StringRef(pszData, strnlen(pszData, strData.size()));205}206207llvm_unreachable("unknown reference section in getReferentString");208}209210void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,211const Symbol *methodContainerSym,212const ConcatInputSection *containerIsec,213MethodContainerKind mcKind,214MethodKind mKind) {215ObjcClass &klass = classMap[methodContainerSym];216for (const Reloc &r : methodsIsec->relocs) {217if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=218methodLayout.nameOffset)219continue;220221CachedHashStringRef methodName(getReferentString(r));222// +load methods are special: all implementations are called by the runtime223// even if they are part of the same class. Thus there is no need to check224// for duplicates.225// NOTE: Instead of specifically checking for this method name, ld64 simply226// checks whether a class / category is present in __objc_nlclslist /227// __objc_nlcatlist respectively. This will be the case if the class /228// category has a +load method. It skips optimizing the categories if there229// are multiple +load methods. Since it does dupe checking as part of the230// optimization process, this avoids spurious dupe messages around +load,231// but it also means that legit dupe issues for other methods are ignored.232if (mKind == MK_Static && methodName.val() == "load")233continue;234235auto &methodMap =236mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;237if (methodMap238.try_emplace(methodName, MethodContainer{mcKind, containerIsec})239.second)240continue;241242// We have a duplicate; generate a warning message.243const auto &mc = methodMap.lookup(methodName);244const Reloc *nameReloc = nullptr;245if (mc.kind == MCK_Category) {246nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);247} else {248assert(mc.kind == MCK_Class);249const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)250->getReferentInputSection();251nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);252}253StringRef containerName = getReferentString(*nameReloc);254StringRef methPrefix = mKind == MK_Instance ? "-" : "+";255256// We should only ever encounter collisions when parsing category methods257// (since the Class struct is parsed before any of its categories).258assert(mcKind == MCK_Category);259StringRef newCatName =260getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset));261262auto formatObjAndSrcFileName = [](const InputSection *section) {263lld::macho::InputFile *inputFile = section->getFile();264std::string result = toString(inputFile);265266auto objFile = dyn_cast_or_null<ObjFile>(inputFile);267if (objFile && objFile->compileUnit)268result += " (" + objFile->sourceFile() + ")";269270return result;271};272273StringRef containerType = mc.kind == MCK_Category ? "category" : "class";274warn("method '" + methPrefix + methodName.val() +275"' has conflicting definitions:\n>>> defined in category " +276newCatName + " from " + formatObjAndSrcFileName(containerIsec) +277"\n>>> defined in " + containerType + " " + containerName + " from " +278formatObjAndSrcFileName(mc.isec));279}280}281282void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {283auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);284if (!classReloc)285return;286287auto *classSym = classReloc->referent.get<Symbol *>();288if (auto *d = dyn_cast<Defined>(classSym))289if (!classMap.count(d))290parseClass(d);291292if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {293parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),294classSym, catIsec, MCK_Category, MK_Static);295}296297if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {298parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),299classSym, catIsec, MCK_Category, MK_Instance);300}301}302303void ObjcCategoryChecker::parseClass(const Defined *classSym) {304// Given a Class struct, get its corresponding Methods struct305auto getMethodsIsec =306[&](const InputSection *classIsec) -> ConcatInputSection * {307if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {308if (const auto *roIsec =309cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {310if (const auto *r =311roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {312if (auto *methodsIsec = cast_or_null<ConcatInputSection>(313r->getReferentInputSection()))314return methodsIsec;315}316}317}318return nullptr;319};320321const auto *classIsec = cast<ConcatInputSection>(classSym->isec());322323// Parse instance methods.324if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))325parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,326MK_Instance);327328// Class methods are contained in the metaclass.329if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))330if (const auto *classMethodsIsec = getMethodsIsec(331cast<ConcatInputSection>(r->getReferentInputSection())))332parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);333}334335void objc::checkCategories() {336TimeTraceScope timeScope("ObjcCategoryChecker");337338ObjcCategoryChecker checker;339for (const InputSection *isec : inputSections) {340if (isec->getName() == section_names::objcCatList)341for (const Reloc &r : isec->relocs) {342auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());343checker.parseCategory(catIsec);344}345}346}347348namespace {349350class ObjcCategoryMerger {351// In which language was a particular construct originally defined352enum SourceLanguage { Unknown, ObjC, Swift };353354// Information about an input category355struct InfoInputCategory {356ConcatInputSection *catListIsec;357ConcatInputSection *catBodyIsec;358uint32_t offCatListIsec = 0;359SourceLanguage sourceLanguage = SourceLanguage::Unknown;360361bool wasMerged = false;362};363364// To write new (merged) categories or classes, we will try make limited365// assumptions about the alignment and the sections the various class/category366// info are stored in and . So we'll just reuse the same sections and367// alignment as already used in existing (input) categories. To do this we368// have InfoCategoryWriter which contains the various sections that the369// generated categories will be written to.370struct InfoWriteSection {371bool valid = false; // Data has been successfully collected from input372uint32_t align = 0;373Section *inputSection;374Reloc relocTemplate;375OutputSection *outputSection;376};377378struct InfoCategoryWriter {379InfoWriteSection catListInfo;380InfoWriteSection catBodyInfo;381InfoWriteSection catNameInfo;382InfoWriteSection catPtrListInfo;383};384385// Information about a pointer list in the original categories or class(method386// lists, protocol lists, etc)387struct PointerListInfo {388PointerListInfo() = default;389PointerListInfo(const PointerListInfo &) = default;390PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)391: categoryPrefix(_categoryPrefix),392pointersPerStruct(_pointersPerStruct) {}393394inline bool operator==(const PointerListInfo &cmp) const {395return pointersPerStruct == cmp.pointersPerStruct &&396structSize == cmp.structSize && structCount == cmp.structCount &&397allPtrs == cmp.allPtrs;398}399400const char *categoryPrefix;401402uint32_t pointersPerStruct = 0;403404uint32_t structSize = 0;405uint32_t structCount = 0;406407std::vector<Symbol *> allPtrs;408};409410// Full information describing an ObjC class . This will include all the411// additional methods, protocols, and properties that are contained in the412// class and all the categories that extend a particular class.413struct ClassExtensionInfo {414ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};415416// Merged names of containers. Ex: base|firstCategory|secondCategory|...417std::string mergedContainerName;418std::string baseClassName;419const Symbol *baseClass = nullptr;420SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;421422CategoryLayout &catLayout;423424// In case we generate new data, mark the new data as belonging to this file425ObjFile *objFileForMergeData = nullptr;426427PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,428/*pointersPerStruct=*/3};429PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,430/*pointersPerStruct=*/3};431PointerListInfo protocols = {objc::symbol_names::categoryProtocols,432/*pointersPerStruct=*/0};433PointerListInfo instanceProps = {objc::symbol_names::listProprieties,434/*pointersPerStruct=*/2};435PointerListInfo classProps = {objc::symbol_names::klassPropList,436/*pointersPerStruct=*/2};437};438439public:440ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);441void doMerge();442static void doCleanup();443444private:445DenseSet<const Symbol *> collectNlCategories();446void collectAndValidateCategoriesData();447void448mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);449450void eraseISec(ConcatInputSection *isec);451void eraseMergedCategories();452453void generateCatListForNonErasedCategories(454MapVector<ConcatInputSection *, std::set<uint64_t>>455catListToErasedOffsets);456void collectSectionWriteInfoFromIsec(const InputSection *isec,457InfoWriteSection &catWriteInfo);458void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);459void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,460ClassExtensionInfo &extInfo);461462void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,463PointerListInfo &ptrList,464SourceLanguage sourceLang);465466PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,467uint32_t secOffset,468SourceLanguage sourceLang);469470void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,471PointerListInfo &ptrList);472473void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,474const ClassExtensionInfo &extInfo,475const PointerListInfo &ptrList);476477Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,478const ClassExtensionInfo &extInfo,479const PointerListInfo &ptrList);480481Defined *emitCategory(const ClassExtensionInfo &extInfo);482Defined *emitCatListEntrySec(const std::string &forCategoryName,483const std::string &forBaseClassName,484ObjFile *objFile);485Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,486const Symbol *baseClassSym,487const std::string &baseClassName, ObjFile *objFile);488Defined *emitCategoryName(const std::string &name, ObjFile *objFile);489void createSymbolReference(Defined *refFrom, const Symbol *refTo,490uint32_t offset, const Reloc &relocTemplate);491Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);492Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,493uint32_t offset);494Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,495uint32_t offset);496Defined *getClassRo(const Defined *classSym, bool getMetaRo);497SourceLanguage getClassSymSourceLang(const Defined *classSym);498void mergeCategoriesIntoBaseClass(const Defined *baseClass,499std::vector<InfoInputCategory> &categories);500void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);501void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,502uint32_t offset);503504// Allocate a null-terminated StringRef backed by generatedSectionData505StringRef newStringData(const char *str);506// Allocate section data, backed by generatedSectionData507SmallVector<uint8_t> &newSectionData(uint32_t size);508509CategoryLayout catLayout;510ClassLayout classLayout;511ROClassLayout roClassLayout;512ListHeaderLayout listHeaderLayout;513MethodLayout methodLayout;514ProtocolListHeaderLayout protocolListHeaderLayout;515516InfoCategoryWriter infoCategoryWriter;517std::vector<ConcatInputSection *> &allInputSections;518// Map of base class Symbol to list of InfoInputCategory's for it519MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;520521// Normally, the binary data comes from the input files, but since we're522// generating binary data ourselves, we use the below array to store it in.523// Need this to be 'static' so the data survives past the ObjcCategoryMerger524// object, as the data will be read by the Writer when the final binary is525// generated.526static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>527generatedSectionData;528};529530SmallVector<std::unique_ptr<SmallVector<uint8_t>>>531ObjcCategoryMerger::generatedSectionData;532533ObjcCategoryMerger::ObjcCategoryMerger(534std::vector<ConcatInputSection *> &_allInputSections)535: catLayout(target->wordSize), classLayout(target->wordSize),536roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),537methodLayout(target->wordSize),538protocolListHeaderLayout(target->wordSize),539allInputSections(_allInputSections) {}540541void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(542const InputSection *isec, InfoWriteSection &catWriteInfo) {543544catWriteInfo.inputSection = const_cast<Section *>(&isec->section);545catWriteInfo.align = isec->align;546catWriteInfo.outputSection = isec->parent;547548assert(catWriteInfo.outputSection &&549"outputSection may not be null in collectSectionWriteInfoFromIsec.");550551if (isec->relocs.size())552catWriteInfo.relocTemplate = isec->relocs[0];553554catWriteInfo.valid = true;555}556557Symbol *558ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,559uint32_t offset) {560if (!isec)561return nullptr;562const Reloc *reloc = isec->getRelocAt(offset);563564if (!reloc)565return nullptr;566567Symbol *sym = reloc->referent.get<Symbol *>();568569if (reloc->addend) {570assert(isa<Defined>(sym) && "Expected defined for non-zero addend");571Defined *definedSym = cast<Defined>(sym);572sym = tryFindDefinedOnIsec(definedSym->isec(),573definedSym->value + reloc->addend);574}575576return sym;577}578579Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,580uint32_t offset) {581for (Defined *sym : isec->symbols)582if ((sym->value <= offset) && (sym->value + sym->size > offset))583return sym;584585return nullptr;586}587588Defined *589ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,590uint32_t offset) {591Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);592return dyn_cast_or_null<Defined>(sym);593}594595// Get the class's ro_data symbol. If getMetaRo is true, then we will return596// the meta-class's ro_data symbol. Otherwise, we will return the class597// (instance) ro_data symbol.598Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,599bool getMetaRo) {600ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());601if (!isec)602return nullptr;603604if (!getMetaRo)605return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +606classSym->value);607608Defined *metaClass = tryGetDefinedAtIsecOffset(609isec, classLayout.metaClassOffset + classSym->value);610if (!metaClass)611return nullptr;612613return tryGetDefinedAtIsecOffset(614dyn_cast<ConcatInputSection>(metaClass->isec()),615classLayout.roDataOffset);616}617618// Given an ConcatInputSection or CStringInputSection and an offset, if there is619// a symbol(Defined) at that offset, then erase the symbol (mark it not live)620void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(621const ConcatInputSection *isec, uint32_t offset) {622const Reloc *reloc = isec->getRelocAt(offset);623624if (!reloc)625return;626627Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());628if (!sym)629return;630631if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))632eraseISec(cisec);633else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {634uint32_t totalOffset = sym->value + reloc->addend;635StringPiece &piece = csisec->getStringPiece(totalOffset);636piece.live = false;637} else {638llvm_unreachable("erased symbol has to be Defined or CStringInputSection");639}640}641642void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(643const InfoInputCategory &catInfo) {644645if (!infoCategoryWriter.catListInfo.valid)646collectSectionWriteInfoFromIsec(catInfo.catListIsec,647infoCategoryWriter.catListInfo);648if (!infoCategoryWriter.catBodyInfo.valid)649collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,650infoCategoryWriter.catBodyInfo);651652if (!infoCategoryWriter.catNameInfo.valid) {653lld::macho::Defined *catNameSym =654tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);655assert(catNameSym && "Category does not have a valid name Symbol");656657collectSectionWriteInfoFromIsec(catNameSym->isec(),658infoCategoryWriter.catNameInfo);659}660661// Collect writer info from all the category lists (we're assuming they all662// would provide the same info)663if (!infoCategoryWriter.catPtrListInfo.valid) {664for (uint32_t off = catLayout.instanceMethodsOffset;665off <= catLayout.classPropsOffset; off += target->wordSize) {666if (Defined *ptrList =667tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {668collectSectionWriteInfoFromIsec(ptrList->isec(),669infoCategoryWriter.catPtrListInfo);670// we've successfully collected data, so we can break671break;672}673}674}675}676677// Parse a protocol list that might be linked to ConcatInputSection at a given678// offset. The format of the protocol list is different than other lists (prop679// lists, method lists) so we need to parse it differently680void ObjcCategoryMerger::parseProtocolListInfo(681const ConcatInputSection *isec, uint32_t secOffset,682PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {683assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&684"Tried to read pointer list beyond protocol section end");685686const Reloc *reloc = isec->getRelocAt(secOffset);687if (!reloc)688return;689690auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());691assert(ptrListSym && "Protocol list reloc does not have a valid Defined");692693// Theoretically protocol count can be either 32b or 64b, depending on694// platform pointer size, but to simplify implementation we always just read695// the lower 32b which should be good enough.696uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(697ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);698699ptrList.structCount += protocolCount;700ptrList.structSize = target->wordSize;701702[[maybe_unused]] uint32_t expectedListSize =703(protocolCount * target->wordSize) +704/*header(count)*/ protocolListHeaderLayout.totalSize +705/*extra null value*/ target->wordSize;706707// On Swift, the protocol list does not have the extra (unnecessary) null708[[maybe_unused]] uint32_t expectedListSizeSwift =709expectedListSize - target->wordSize;710711assert(((expectedListSize == ptrListSym->isec()->data.size() &&712sourceLang == SourceLanguage::ObjC) ||713(expectedListSizeSwift == ptrListSym->isec()->data.size() &&714sourceLang == SourceLanguage::Swift)) &&715"Protocol list does not match expected size");716717uint32_t off = protocolListHeaderLayout.totalSize;718for (uint32_t inx = 0; inx < protocolCount; ++inx) {719const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);720assert(reloc && "No reloc found at protocol list offset");721722auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());723assert(listSym && "Protocol list reloc does not have a valid Defined");724725ptrList.allPtrs.push_back(listSym);726off += target->wordSize;727}728assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&729"expected null terminating protocol");730assert(off + /*extra null value*/ target->wordSize == expectedListSize &&731"Protocol list end offset does not match expected size");732}733734// Parse a protocol list and return the PointerListInfo for it735ObjcCategoryMerger::PointerListInfo736ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,737uint32_t secOffset,738SourceLanguage sourceLang) {739PointerListInfo ptrList;740parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);741return ptrList;742}743744// Parse a pointer list that might be linked to ConcatInputSection at a given745// offset. This can be used for instance methods, class methods, instance props746// and class props since they have the same format.747void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,748uint32_t secOffset,749PointerListInfo &ptrList) {750assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);751assert(isec && "Trying to parse pointer list from null isec");752assert(secOffset + target->wordSize <= isec->data.size() &&753"Trying to read pointer list beyond section end");754755const Reloc *reloc = isec->getRelocAt(secOffset);756if (!reloc)757return;758759auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());760assert(ptrListSym && "Reloc does not have a valid Defined");761762uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(763ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);764uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(765ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);766assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);767768assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));769770ptrList.structCount += thisStructCount;771ptrList.structSize = thisStructSize;772773uint32_t expectedListSize =774listHeaderLayout.totalSize + (thisStructSize * thisStructCount);775assert(expectedListSize == ptrListSym->isec()->data.size() &&776"Pointer list does not match expected size");777778for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;779off += target->wordSize) {780const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);781assert(reloc && "No reloc found at pointer list offset");782783auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());784assert(listSym && "Reloc does not have a valid Defined");785786ptrList.allPtrs.push_back(listSym);787}788}789790// Here we parse all the information of an input category (catInfo) and791// append the parsed info into the structure which will contain all the792// information about how a class is extended (extInfo)793void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,794ClassExtensionInfo &extInfo) {795const Reloc *catNameReloc =796catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);797798// Parse name799assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");800801// is this the first category we are parsing?802if (extInfo.mergedContainerName.empty())803extInfo.objFileForMergeData =804dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());805else806extInfo.mergedContainerName += "|";807808assert(extInfo.objFileForMergeData &&809"Expected to already have valid objextInfo.objFileForMergeData");810811StringRef catName = getReferentString(*catNameReloc);812extInfo.mergedContainerName += catName.str();813814// Parse base class815if (!extInfo.baseClass) {816Symbol *classSym =817tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);818assert(extInfo.baseClassName.empty());819extInfo.baseClass = classSym;820llvm::StringRef classPrefix(objc::symbol_names::klass);821assert(classSym->getName().starts_with(classPrefix) &&822"Base class symbol does not start with expected prefix");823extInfo.baseClassName = classSym->getName().substr(classPrefix.size());824} else {825assert((extInfo.baseClass ==826tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,827catLayout.klassOffset)) &&828"Trying to parse category info into container with different base "829"class");830}831832parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,833extInfo.instanceMethods);834835parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,836extInfo.classMethods);837838parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,839extInfo.protocols, catInfo.sourceLanguage);840841parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,842extInfo.instanceProps);843844parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,845extInfo.classProps);846}847848// Generate a protocol list (including header) and link it into the parent at849// the specified offset.850Defined *ObjcCategoryMerger::emitAndLinkProtocolList(851Defined *parentSym, uint32_t linkAtOffset,852const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {853if (ptrList.allPtrs.empty())854return nullptr;855856assert(ptrList.allPtrs.size() == ptrList.structCount);857858uint32_t bodySize = (ptrList.structCount * target->wordSize) +859/*header(count)*/ protocolListHeaderLayout.totalSize +860/*extra null value*/ target->wordSize;861llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);862863// This theoretically can be either 32b or 64b, but writing just the first 32b864// is good enough865const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(866bodyData.data() + protocolListHeaderLayout.protocolCountOffset);867868*const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();869870ConcatInputSection *listSec = make<ConcatInputSection>(871*infoCategoryWriter.catPtrListInfo.inputSection, bodyData,872infoCategoryWriter.catPtrListInfo.align);873listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;874listSec->live = true;875876listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;877878std::string symName = ptrList.categoryPrefix;879symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";880881Defined *ptrListSym = make<Defined>(882newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),883listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,884/*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,885/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,886/*isWeakDefCanBeHidden=*/false);887888ptrListSym->used = true;889parentSym->getObjectFile()->symbols.push_back(ptrListSym);890addInputSection(listSec);891892createSymbolReference(parentSym, ptrListSym, linkAtOffset,893infoCategoryWriter.catBodyInfo.relocTemplate);894895uint32_t offset = protocolListHeaderLayout.totalSize;896for (Symbol *symbol : ptrList.allPtrs) {897createSymbolReference(ptrListSym, symbol, offset,898infoCategoryWriter.catPtrListInfo.relocTemplate);899offset += target->wordSize;900}901902return ptrListSym;903}904905// Generate a pointer list (including header) and link it into the parent at the906// specified offset. This is used for instance and class methods and907// proprieties.908void ObjcCategoryMerger::emitAndLinkPointerList(909Defined *parentSym, uint32_t linkAtOffset,910const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {911if (ptrList.allPtrs.empty())912return;913914assert(ptrList.allPtrs.size() * target->wordSize ==915ptrList.structCount * ptrList.structSize);916917// Generate body918uint32_t bodySize =919listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);920llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);921922const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(923bodyData.data() + listHeaderLayout.structSizeOffset);924const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(925bodyData.data() + listHeaderLayout.structCountOffset);926927*const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;928*const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;929930ConcatInputSection *listSec = make<ConcatInputSection>(931*infoCategoryWriter.catPtrListInfo.inputSection, bodyData,932infoCategoryWriter.catPtrListInfo.align);933listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;934listSec->live = true;935936listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;937938std::string symName = ptrList.categoryPrefix;939symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";940941Defined *ptrListSym = make<Defined>(942newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),943listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,944/*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,945/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,946/*isWeakDefCanBeHidden=*/false);947948ptrListSym->used = true;949parentSym->getObjectFile()->symbols.push_back(ptrListSym);950addInputSection(listSec);951952createSymbolReference(parentSym, ptrListSym, linkAtOffset,953infoCategoryWriter.catBodyInfo.relocTemplate);954955uint32_t offset = listHeaderLayout.totalSize;956for (Symbol *symbol : ptrList.allPtrs) {957createSymbolReference(ptrListSym, symbol, offset,958infoCategoryWriter.catPtrListInfo.relocTemplate);959offset += target->wordSize;960}961}962963// This method creates an __objc_catlist ConcatInputSection with a single slot964Defined *965ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,966const std::string &forBaseClassName,967ObjFile *objFile) {968uint32_t sectionSize = target->wordSize;969llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);970971ConcatInputSection *newCatList =972make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,973bodyData, infoCategoryWriter.catListInfo.align);974newCatList->parent = infoCategoryWriter.catListInfo.outputSection;975newCatList->live = true;976977newCatList->parent = infoCategoryWriter.catListInfo.outputSection;978979std::string catSymName = "<__objc_catlist slot for merged category ";980catSymName += forBaseClassName + "(" + forCategoryName + ")>";981982Defined *catListSym = make<Defined>(983newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,984/*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,985/*isPrivateExtern=*/false, /*includeInSymtab=*/false,986/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,987/*isWeakDefCanBeHidden=*/false);988989catListSym->used = true;990objFile->symbols.push_back(catListSym);991addInputSection(newCatList);992return catListSym;993}994995// Here we generate the main category body and link the name and base class into996// it. We don't link any other info yet like the protocol and class/instance997// methods/props.998Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,999const Defined *nameSym,1000const Symbol *baseClassSym,1001const std::string &baseClassName,1002ObjFile *objFile) {1003llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);10041005uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +1006catLayout.sizeOffset);1007*ptrSize = catLayout.totalSize;10081009ConcatInputSection *newBodySec =1010make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,1011bodyData, infoCategoryWriter.catBodyInfo.align);1012newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;1013newBodySec->live = true;10141015std::string symName =1016objc::symbol_names::category + baseClassName + "(" + name + ")";1017Defined *catBodySym = make<Defined>(1018newStringData(symName.c_str()), /*file=*/objFile, newBodySec,1019/*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,1020/*isPrivateExtern=*/false, /*includeInSymtab=*/true,1021/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,1022/*isWeakDefCanBeHidden=*/false);10231024catBodySym->used = true;1025objFile->symbols.push_back(catBodySym);1026addInputSection(newBodySec);10271028createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,1029infoCategoryWriter.catBodyInfo.relocTemplate);10301031// Create a reloc to the base class (either external or internal)1032createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,1033infoCategoryWriter.catBodyInfo.relocTemplate);10341035return catBodySym;1036}10371038// This writes the new category name (for the merged category) into the binary1039// and returns the sybmol for it.1040Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,1041ObjFile *objFile) {1042StringRef nameStrData = newStringData(name.c_str());1043// We use +1 below to include the null terminator1044llvm::ArrayRef<uint8_t> nameData(1045reinterpret_cast<const uint8_t *>(nameStrData.data()),1046nameStrData.size() + 1);10471048auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;1049CStringInputSection *newStringSec = make<CStringInputSection>(1050*infoCategoryWriter.catNameInfo.inputSection, nameData,1051infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);10521053parentSection->subsections.push_back({0, newStringSec});10541055newStringSec->splitIntoPieces();1056newStringSec->pieces[0].live = true;1057newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;1058in.cStringSection->addInput(newStringSec);1059assert(newStringSec->pieces.size() == 1);10601061Defined *catNameSym = make<Defined>(1062"<merged category name>", /*file=*/objFile, newStringSec,1063/*value=*/0, nameData.size(),1064/*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,1065/*includeInSymtab=*/false, /*isReferencedDynamically=*/false,1066/*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);10671068catNameSym->used = true;1069objFile->symbols.push_back(catNameSym);1070return catNameSym;1071}10721073// This method fully creates a new category from the given ClassExtensionInfo.1074// It creates the category name, body and method/protocol/prop lists and links1075// them all together. Then it creates a new __objc_catlist entry and adds the1076// category to it. Calling this method will fully generate a category which will1077// be available in the final binary.1078Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {1079Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,1080extInfo.objFileForMergeData);10811082Defined *catBodySym = emitCategoryBody(1083extInfo.mergedContainerName, catNameSym, extInfo.baseClass,1084extInfo.baseClassName, extInfo.objFileForMergeData);10851086Defined *catListSym =1087emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,1088extInfo.objFileForMergeData);10891090// Add the single category body to the category list at the offset 0.1091createSymbolReference(catListSym, catBodySym, /*offset=*/0,1092infoCategoryWriter.catListInfo.relocTemplate);10931094emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,1095extInfo.instanceMethods);10961097emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,1098extInfo.classMethods);10991100emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,1101extInfo.protocols);11021103emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,1104extInfo.instanceProps);11051106emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,1107extInfo.classProps);11081109return catBodySym;1110}11111112// This method merges all the categories (sharing a base class) into a single1113// category.1114void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(1115std::vector<InfoInputCategory> &categories) {1116assert(categories.size() > 1 && "Expected at least 2 categories");11171118ClassExtensionInfo extInfo(catLayout);11191120for (auto &catInfo : categories)1121parseCatInfoToExtInfo(catInfo, extInfo);11221123Defined *newCatDef = emitCategory(extInfo);1124assert(newCatDef && "Failed to create a new category");11251126// Suppress unsuded var warning1127(void)newCatDef;11281129for (auto &catInfo : categories)1130catInfo.wasMerged = true;1131}11321133void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,1134const Symbol *refTo,1135uint32_t offset,1136const Reloc &relocTemplate) {1137Reloc r = relocTemplate;1138r.offset = offset;1139r.addend = 0;1140r.referent = const_cast<Symbol *>(refTo);1141refFrom->isec()->relocs.push_back(r);1142}11431144// Get the list of categories in the '__objc_nlcatlist' section. We can't1145// optimize these as they have a '+load' method that has to be called at1146// runtime.1147DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {1148DenseSet<const Symbol *> nlCategories;11491150for (InputSection *sec : allInputSections) {1151if (sec->getName() != section_names::objcNonLazyCatList)1152continue;11531154for (auto &r : sec->relocs) {1155const Symbol *sym = r.referent.dyn_cast<Symbol *>();1156nlCategories.insert(sym);1157}1158}1159return nlCategories;1160}11611162void ObjcCategoryMerger::collectAndValidateCategoriesData() {1163auto nlCategories = collectNlCategories();11641165for (InputSection *sec : allInputSections) {1166if (sec->getName() != section_names::objcCatList)1167continue;1168ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);1169assert(catListCisec &&1170"__objc_catList InputSection is not a ConcatInputSection");11711172for (uint32_t off = 0; off < catListCisec->getSize();1173off += target->wordSize) {1174Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);1175assert(categorySym &&1176"Failed to get a valid category at __objc_catlit offset");11771178if (nlCategories.count(categorySym))1179continue;11801181auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());1182assert(catBodyIsec &&1183"Category data section is not an ConcatInputSection");11841185SourceLanguage eLang = SourceLanguage::Unknown;1186if (categorySym->getName().starts_with(objc::symbol_names::category))1187eLang = SourceLanguage::ObjC;1188else if (categorySym->getName().starts_with(1189objc::symbol_names::swift_objc_category))1190eLang = SourceLanguage::Swift;1191else1192llvm_unreachable("Unexpected category symbol name");11931194InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};11951196// Check that the category has a reloc at 'klassOffset' (which is1197// a pointer to the class symbol)11981199Symbol *classSym =1200tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);1201assert(classSym && "Category does not have a valid base class");12021203categoryMap[classSym].push_back(catInputInfo);12041205collectCategoryWriterInfoFromCategory(catInputInfo);1206}1207}1208}12091210// In the input we have multiple __objc_catlist InputSection, each of which may1211// contain links to multiple categories. Of these categories, we will merge (and1212// erase) only some. There will be some categories that will remain untouched1213// (not erased). For these not erased categories, we generate new __objc_catlist1214// entries since the parent __objc_catlist entry will be erased1215void ObjcCategoryMerger::generateCatListForNonErasedCategories(1216const MapVector<ConcatInputSection *, std::set<uint64_t>>1217catListToErasedOffsets) {12181219// Go through all offsets of all __objc_catlist's that we process and if there1220// are categories that we didn't process - generate a new __objc_catlist for1221// each.1222for (auto &mapEntry : catListToErasedOffsets) {1223ConcatInputSection *catListIsec = mapEntry.first;1224for (uint32_t catListIsecOffset = 0;1225catListIsecOffset < catListIsec->data.size();1226catListIsecOffset += target->wordSize) {1227// This slot was erased, we can just skip it1228if (mapEntry.second.count(catListIsecOffset))1229continue;12301231Defined *nonErasedCatBody =1232tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);1233assert(nonErasedCatBody && "Failed to relocate non-deleted category");12341235// Allocate data for the new __objc_catlist slot1236llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);12371238// We mark the __objc_catlist slot as belonging to the same file as the1239// category1240ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());12411242ConcatInputSection *listSec = make<ConcatInputSection>(1243*infoCategoryWriter.catListInfo.inputSection, bodyData,1244infoCategoryWriter.catListInfo.align);1245listSec->parent = infoCategoryWriter.catListInfo.outputSection;1246listSec->live = true;12471248std::string slotSymName = "<__objc_catlist slot for category ";1249slotSymName += nonErasedCatBody->getName();1250slotSymName += ">";12511252Defined *catListSlotSym = make<Defined>(1253newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,1254/*value=*/0, bodyData.size(),1255/*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,1256/*includeInSymtab=*/false, /*isReferencedDynamically=*/false,1257/*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);12581259catListSlotSym->used = true;1260objFile->symbols.push_back(catListSlotSym);1261addInputSection(listSec);12621263// Now link the category body into the newly created slot1264createSymbolReference(catListSlotSym, nonErasedCatBody, 0,1265infoCategoryWriter.catListInfo.relocTemplate);1266}1267}1268}12691270void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {1271isec->live = false;1272for (auto &sym : isec->symbols)1273sym->used = false;1274}12751276// This fully erases the merged categories, including their body, their names,1277// their method/protocol/prop lists and the __objc_catlist entries that link to1278// them.1279void ObjcCategoryMerger::eraseMergedCategories() {1280// Map of InputSection to a set of offsets of the categories that were merged1281MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;12821283for (auto &mapEntry : categoryMap) {1284for (InfoInputCategory &catInfo : mapEntry.second) {1285if (catInfo.wasMerged) {1286eraseISec(catInfo.catListIsec);1287catListToErasedOffsets[catInfo.catListIsec].insert(1288catInfo.offCatListIsec);1289}1290}1291}12921293// If there were categories that we did not erase, we need to generate a new1294// __objc_catList that contains only the un-merged categories, and get rid of1295// the references to the ones we merged.1296generateCatListForNonErasedCategories(catListToErasedOffsets);12971298// Erase the old method lists & names of the categories that were merged1299for (auto &mapEntry : categoryMap) {1300for (InfoInputCategory &catInfo : mapEntry.second) {1301if (!catInfo.wasMerged)1302continue;13031304eraseISec(catInfo.catBodyIsec);13051306// We can't erase 'catLayout.nameOffset' for either Swift or ObjC1307// categories because the name will sometimes also be used for other1308// purposes.1309// For Swift, see usages of 'l_.str.11.SimpleClass' in1310// objc-category-merging-swift.s1311// For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in1312// objc-category-merging-erase-objc-name-test.s1313// TODO: handle the above in a smarter way13141315tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,1316catLayout.instanceMethodsOffset);1317tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,1318catLayout.classMethodsOffset);1319tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,1320catLayout.protocolsOffset);1321tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,1322catLayout.classPropsOffset);1323tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,1324catLayout.instancePropsOffset);1325}1326}1327}13281329void ObjcCategoryMerger::doMerge() {1330collectAndValidateCategoriesData();13311332for (auto &[baseClass, catInfos] : categoryMap) {1333if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {1334// Merge all categories into the base class1335mergeCategoriesIntoBaseClass(baseClassDef, catInfos);1336} else if (catInfos.size() > 1) {1337// Merge all categories into a new, single category1338mergeCategoriesIntoSingleCategory(catInfos);1339}1340}13411342// Erase all categories that were merged1343eraseMergedCategories();1344}13451346void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }13471348StringRef ObjcCategoryMerger::newStringData(const char *str) {1349uint32_t len = strlen(str);1350uint32_t bufSize = len + 1;1351SmallVector<uint8_t> &data = newSectionData(bufSize);1352char *strData = reinterpret_cast<char *>(data.data());1353// Copy the string chars and null-terminator1354memcpy(strData, str, bufSize);1355return StringRef(strData, len);1356}13571358SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {1359generatedSectionData.push_back(1360std::make_unique<SmallVector<uint8_t>>(size, 0));1361return *generatedSectionData.back();1362}13631364} // namespace13651366void objc::mergeCategories() {1367TimeTraceScope timeScope("ObjcCategoryMerger");13681369ObjcCategoryMerger merger(inputSections);1370merger.doMerge();1371}13721373void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }13741375ObjcCategoryMerger::SourceLanguage1376ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {1377if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))1378return SourceLanguage::Swift;13791380// If the symbol name matches the ObjC prefix, we don't necessarely know this1381// comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift1382// classes. Ex:1383// .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass1384// .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass1385// .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN1386//1387// So we scan for symbols with the same address and check for the Swift class1388if (classSym->getName().starts_with(objc::symbol_names::klass)) {1389for (auto &sym : classSym->originalIsec->symbols)1390if (sym->value == classSym->value)1391if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))1392return SourceLanguage::Swift;1393return SourceLanguage::ObjC;1394}13951396llvm_unreachable("Unexpected class symbol name during category merging");1397}1398void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(1399const Defined *baseClass, std::vector<InfoInputCategory> &categories) {1400assert(categories.size() >= 1 && "Expected at least one category to merge");14011402// Collect all the info from the categories1403ClassExtensionInfo extInfo(catLayout);1404extInfo.baseClass = baseClass;1405extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);14061407for (auto &catInfo : categories) {1408parseCatInfoToExtInfo(catInfo, extInfo);1409}14101411// Get metadata for the base class1412Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);1413ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());1414Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);1415ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());14161417// Now collect the info from the base class from the various lists in the1418// class metadata14191420// Protocol lists are a special case - the same protocol list is in classRo1421// and metaRo, so we only need to parse it once1422parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,1423extInfo.protocols, extInfo.baseClassSourceLanguage);14241425// Check that the classRo and metaRo protocol lists are identical1426assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,1427extInfo.baseClassSourceLanguage) ==1428parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,1429extInfo.baseClassSourceLanguage) &&1430"Category merger expects classRo and metaRo to have the same protocol "1431"list");14321433parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,1434extInfo.classMethods);1435parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,1436extInfo.instanceMethods);14371438parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,1439extInfo.classProps);1440parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,1441extInfo.instanceProps);14421443// Erase the old lists - these will be generated and replaced1444eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);1445eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);1446eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);1447eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);1448eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);1449eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);14501451// Emit the newly merged lists - first into the meta RO then into the class RO1452// First we emit and link the protocol list into the meta RO. Then we link it1453// in the classRo as well (they're supposed to be identical)1454if (Defined *protoListSym =1455emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,1456extInfo, extInfo.protocols)) {1457createSymbolReference(classRo, protoListSym,1458roClassLayout.baseProtocolsOffset,1459infoCategoryWriter.catBodyInfo.relocTemplate);1460}14611462emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,1463extInfo.classMethods);1464emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,1465extInfo.instanceMethods);14661467emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,1468extInfo.classProps);14691470emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,1471extInfo.instanceProps);14721473// Mark all the categories as merged - this will be used to erase them later1474for (auto &catInfo : categories)1475catInfo.wasMerged = true;1476}14771478// Erase the symbol at a given offset in an InputSection1479void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,1480uint32_t offset) {1481Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);1482if (!sym)1483return;14841485// Remove the symbol from isec->symbols1486assert(isa<Defined>(sym) && "Can only erase a Defined");1487llvm::erase(isec->symbols, sym);14881489// Remove the relocs that refer to this symbol1490auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };1491llvm::erase_if(isec->relocs, removeAtOff);14921493// Now, if the symbol fully occupies a ConcatInputSection, we can also erase1494// the whole ConcatInputSection1495if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))1496if (cisec->data.size() == sym->size)1497eraseISec(cisec);1498}149915001501