Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/MachO/ObjC.cpp
34878 views
1
//===- ObjC.cpp -----------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "ObjC.h"
10
#include "ConcatOutputSection.h"
11
#include "InputFiles.h"
12
#include "InputSection.h"
13
#include "Layout.h"
14
#include "OutputSegment.h"
15
#include "SyntheticSections.h"
16
#include "Target.h"
17
18
#include "lld/Common/ErrorHandler.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/BinaryFormat/MachO.h"
21
#include "llvm/Bitcode/BitcodeReader.h"
22
#include "llvm/Support/TimeProfiler.h"
23
24
using namespace llvm;
25
using namespace llvm::MachO;
26
using namespace lld;
27
using namespace lld::macho;
28
29
// Scan the sections of the segment load command located by findCommand in the
// Mach-O object held by \p mb. Returns true when one of them is the ObjC
// category list in the data segment, or a section in the text segment whose
// name starts with the Swift prefix. Returns false when the header magic does
// not match \p LP or no such section is found.
template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
  using SectionHeader = typename LP::section;
  using MachHeader = typename LP::mach_header;
  using SegmentCommand = typename LP::segment_command;

  const auto *header =
      reinterpret_cast<const MachHeader *>(mb.getBufferStart());
  if (header->magic != LP::magic)
    return false;

  const auto *segCmd = findCommand<SegmentCommand>(header, LP::segmentLCType);
  if (!segCmd)
    return false;

  // Section and segment names are fixed-width char arrays; strnlen bounds the
  // read so a name that fills the whole array is still handled.
  const auto fixedWidthName = [](const auto &field) {
    return StringRef(field, strnlen(field, sizeof(field)));
  };

  // The section headers follow the segment command directly.
  const ArrayRef<SectionHeader> sections(
      reinterpret_cast<const SectionHeader *>(segCmd + 1), segCmd->nsects);
  for (const SectionHeader &sec : sections) {
    const StringRef sectname = fixedWidthName(sec.sectname);
    const StringRef segname = fixedWidthName(sec.segname);

    const bool isCatList = segname == segment_names::data &&
                           sectname == section_names::objcCatList;
    const bool isSwift = segname == segment_names::text &&
                         sectname.starts_with(section_names::swift);
    if (isCatList || isSwift)
      return true;
  }
  return false;
}
56
57
// Dispatch to the 64-bit or 32-bit header layout according to the word size of
// the link target.
static bool objectHasObjCSection(MemoryBufferRef mb) {
  return target->wordSize == 8 ? ::objectHasObjCSection<LP64>(mb)
                               : ::objectHasObjCSection<ILP32>(mb);
}
63
64
// Returns true if the buffer is a Mach-O object with an ObjC/Swift section (see
// objectHasObjCSection) or a bitcode file for which
// isBitcodeContainingObjCCategory reports true. Any other kind of file yields
// false.
bool macho::hasObjCSection(MemoryBufferRef mb) {
  const file_magic magic = identify_magic(mb.getBuffer());

  if (magic == file_magic::macho_object)
    return objectHasObjCSection(mb);

  if (magic == file_magic::bitcode)
    return check(isBitcodeContainingObjCCategory(mb));

  return false;
}
74
75
namespace {
76
77
#define FOR_EACH_CATEGORY_FIELD(DO) \
78
DO(Ptr, name) \
79
DO(Ptr, klass) \
80
DO(Ptr, instanceMethods) \
81
DO(Ptr, classMethods) \
82
DO(Ptr, protocols) \
83
DO(Ptr, instanceProps) \
84
DO(Ptr, classProps) \
85
DO(uint32_t, size)
86
87
CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
88
89
#undef FOR_EACH_CATEGORY_FIELD
90
91
#define FOR_EACH_CLASS_FIELD(DO) \
92
DO(Ptr, metaClass) \
93
DO(Ptr, superClass) \
94
DO(Ptr, methodCache) \
95
DO(Ptr, vtable) \
96
DO(Ptr, roData)
97
98
CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
99
100
#undef FOR_EACH_CLASS_FIELD
101
102
#define FOR_EACH_RO_CLASS_FIELD(DO) \
103
DO(uint32_t, flags) \
104
DO(uint32_t, instanceStart) \
105
DO(Ptr, instanceSize) \
106
DO(Ptr, ivarLayout) \
107
DO(Ptr, name) \
108
DO(Ptr, baseMethods) \
109
DO(Ptr, baseProtocols) \
110
DO(Ptr, ivars) \
111
DO(Ptr, weakIvarLayout) \
112
DO(Ptr, baseProperties)
113
114
CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
115
116
#undef FOR_EACH_RO_CLASS_FIELD
117
118
#define FOR_EACH_LIST_HEADER(DO) \
119
DO(uint32_t, structSize) \
120
DO(uint32_t, structCount)
121
122
CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
123
124
#undef FOR_EACH_LIST_HEADER
125
126
#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
127
128
CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
129
130
#undef FOR_EACH_PROTOCOL_LIST_HEADER
131
132
#define FOR_EACH_METHOD(DO) \
133
DO(Ptr, name) \
134
DO(Ptr, type) \
135
DO(Ptr, impl)
136
137
CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
138
139
#undef FOR_EACH_METHOD
140
141
enum MethodContainerKind {
142
MCK_Class,
143
MCK_Category,
144
};
145
146
struct MethodContainer {
147
MethodContainerKind kind;
148
const ConcatInputSection *isec;
149
};
150
151
enum MethodKind {
152
MK_Instance,
153
MK_Static,
154
};
155
156
struct ObjcClass {
157
DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
158
DenseMap<CachedHashStringRef, MethodContainer> classMethods;
159
};
160
161
} // namespace
162
163
class ObjcCategoryChecker {
164
public:
165
ObjcCategoryChecker();
166
void parseCategory(const ConcatInputSection *catListIsec);
167
168
private:
169
void parseClass(const Defined *classSym);
170
void parseMethods(const ConcatInputSection *methodsIsec,
171
const Symbol *methodContainer,
172
const ConcatInputSection *containerIsec,
173
MethodContainerKind, MethodKind);
174
175
CategoryLayout catLayout;
176
ClassLayout classLayout;
177
ROClassLayout roClassLayout;
178
ListHeaderLayout listHeaderLayout;
179
MethodLayout methodLayout;
180
181
DenseMap<const Symbol *, ObjcClass> classMap;
182
};
183
184
ObjcCategoryChecker::ObjcCategoryChecker()
185
: catLayout(target->wordSize), classLayout(target->wordSize),
186
roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
187
methodLayout(target->wordSize) {}
188
189
// \p r must point to an offset within a CStringInputSection or a
190
// ConcatInputSection
191
static StringRef getReferentString(const Reloc &r) {
192
if (auto *isec = r.referent.dyn_cast<InputSection *>())
193
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
194
195
auto *sym = cast<Defined>(r.referent.get<Symbol *>());
196
auto *symIsec = sym->isec();
197
auto symOffset = sym->value + r.addend;
198
199
if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
200
return s->getStringRefAtOffset(symOffset);
201
202
if (isa<ConcatInputSection>(symIsec)) {
203
auto strData = symIsec->data.slice(symOffset);
204
const char *pszData = reinterpret_cast<const char *>(strData.data());
205
return StringRef(pszData, strnlen(pszData, strData.size()));
206
}
207
208
llvm_unreachable("unknown reference section in getReferentString");
209
}
210
211
void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
212
const Symbol *methodContainerSym,
213
const ConcatInputSection *containerIsec,
214
MethodContainerKind mcKind,
215
MethodKind mKind) {
216
ObjcClass &klass = classMap[methodContainerSym];
217
for (const Reloc &r : methodsIsec->relocs) {
218
if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
219
methodLayout.nameOffset)
220
continue;
221
222
CachedHashStringRef methodName(getReferentString(r));
223
// +load methods are special: all implementations are called by the runtime
224
// even if they are part of the same class. Thus there is no need to check
225
// for duplicates.
226
// NOTE: Instead of specifically checking for this method name, ld64 simply
227
// checks whether a class / category is present in __objc_nlclslist /
228
// __objc_nlcatlist respectively. This will be the case if the class /
229
// category has a +load method. It skips optimizing the categories if there
230
// are multiple +load methods. Since it does dupe checking as part of the
231
// optimization process, this avoids spurious dupe messages around +load,
232
// but it also means that legit dupe issues for other methods are ignored.
233
if (mKind == MK_Static && methodName.val() == "load")
234
continue;
235
236
auto &methodMap =
237
mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
238
if (methodMap
239
.try_emplace(methodName, MethodContainer{mcKind, containerIsec})
240
.second)
241
continue;
242
243
// We have a duplicate; generate a warning message.
244
const auto &mc = methodMap.lookup(methodName);
245
const Reloc *nameReloc = nullptr;
246
if (mc.kind == MCK_Category) {
247
nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
248
} else {
249
assert(mc.kind == MCK_Class);
250
const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
251
->getReferentInputSection();
252
nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
253
}
254
StringRef containerName = getReferentString(*nameReloc);
255
StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
256
257
// We should only ever encounter collisions when parsing category methods
258
// (since the Class struct is parsed before any of its categories).
259
assert(mcKind == MCK_Category);
260
StringRef newCatName =
261
getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset));
262
263
auto formatObjAndSrcFileName = [](const InputSection *section) {
264
lld::macho::InputFile *inputFile = section->getFile();
265
std::string result = toString(inputFile);
266
267
auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
268
if (objFile && objFile->compileUnit)
269
result += " (" + objFile->sourceFile() + ")";
270
271
return result;
272
};
273
274
StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
275
warn("method '" + methPrefix + methodName.val() +
276
"' has conflicting definitions:\n>>> defined in category " +
277
newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
278
"\n>>> defined in " + containerType + " " + containerName + " from " +
279
formatObjAndSrcFileName(mc.isec));
280
}
281
}
282
283
void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
284
auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
285
if (!classReloc)
286
return;
287
288
auto *classSym = classReloc->referent.get<Symbol *>();
289
if (auto *d = dyn_cast<Defined>(classSym))
290
if (!classMap.count(d))
291
parseClass(d);
292
293
if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
294
parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
295
classSym, catIsec, MCK_Category, MK_Static);
296
}
297
298
if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
299
parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
300
classSym, catIsec, MCK_Category, MK_Instance);
301
}
302
}
303
304
void ObjcCategoryChecker::parseClass(const Defined *classSym) {
305
// Given a Class struct, get its corresponding Methods struct
306
auto getMethodsIsec =
307
[&](const InputSection *classIsec) -> ConcatInputSection * {
308
if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
309
if (const auto *roIsec =
310
cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
311
if (const auto *r =
312
roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
313
if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
314
r->getReferentInputSection()))
315
return methodsIsec;
316
}
317
}
318
}
319
return nullptr;
320
};
321
322
const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
323
324
// Parse instance methods.
325
if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
326
parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
327
MK_Instance);
328
329
// Class methods are contained in the metaclass.
330
if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
331
if (const auto *classMethodsIsec = getMethodsIsec(
332
cast<ConcatInputSection>(r->getReferentInputSection())))
333
parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
334
}
335
336
void objc::checkCategories() {
337
TimeTraceScope timeScope("ObjcCategoryChecker");
338
339
ObjcCategoryChecker checker;
340
for (const InputSection *isec : inputSections) {
341
if (isec->getName() == section_names::objcCatList)
342
for (const Reloc &r : isec->relocs) {
343
auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
344
checker.parseCategory(catIsec);
345
}
346
}
347
}
348
349
namespace {
350
351
class ObjcCategoryMerger {
352
// In which language was a particular construct originally defined
353
enum SourceLanguage { Unknown, ObjC, Swift };
354
355
// Information about an input category
356
struct InfoInputCategory {
357
ConcatInputSection *catListIsec;
358
ConcatInputSection *catBodyIsec;
359
uint32_t offCatListIsec = 0;
360
SourceLanguage sourceLanguage = SourceLanguage::Unknown;
361
362
bool wasMerged = false;
363
};
364
365
// To write new (merged) categories or classes, we will try make limited
366
// assumptions about the alignment and the sections the various class/category
367
// info are stored in and . So we'll just reuse the same sections and
368
// alignment as already used in existing (input) categories. To do this we
369
// have InfoCategoryWriter which contains the various sections that the
370
// generated categories will be written to.
371
struct InfoWriteSection {
372
bool valid = false; // Data has been successfully collected from input
373
uint32_t align = 0;
374
Section *inputSection;
375
Reloc relocTemplate;
376
OutputSection *outputSection;
377
};
378
379
struct InfoCategoryWriter {
380
InfoWriteSection catListInfo;
381
InfoWriteSection catBodyInfo;
382
InfoWriteSection catNameInfo;
383
InfoWriteSection catPtrListInfo;
384
};
385
386
// Information about a pointer list in the original categories or class(method
387
// lists, protocol lists, etc)
388
struct PointerListInfo {
389
PointerListInfo() = default;
390
PointerListInfo(const PointerListInfo &) = default;
391
PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
392
: categoryPrefix(_categoryPrefix),
393
pointersPerStruct(_pointersPerStruct) {}
394
395
inline bool operator==(const PointerListInfo &cmp) const {
396
return pointersPerStruct == cmp.pointersPerStruct &&
397
structSize == cmp.structSize && structCount == cmp.structCount &&
398
allPtrs == cmp.allPtrs;
399
}
400
401
const char *categoryPrefix;
402
403
uint32_t pointersPerStruct = 0;
404
405
uint32_t structSize = 0;
406
uint32_t structCount = 0;
407
408
std::vector<Symbol *> allPtrs;
409
};
410
411
// Full information describing an ObjC class . This will include all the
412
// additional methods, protocols, and properties that are contained in the
413
// class and all the categories that extend a particular class.
414
struct ClassExtensionInfo {
415
ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
416
417
// Merged names of containers. Ex: base|firstCategory|secondCategory|...
418
std::string mergedContainerName;
419
std::string baseClassName;
420
const Symbol *baseClass = nullptr;
421
SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
422
423
CategoryLayout &catLayout;
424
425
// In case we generate new data, mark the new data as belonging to this file
426
ObjFile *objFileForMergeData = nullptr;
427
428
PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
429
/*pointersPerStruct=*/3};
430
PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
431
/*pointersPerStruct=*/3};
432
PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
433
/*pointersPerStruct=*/0};
434
PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
435
/*pointersPerStruct=*/2};
436
PointerListInfo classProps = {objc::symbol_names::klassPropList,
437
/*pointersPerStruct=*/2};
438
};
439
440
public:
441
ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
442
void doMerge();
443
static void doCleanup();
444
445
private:
446
DenseSet<const Symbol *> collectNlCategories();
447
void collectAndValidateCategoriesData();
448
void
449
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
450
451
void eraseISec(ConcatInputSection *isec);
452
void eraseMergedCategories();
453
454
void generateCatListForNonErasedCategories(
455
MapVector<ConcatInputSection *, std::set<uint64_t>>
456
catListToErasedOffsets);
457
void collectSectionWriteInfoFromIsec(const InputSection *isec,
458
InfoWriteSection &catWriteInfo);
459
void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
460
void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
461
ClassExtensionInfo &extInfo);
462
463
void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
464
PointerListInfo &ptrList,
465
SourceLanguage sourceLang);
466
467
PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
468
uint32_t secOffset,
469
SourceLanguage sourceLang);
470
471
void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
472
PointerListInfo &ptrList);
473
474
void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
475
const ClassExtensionInfo &extInfo,
476
const PointerListInfo &ptrList);
477
478
Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
479
const ClassExtensionInfo &extInfo,
480
const PointerListInfo &ptrList);
481
482
Defined *emitCategory(const ClassExtensionInfo &extInfo);
483
Defined *emitCatListEntrySec(const std::string &forCategoryName,
484
const std::string &forBaseClassName,
485
ObjFile *objFile);
486
Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
487
const Symbol *baseClassSym,
488
const std::string &baseClassName, ObjFile *objFile);
489
Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
490
void createSymbolReference(Defined *refFrom, const Symbol *refTo,
491
uint32_t offset, const Reloc &relocTemplate);
492
Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
493
Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
494
uint32_t offset);
495
Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
496
uint32_t offset);
497
Defined *getClassRo(const Defined *classSym, bool getMetaRo);
498
SourceLanguage getClassSymSourceLang(const Defined *classSym);
499
void mergeCategoriesIntoBaseClass(const Defined *baseClass,
500
std::vector<InfoInputCategory> &categories);
501
void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
502
void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
503
uint32_t offset);
504
505
// Allocate a null-terminated StringRef backed by generatedSectionData
506
StringRef newStringData(const char *str);
507
// Allocate section data, backed by generatedSectionData
508
SmallVector<uint8_t> &newSectionData(uint32_t size);
509
510
CategoryLayout catLayout;
511
ClassLayout classLayout;
512
ROClassLayout roClassLayout;
513
ListHeaderLayout listHeaderLayout;
514
MethodLayout methodLayout;
515
ProtocolListHeaderLayout protocolListHeaderLayout;
516
517
InfoCategoryWriter infoCategoryWriter;
518
std::vector<ConcatInputSection *> &allInputSections;
519
// Map of base class Symbol to list of InfoInputCategory's for it
520
MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
521
522
// Normally, the binary data comes from the input files, but since we're
523
// generating binary data ourselves, we use the below array to store it in.
524
// Need this to be 'static' so the data survives past the ObjcCategoryMerger
525
// object, as the data will be read by the Writer when the final binary is
526
// generated.
527
static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
528
generatedSectionData;
529
};
530
531
SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
532
ObjcCategoryMerger::generatedSectionData;
533
534
// Size every layout helper for the word size of the link target and keep a
// reference to the caller's list of input sections.
ObjcCategoryMerger::ObjcCategoryMerger(
    std::vector<ConcatInputSection *> &_allInputSections)
    : catLayout(target->wordSize),
      classLayout(target->wordSize),
      roClassLayout(target->wordSize),
      listHeaderLayout(target->wordSize),
      methodLayout(target->wordSize),
      protocolListHeaderLayout(target->wordSize),
      allInputSections(_allInputSections) {
}
541
542
void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
543
const InputSection *isec, InfoWriteSection &catWriteInfo) {
544
545
catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
546
catWriteInfo.align = isec->align;
547
catWriteInfo.outputSection = isec->parent;
548
549
assert(catWriteInfo.outputSection &&
550
"outputSection may not be null in collectSectionWriteInfoFromIsec.");
551
552
if (isec->relocs.size())
553
catWriteInfo.relocTemplate = isec->relocs[0];
554
555
catWriteInfo.valid = true;
556
}
557
558
// Return the symbol that the relocation at \p offset of \p isec refers to.
//
// Returns nullptr when \p isec is null, when there is no relocation at
// \p offset, or when the relocation refers to a section rather than a symbol.
// If the relocation has a non-zero addend, the result is the Defined that
// covers (symbol value + addend) in the symbol's section, which may itself be
// nullptr.
Symbol *
ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
                                             uint32_t offset) {
  if (!isec)
    return nullptr;
  const Reloc *reloc = isec->getRelocAt(offset);

  if (!reloc)
    return nullptr;

  // A relocation may refer to a section instead of a symbol. This is a "try"
  // helper, so report that case as "no symbol" rather than asserting.
  Symbol *sym = reloc->referent.dyn_cast<Symbol *>();
  if (!sym)
    return nullptr;

  if (reloc->addend) {
    assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
    Defined *definedSym = cast<Defined>(sym);
    sym = tryFindDefinedOnIsec(definedSym->isec(),
                               definedSym->value + reloc->addend);
  }

  return sym;
}
579
580
// Return the first symbol of \p isec whose [value, value + size) range
// contains \p offset, or nullptr if no symbol covers it.
Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
                                                  uint32_t offset) {
  for (Defined *candidate : isec->symbols) {
    const auto symEnd = candidate->value + candidate->size;
    const bool startsAtOrBefore = candidate->value <= offset;
    if (startsAtOrBefore && symEnd > offset)
      return candidate;
  }

  return nullptr;
}
588
589
// Same as tryGetSymbolAtIsecOffset, but yields nullptr unless the symbol found
// is a Defined.
Defined *
ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
                                              uint32_t offset) {
  return dyn_cast_or_null<Defined>(tryGetSymbolAtIsecOffset(isec, offset));
}
595
596
// Get the class's ro_data symbol. If getMetaRo is true, then we will return
597
// the meta-class's ro_data symbol. Otherwise, we will return the class
598
// (instance) ro_data symbol.
599
Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
600
bool getMetaRo) {
601
ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
602
if (!isec)
603
return nullptr;
604
605
if (!getMetaRo)
606
return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
607
classSym->value);
608
609
Defined *metaClass = tryGetDefinedAtIsecOffset(
610
isec, classLayout.metaClassOffset + classSym->value);
611
if (!metaClass)
612
return nullptr;
613
614
return tryGetDefinedAtIsecOffset(
615
dyn_cast<ConcatInputSection>(metaClass->isec()),
616
classLayout.roDataOffset);
617
}
618
619
// Given an ConcatInputSection or CStringInputSection and an offset, if there is
620
// a symbol(Defined) at that offset, then erase the symbol (mark it not live)
621
void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
622
const ConcatInputSection *isec, uint32_t offset) {
623
const Reloc *reloc = isec->getRelocAt(offset);
624
625
if (!reloc)
626
return;
627
628
Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
629
if (!sym)
630
return;
631
632
if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
633
eraseISec(cisec);
634
else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
635
uint32_t totalOffset = sym->value + reloc->addend;
636
StringPiece &piece = csisec->getStringPiece(totalOffset);
637
piece.live = false;
638
} else {
639
llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
640
}
641
}
642
643
void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
644
const InfoInputCategory &catInfo) {
645
646
if (!infoCategoryWriter.catListInfo.valid)
647
collectSectionWriteInfoFromIsec(catInfo.catListIsec,
648
infoCategoryWriter.catListInfo);
649
if (!infoCategoryWriter.catBodyInfo.valid)
650
collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
651
infoCategoryWriter.catBodyInfo);
652
653
if (!infoCategoryWriter.catNameInfo.valid) {
654
lld::macho::Defined *catNameSym =
655
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
656
assert(catNameSym && "Category does not have a valid name Symbol");
657
658
collectSectionWriteInfoFromIsec(catNameSym->isec(),
659
infoCategoryWriter.catNameInfo);
660
}
661
662
// Collect writer info from all the category lists (we're assuming they all
663
// would provide the same info)
664
if (!infoCategoryWriter.catPtrListInfo.valid) {
665
for (uint32_t off = catLayout.instanceMethodsOffset;
666
off <= catLayout.classPropsOffset; off += target->wordSize) {
667
if (Defined *ptrList =
668
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
669
collectSectionWriteInfoFromIsec(ptrList->isec(),
670
infoCategoryWriter.catPtrListInfo);
671
// we've successfully collected data, so we can break
672
break;
673
}
674
}
675
}
676
}
677
678
// Parse a protocol list that might be linked to ConcatInputSection at a given
679
// offset. The format of the protocol list is different than other lists (prop
680
// lists, method lists) so we need to parse it differently
681
void ObjcCategoryMerger::parseProtocolListInfo(
682
const ConcatInputSection *isec, uint32_t secOffset,
683
PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
684
assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
685
"Tried to read pointer list beyond protocol section end");
686
687
const Reloc *reloc = isec->getRelocAt(secOffset);
688
if (!reloc)
689
return;
690
691
auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
692
assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
693
694
// Theoretically protocol count can be either 32b or 64b, depending on
695
// platform pointer size, but to simplify implementation we always just read
696
// the lower 32b which should be good enough.
697
uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
698
ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
699
700
ptrList.structCount += protocolCount;
701
ptrList.structSize = target->wordSize;
702
703
[[maybe_unused]] uint32_t expectedListSize =
704
(protocolCount * target->wordSize) +
705
/*header(count)*/ protocolListHeaderLayout.totalSize +
706
/*extra null value*/ target->wordSize;
707
708
// On Swift, the protocol list does not have the extra (unnecessary) null
709
[[maybe_unused]] uint32_t expectedListSizeSwift =
710
expectedListSize - target->wordSize;
711
712
assert(((expectedListSize == ptrListSym->isec()->data.size() &&
713
sourceLang == SourceLanguage::ObjC) ||
714
(expectedListSizeSwift == ptrListSym->isec()->data.size() &&
715
sourceLang == SourceLanguage::Swift)) &&
716
"Protocol list does not match expected size");
717
718
uint32_t off = protocolListHeaderLayout.totalSize;
719
for (uint32_t inx = 0; inx < protocolCount; ++inx) {
720
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
721
assert(reloc && "No reloc found at protocol list offset");
722
723
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
724
assert(listSym && "Protocol list reloc does not have a valid Defined");
725
726
ptrList.allPtrs.push_back(listSym);
727
off += target->wordSize;
728
}
729
assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
730
"expected null terminating protocol");
731
assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
732
"Protocol list end offset does not match expected size");
733
}
734
735
// Parse a protocol list and return the PointerListInfo for it
736
ObjcCategoryMerger::PointerListInfo
737
ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
738
uint32_t secOffset,
739
SourceLanguage sourceLang) {
740
PointerListInfo ptrList;
741
parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
742
return ptrList;
743
}
744
745
// Parse a pointer list that might be linked to ConcatInputSection at a given
746
// offset. This can be used for instance methods, class methods, instance props
747
// and class props since they have the same format.
748
void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
749
uint32_t secOffset,
750
PointerListInfo &ptrList) {
751
assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
752
assert(isec && "Trying to parse pointer list from null isec");
753
assert(secOffset + target->wordSize <= isec->data.size() &&
754
"Trying to read pointer list beyond section end");
755
756
const Reloc *reloc = isec->getRelocAt(secOffset);
757
if (!reloc)
758
return;
759
760
auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
761
assert(ptrListSym && "Reloc does not have a valid Defined");
762
763
uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
764
ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
765
uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
766
ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
767
assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
768
769
assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
770
771
ptrList.structCount += thisStructCount;
772
ptrList.structSize = thisStructSize;
773
774
uint32_t expectedListSize =
775
listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
776
assert(expectedListSize == ptrListSym->isec()->data.size() &&
777
"Pointer list does not match expected size");
778
779
for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
780
off += target->wordSize) {
781
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
782
assert(reloc && "No reloc found at pointer list offset");
783
784
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
785
assert(listSym && "Reloc does not have a valid Defined");
786
787
ptrList.allPtrs.push_back(listSym);
788
}
789
}
790
791
// Here we parse all the information of an input category (catInfo) and
792
// append the parsed info into the structure which will contain all the
793
// information about how a class is extended (extInfo)
794
void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
795
ClassExtensionInfo &extInfo) {
796
const Reloc *catNameReloc =
797
catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
798
799
// Parse name
800
assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
801
802
// is this the first category we are parsing?
803
if (extInfo.mergedContainerName.empty())
804
extInfo.objFileForMergeData =
805
dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
806
else
807
extInfo.mergedContainerName += "|";
808
809
assert(extInfo.objFileForMergeData &&
810
"Expected to already have valid objextInfo.objFileForMergeData");
811
812
StringRef catName = getReferentString(*catNameReloc);
813
extInfo.mergedContainerName += catName.str();
814
815
// Parse base class
816
if (!extInfo.baseClass) {
817
Symbol *classSym =
818
tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
819
assert(extInfo.baseClassName.empty());
820
extInfo.baseClass = classSym;
821
llvm::StringRef classPrefix(objc::symbol_names::klass);
822
assert(classSym->getName().starts_with(classPrefix) &&
823
"Base class symbol does not start with expected prefix");
824
extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
825
} else {
826
assert((extInfo.baseClass ==
827
tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
828
catLayout.klassOffset)) &&
829
"Trying to parse category info into container with different base "
830
"class");
831
}
832
833
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,
834
extInfo.instanceMethods);
835
836
parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
837
extInfo.classMethods);
838
839
parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
840
extInfo.protocols, catInfo.sourceLanguage);
841
842
parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
843
extInfo.instanceProps);
844
845
parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
846
extInfo.classProps);
847
}
848
849
// Generate a protocol list (including header) and link it into the parent at
850
// the specified offset.
851
Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
852
Defined *parentSym, uint32_t linkAtOffset,
853
const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
854
if (ptrList.allPtrs.empty())
855
return nullptr;
856
857
assert(ptrList.allPtrs.size() == ptrList.structCount);
858
859
uint32_t bodySize = (ptrList.structCount * target->wordSize) +
860
/*header(count)*/ protocolListHeaderLayout.totalSize +
861
/*extra null value*/ target->wordSize;
862
llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
863
864
// This theoretically can be either 32b or 64b, but writing just the first 32b
865
// is good enough
866
const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
867
bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
868
869
*const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
870
871
ConcatInputSection *listSec = make<ConcatInputSection>(
872
*infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
873
infoCategoryWriter.catPtrListInfo.align);
874
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
875
listSec->live = true;
876
877
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
878
879
std::string symName = ptrList.categoryPrefix;
880
symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
881
882
Defined *ptrListSym = make<Defined>(
883
newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
884
listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
885
/*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
886
/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
887
/*isWeakDefCanBeHidden=*/false);
888
889
ptrListSym->used = true;
890
parentSym->getObjectFile()->symbols.push_back(ptrListSym);
891
addInputSection(listSec);
892
893
createSymbolReference(parentSym, ptrListSym, linkAtOffset,
894
infoCategoryWriter.catBodyInfo.relocTemplate);
895
896
uint32_t offset = protocolListHeaderLayout.totalSize;
897
for (Symbol *symbol : ptrList.allPtrs) {
898
createSymbolReference(ptrListSym, symbol, offset,
899
infoCategoryWriter.catPtrListInfo.relocTemplate);
900
offset += target->wordSize;
901
}
902
903
return ptrListSym;
904
}
905
906
// Generate a pointer list (including header) and link it into the parent at the
907
// specified offset. This is used for instance and class methods and
908
// proprieties.
909
void ObjcCategoryMerger::emitAndLinkPointerList(
910
Defined *parentSym, uint32_t linkAtOffset,
911
const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
912
if (ptrList.allPtrs.empty())
913
return;
914
915
assert(ptrList.allPtrs.size() * target->wordSize ==
916
ptrList.structCount * ptrList.structSize);
917
918
// Generate body
919
uint32_t bodySize =
920
listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
921
llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
922
923
const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
924
bodyData.data() + listHeaderLayout.structSizeOffset);
925
const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
926
bodyData.data() + listHeaderLayout.structCountOffset);
927
928
*const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
929
*const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
930
931
ConcatInputSection *listSec = make<ConcatInputSection>(
932
*infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
933
infoCategoryWriter.catPtrListInfo.align);
934
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
935
listSec->live = true;
936
937
listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
938
939
std::string symName = ptrList.categoryPrefix;
940
symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
941
942
Defined *ptrListSym = make<Defined>(
943
newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
944
listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
945
/*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
946
/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
947
/*isWeakDefCanBeHidden=*/false);
948
949
ptrListSym->used = true;
950
parentSym->getObjectFile()->symbols.push_back(ptrListSym);
951
addInputSection(listSec);
952
953
createSymbolReference(parentSym, ptrListSym, linkAtOffset,
954
infoCategoryWriter.catBodyInfo.relocTemplate);
955
956
uint32_t offset = listHeaderLayout.totalSize;
957
for (Symbol *symbol : ptrList.allPtrs) {
958
createSymbolReference(ptrListSym, symbol, offset,
959
infoCategoryWriter.catPtrListInfo.relocTemplate);
960
offset += target->wordSize;
961
}
962
}
963
964
// This method creates an __objc_catlist ConcatInputSection with a single slot
965
Defined *
966
ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
967
const std::string &forBaseClassName,
968
ObjFile *objFile) {
969
uint32_t sectionSize = target->wordSize;
970
llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
971
972
ConcatInputSection *newCatList =
973
make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
974
bodyData, infoCategoryWriter.catListInfo.align);
975
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
976
newCatList->live = true;
977
978
newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
979
980
std::string catSymName = "<__objc_catlist slot for merged category ";
981
catSymName += forBaseClassName + "(" + forCategoryName + ")>";
982
983
Defined *catListSym = make<Defined>(
984
newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
985
/*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
986
/*isPrivateExtern=*/false, /*includeInSymtab=*/false,
987
/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
988
/*isWeakDefCanBeHidden=*/false);
989
990
catListSym->used = true;
991
objFile->symbols.push_back(catListSym);
992
addInputSection(newCatList);
993
return catListSym;
994
}
995
996
// Here we generate the main category body and link the name and base class into
997
// it. We don't link any other info yet like the protocol and class/instance
998
// methods/props.
999
Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1000
const Defined *nameSym,
1001
const Symbol *baseClassSym,
1002
const std::string &baseClassName,
1003
ObjFile *objFile) {
1004
llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1005
1006
uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1007
catLayout.sizeOffset);
1008
*ptrSize = catLayout.totalSize;
1009
1010
ConcatInputSection *newBodySec =
1011
make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1012
bodyData, infoCategoryWriter.catBodyInfo.align);
1013
newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1014
newBodySec->live = true;
1015
1016
std::string symName =
1017
objc::symbol_names::category + baseClassName + "(" + name + ")";
1018
Defined *catBodySym = make<Defined>(
1019
newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1020
/*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1021
/*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1022
/*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1023
/*isWeakDefCanBeHidden=*/false);
1024
1025
catBodySym->used = true;
1026
objFile->symbols.push_back(catBodySym);
1027
addInputSection(newBodySec);
1028
1029
createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1030
infoCategoryWriter.catBodyInfo.relocTemplate);
1031
1032
// Create a reloc to the base class (either external or internal)
1033
createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1034
infoCategoryWriter.catBodyInfo.relocTemplate);
1035
1036
return catBodySym;
1037
}
1038
1039
// This writes the new category name (for the merged category) into the binary
1040
// and returns the sybmol for it.
1041
Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1042
ObjFile *objFile) {
1043
StringRef nameStrData = newStringData(name.c_str());
1044
// We use +1 below to include the null terminator
1045
llvm::ArrayRef<uint8_t> nameData(
1046
reinterpret_cast<const uint8_t *>(nameStrData.data()),
1047
nameStrData.size() + 1);
1048
1049
auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1050
CStringInputSection *newStringSec = make<CStringInputSection>(
1051
*infoCategoryWriter.catNameInfo.inputSection, nameData,
1052
infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1053
1054
parentSection->subsections.push_back({0, newStringSec});
1055
1056
newStringSec->splitIntoPieces();
1057
newStringSec->pieces[0].live = true;
1058
newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1059
in.cStringSection->addInput(newStringSec);
1060
assert(newStringSec->pieces.size() == 1);
1061
1062
Defined *catNameSym = make<Defined>(
1063
"<merged category name>", /*file=*/objFile, newStringSec,
1064
/*value=*/0, nameData.size(),
1065
/*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1066
/*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1067
/*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1068
1069
catNameSym->used = true;
1070
objFile->symbols.push_back(catNameSym);
1071
return catNameSym;
1072
}
1073
1074
// This method fully creates a new category from the given ClassExtensionInfo.
1075
// It creates the category name, body and method/protocol/prop lists and links
1076
// them all together. Then it creates a new __objc_catlist entry and adds the
1077
// category to it. Calling this method will fully generate a category which will
1078
// be available in the final binary.
1079
Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1080
Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1081
extInfo.objFileForMergeData);
1082
1083
Defined *catBodySym = emitCategoryBody(
1084
extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1085
extInfo.baseClassName, extInfo.objFileForMergeData);
1086
1087
Defined *catListSym =
1088
emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1089
extInfo.objFileForMergeData);
1090
1091
// Add the single category body to the category list at the offset 0.
1092
createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1093
infoCategoryWriter.catListInfo.relocTemplate);
1094
1095
emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1096
extInfo.instanceMethods);
1097
1098
emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1099
extInfo.classMethods);
1100
1101
emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1102
extInfo.protocols);
1103
1104
emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1105
extInfo.instanceProps);
1106
1107
emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1108
extInfo.classProps);
1109
1110
return catBodySym;
1111
}
1112
1113
// This method merges all the categories (sharing a base class) into a single
1114
// category.
1115
void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1116
std::vector<InfoInputCategory> &categories) {
1117
assert(categories.size() > 1 && "Expected at least 2 categories");
1118
1119
ClassExtensionInfo extInfo(catLayout);
1120
1121
for (auto &catInfo : categories)
1122
parseCatInfoToExtInfo(catInfo, extInfo);
1123
1124
Defined *newCatDef = emitCategory(extInfo);
1125
assert(newCatDef && "Failed to create a new category");
1126
1127
// Suppress unsuded var warning
1128
(void)newCatDef;
1129
1130
for (auto &catInfo : categories)
1131
catInfo.wasMerged = true;
1132
}
1133
1134
void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1135
const Symbol *refTo,
1136
uint32_t offset,
1137
const Reloc &relocTemplate) {
1138
Reloc r = relocTemplate;
1139
r.offset = offset;
1140
r.addend = 0;
1141
r.referent = const_cast<Symbol *>(refTo);
1142
refFrom->isec()->relocs.push_back(r);
1143
}
1144
1145
// Get the list of categories in the '__objc_nlcatlist' section. We can't
1146
// optimize these as they have a '+load' method that has to be called at
1147
// runtime.
1148
DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1149
DenseSet<const Symbol *> nlCategories;
1150
1151
for (InputSection *sec : allInputSections) {
1152
if (sec->getName() != section_names::objcNonLazyCatList)
1153
continue;
1154
1155
for (auto &r : sec->relocs) {
1156
const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1157
nlCategories.insert(sym);
1158
}
1159
}
1160
return nlCategories;
1161
}
1162
1163
void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1164
auto nlCategories = collectNlCategories();
1165
1166
for (InputSection *sec : allInputSections) {
1167
if (sec->getName() != section_names::objcCatList)
1168
continue;
1169
ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1170
assert(catListCisec &&
1171
"__objc_catList InputSection is not a ConcatInputSection");
1172
1173
for (uint32_t off = 0; off < catListCisec->getSize();
1174
off += target->wordSize) {
1175
Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1176
assert(categorySym &&
1177
"Failed to get a valid category at __objc_catlit offset");
1178
1179
if (nlCategories.count(categorySym))
1180
continue;
1181
1182
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1183
assert(catBodyIsec &&
1184
"Category data section is not an ConcatInputSection");
1185
1186
SourceLanguage eLang = SourceLanguage::Unknown;
1187
if (categorySym->getName().starts_with(objc::symbol_names::category))
1188
eLang = SourceLanguage::ObjC;
1189
else if (categorySym->getName().starts_with(
1190
objc::symbol_names::swift_objc_category))
1191
eLang = SourceLanguage::Swift;
1192
else
1193
llvm_unreachable("Unexpected category symbol name");
1194
1195
InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1196
1197
// Check that the category has a reloc at 'klassOffset' (which is
1198
// a pointer to the class symbol)
1199
1200
Symbol *classSym =
1201
tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1202
assert(classSym && "Category does not have a valid base class");
1203
1204
categoryMap[classSym].push_back(catInputInfo);
1205
1206
collectCategoryWriterInfoFromCategory(catInputInfo);
1207
}
1208
}
1209
}
1210
1211
// In the input we have multiple __objc_catlist InputSection, each of which may
1212
// contain links to multiple categories. Of these categories, we will merge (and
1213
// erase) only some. There will be some categories that will remain untouched
1214
// (not erased). For these not erased categories, we generate new __objc_catlist
1215
// entries since the parent __objc_catlist entry will be erased
1216
void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1217
const MapVector<ConcatInputSection *, std::set<uint64_t>>
1218
catListToErasedOffsets) {
1219
1220
// Go through all offsets of all __objc_catlist's that we process and if there
1221
// are categories that we didn't process - generate a new __objc_catlist for
1222
// each.
1223
for (auto &mapEntry : catListToErasedOffsets) {
1224
ConcatInputSection *catListIsec = mapEntry.first;
1225
for (uint32_t catListIsecOffset = 0;
1226
catListIsecOffset < catListIsec->data.size();
1227
catListIsecOffset += target->wordSize) {
1228
// This slot was erased, we can just skip it
1229
if (mapEntry.second.count(catListIsecOffset))
1230
continue;
1231
1232
Defined *nonErasedCatBody =
1233
tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1234
assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1235
1236
// Allocate data for the new __objc_catlist slot
1237
llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1238
1239
// We mark the __objc_catlist slot as belonging to the same file as the
1240
// category
1241
ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1242
1243
ConcatInputSection *listSec = make<ConcatInputSection>(
1244
*infoCategoryWriter.catListInfo.inputSection, bodyData,
1245
infoCategoryWriter.catListInfo.align);
1246
listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1247
listSec->live = true;
1248
1249
std::string slotSymName = "<__objc_catlist slot for category ";
1250
slotSymName += nonErasedCatBody->getName();
1251
slotSymName += ">";
1252
1253
Defined *catListSlotSym = make<Defined>(
1254
newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1255
/*value=*/0, bodyData.size(),
1256
/*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1257
/*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1258
/*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1259
1260
catListSlotSym->used = true;
1261
objFile->symbols.push_back(catListSlotSym);
1262
addInputSection(listSec);
1263
1264
// Now link the category body into the newly created slot
1265
createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1266
infoCategoryWriter.catListInfo.relocTemplate);
1267
}
1268
}
1269
}
1270
1271
// Mark `isec` as dead and flag every symbol attached to it as unused.
void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
  for (auto &attachedSym : isec->symbols)
    attachedSym->used = false;
  isec->live = false;
}
1276
1277
// This fully erases the merged categories, including their body, their names,
1278
// their method/protocol/prop lists and the __objc_catlist entries that link to
1279
// them.
1280
void ObjcCategoryMerger::eraseMergedCategories() {
1281
// Map of InputSection to a set of offsets of the categories that were merged
1282
MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1283
1284
for (auto &mapEntry : categoryMap) {
1285
for (InfoInputCategory &catInfo : mapEntry.second) {
1286
if (catInfo.wasMerged) {
1287
eraseISec(catInfo.catListIsec);
1288
catListToErasedOffsets[catInfo.catListIsec].insert(
1289
catInfo.offCatListIsec);
1290
}
1291
}
1292
}
1293
1294
// If there were categories that we did not erase, we need to generate a new
1295
// __objc_catList that contains only the un-merged categories, and get rid of
1296
// the references to the ones we merged.
1297
generateCatListForNonErasedCategories(catListToErasedOffsets);
1298
1299
// Erase the old method lists & names of the categories that were merged
1300
for (auto &mapEntry : categoryMap) {
1301
for (InfoInputCategory &catInfo : mapEntry.second) {
1302
if (!catInfo.wasMerged)
1303
continue;
1304
1305
eraseISec(catInfo.catBodyIsec);
1306
1307
// We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1308
// categories because the name will sometimes also be used for other
1309
// purposes.
1310
// For Swift, see usages of 'l_.str.11.SimpleClass' in
1311
// objc-category-merging-swift.s
1312
// For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1313
// objc-category-merging-erase-objc-name-test.s
1314
// TODO: handle the above in a smarter way
1315
1316
tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1317
catLayout.instanceMethodsOffset);
1318
tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1319
catLayout.classMethodsOffset);
1320
tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1321
catLayout.protocolsOffset);
1322
tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1323
catLayout.classPropsOffset);
1324
tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1325
catLayout.instancePropsOffset);
1326
}
1327
}
1328
}
1329
1330
void ObjcCategoryMerger::doMerge() {
1331
collectAndValidateCategoriesData();
1332
1333
for (auto &[baseClass, catInfos] : categoryMap) {
1334
if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1335
// Merge all categories into the base class
1336
mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1337
} else if (catInfos.size() > 1) {
1338
// Merge all categories into a new, single category
1339
mergeCategoriesIntoSingleCategory(catInfos);
1340
}
1341
}
1342
1343
// Erase all categories that were merged
1344
eraseMergedCategories();
1345
}
1346
1347
// Drop every buffer that was handed out by newSectionData().
void ObjcCategoryMerger::doCleanup() {
  generatedSectionData.clear();
}
1348
1349
// Copy the C string `str`, including its null terminator, into a buffer
// obtained from newSectionData() and return a StringRef over the copied
// characters (terminator excluded).
StringRef ObjcCategoryMerger::newStringData(const char *str) {
  const uint32_t charCount = strlen(str);
  // One extra byte for the null terminator.
  const uint32_t byteCount = charCount + 1;

  SmallVector<uint8_t> &storage = newSectionData(byteCount);
  char *copy = reinterpret_cast<char *>(storage.data());
  memcpy(copy, str, byteCount);

  return StringRef(copy, charCount);
}
1358
1359
// Allocate a zero-filled buffer of `size` bytes, store it in
// `generatedSectionData` (which owns it) and return a reference to it.
SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
  auto buffer = std::make_unique<SmallVector<uint8_t>>(size, 0);
  // The pointee does not move when the owning pointer is moved into the
  // container, so this reference stays valid.
  SmallVector<uint8_t> &bufferRef = *buffer;
  generatedSectionData.push_back(std::move(buffer));
  return bufferRef;
}
1364
1365
} // namespace
1366
1367
void objc::mergeCategories() {
1368
TimeTraceScope timeScope("ObjcCategoryMerger");
1369
1370
ObjcCategoryMerger merger(inputSections);
1371
merger.doMerge();
1372
}
1373
1374
// Forward to ObjcCategoryMerger::doCleanup().
void objc::doCleanup() {
  ObjcCategoryMerger::doCleanup();
}
1375
1376
// Determine the source language of a class from its symbol.
//
// A symbol whose name carries the Swift class prefix is Swift. A symbol with
// the ObjC class prefix is not necessarily ObjC, since Swift creates ObjC-like
// alias symbols for some Swift classes. Ex:
//   .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
//   .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
//   .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
// so for those we look for a Swift-prefixed symbol at the same address. Any
// other symbol name is unexpected.
ObjcCategoryMerger::SourceLanguage
ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
  const auto className = classSym->getName();

  if (className.starts_with(objc::symbol_names::swift_objc_klass))
    return SourceLanguage::Swift;

  if (!className.starts_with(objc::symbol_names::klass))
    llvm_unreachable("Unexpected class symbol name during category merging");

  // Scan the symbols of the class's original section for one that has the
  // same value and a Swift class name.
  for (auto &aliasSym : classSym->originalIsec->symbols) {
    const bool sameAddress = aliasSym->value == classSym->value;
    if (sameAddress &&
        aliasSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
      return SourceLanguage::Swift;
  }

  return SourceLanguage::ObjC;
}
1399
void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1400
const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1401
assert(categories.size() >= 1 && "Expected at least one category to merge");
1402
1403
// Collect all the info from the categories
1404
ClassExtensionInfo extInfo(catLayout);
1405
extInfo.baseClass = baseClass;
1406
extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1407
1408
for (auto &catInfo : categories) {
1409
parseCatInfoToExtInfo(catInfo, extInfo);
1410
}
1411
1412
// Get metadata for the base class
1413
Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1414
ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1415
Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1416
ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1417
1418
// Now collect the info from the base class from the various lists in the
1419
// class metadata
1420
1421
// Protocol lists are a special case - the same protocol list is in classRo
1422
// and metaRo, so we only need to parse it once
1423
parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1424
extInfo.protocols, extInfo.baseClassSourceLanguage);
1425
1426
// Check that the classRo and metaRo protocol lists are identical
1427
assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1428
extInfo.baseClassSourceLanguage) ==
1429
parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1430
extInfo.baseClassSourceLanguage) &&
1431
"Category merger expects classRo and metaRo to have the same protocol "
1432
"list");
1433
1434
parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1435
extInfo.classMethods);
1436
parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1437
extInfo.instanceMethods);
1438
1439
parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1440
extInfo.classProps);
1441
parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1442
extInfo.instanceProps);
1443
1444
// Erase the old lists - these will be generated and replaced
1445
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1446
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1447
eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1448
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1449
eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1450
eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1451
1452
// Emit the newly merged lists - first into the meta RO then into the class RO
1453
// First we emit and link the protocol list into the meta RO. Then we link it
1454
// in the classRo as well (they're supposed to be identical)
1455
if (Defined *protoListSym =
1456
emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1457
extInfo, extInfo.protocols)) {
1458
createSymbolReference(classRo, protoListSym,
1459
roClassLayout.baseProtocolsOffset,
1460
infoCategoryWriter.catBodyInfo.relocTemplate);
1461
}
1462
1463
emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1464
extInfo.classMethods);
1465
emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1466
extInfo.instanceMethods);
1467
1468
emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1469
extInfo.classProps);
1470
1471
emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1472
extInfo.instanceProps);
1473
1474
// Mark all the categories as merged - this will be used to erase them later
1475
for (auto &catInfo : categories)
1476
catInfo.wasMerged = true;
1477
}
1478
1479
// Erase the symbol at a given offset in an InputSection
1480
void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1481
uint32_t offset) {
1482
Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1483
if (!sym)
1484
return;
1485
1486
// Remove the symbol from isec->symbols
1487
assert(isa<Defined>(sym) && "Can only erase a Defined");
1488
llvm::erase(isec->symbols, sym);
1489
1490
// Remove the relocs that refer to this symbol
1491
auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1492
llvm::erase_if(isec->relocs, removeAtOff);
1493
1494
// Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1495
// the whole ConcatInputSection
1496
if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1497
if (cisec->data.size() == sym->size)
1498
eraseISec(cisec);
1499
}
1500
1501