Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
35269 views
1
//===- GsymCreator.cpp ----------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//===----------------------------------------------------------------------===//
7
8
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
9
#include "llvm/DebugInfo/GSYM/FileWriter.h"
10
#include "llvm/DebugInfo/GSYM/Header.h"
11
#include "llvm/DebugInfo/GSYM/LineTable.h"
12
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
13
#include "llvm/MC/StringTableBuilder.h"
14
#include "llvm/Support/raw_ostream.h"
15
16
#include <algorithm>
17
#include <cassert>
18
#include <functional>
19
#include <vector>
20
21
using namespace llvm;
22
using namespace gsym;
23
24
// Build an empty creator. The string table is created with the ELF kind of
// StringTableBuilder and the Quiet flag is stored as given.
GsymCreator::GsymCreator(bool Quiet)
    : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
  // Register the empty path first so that it becomes file index zero;
  // encode() asserts that Files[0] has Dir == 0 and Base == 0.
  insertFile(StringRef());
}
28
29
// Add the file named by Path to the file table and return its index.
//
// Path is split, according to Style, into a directory part and a file name
// part. Both parts are interned with insertString() and the resulting pair of
// string offsets is passed to insertFileEntry().
uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
  // Keep the two insertString() calls as separate statements that run before
  // the FileEntry is constructed. Written as constructor arguments, their
  // relative evaluation order would be unspecified, and the directory must be
  // interned before the file name.
  const uint32_t DirStrOff =
      insertString(llvm::sys::path::parent_path(Path, Style));
  const uint32_t BaseStrOff =
      insertString(llvm::sys::path::filename(Path, Style));
  return insertFileEntry(FileEntry(DirStrOff, BaseStrOff));
}
40
41
// Return the file table index for FE, appending FE to the table if it has not
// been seen before. Takes Mutex for the duration of the call.
uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
  std::lock_guard<std::mutex> Guard(Mutex);
  // Try to map FE to the index it would receive if appended now. Inserted is
  // false when FE was already mapped, in which case Pos refers to the
  // previously recorded index.
  auto [Pos, Inserted] =
      FileEntryToIndex.insert(std::make_pair(FE, Files.size()));
  if (Inserted)
    Files.emplace_back(FE);
  return Pos->second;
}
50
51
uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
52
// File index zero is reserved for a FileEntry with no directory and no
53
// filename. Any other file and we need to copy the strings for the directory
54
// and filename.
55
if (FileIdx == 0)
56
return 0;
57
const FileEntry SrcFE = SrcGC.Files[FileIdx];
58
// Copy the strings for the file and then add the newly converted file entry.
59
uint32_t Dir =
60
SrcFE.Dir == 0
61
? 0
62
: StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
63
uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
64
FileEntry DstFE(Dir, Base);
65
return insertFileEntry(DstFE);
66
}
67
68
// Write this creator to disk.
//
// When SegmentSize holds a value the work is delegated to saveSegments().
// Otherwise a single file is opened at Path and the creator is encoded into
// it with the requested byte order. Returns the error from opening the file
// or from encode().
llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
                              std::optional<uint64_t> SegmentSize) const {
  if (SegmentSize.has_value())
    return saveSegments(Path, ByteOrder, *SegmentSize);

  std::error_code OpenErr;
  raw_fd_ostream Stream(Path, OpenErr);
  if (OpenErr)
    return llvm::errorCodeToError(OpenErr);

  FileWriter Writer(Stream, ByteOrder);
  return encode(Writer);
}
79
80
// Serialize this creator through O as a single GSYM file.
//
// The output consists of, in order: the header, the address offset table, a
// table of 32 bit address info offsets (written as zeros and patched at the
// end), the file table, the string table and finally the encoded function
// infos. Mutex is held for the whole call.
//
// Returns an error if there are no functions, if finalize() has not been
// called, if there are more functions or files than fit in 32 bits, if no base
// address is available, if the UUID is larger than the header field, if
// writing the header or a function info fails, or if an offset that must be
// stored in a 32 bit field does not fit. Note that data may already have been
// written to O when an error is returned.
llvm::Error GsymCreator::encode(FileWriter &O) const {
  std::lock_guard<std::mutex> Guard(Mutex);
  if (Funcs.empty())
    return createStringError(std::errc::invalid_argument,
                             "no functions to encode");
  if (!Finalized)
    return createStringError(std::errc::invalid_argument,
                             "GsymCreator wasn't finalized prior to encoding");

  if (Funcs.size() > UINT32_MAX)
    return createStringError(std::errc::invalid_argument,
                             "too many FunctionInfos");

  const std::optional<uint64_t> BaseAddr = getBaseAddress();
  // Base address should be valid if we have any functions.
  if (!BaseAddr)
    return createStringError(std::errc::invalid_argument,
                             "invalid base address");

  Header Hdr;
  // Validate the UUID length before it is narrowed into the 8 bit header
  // field and before it is used as a copy length.
  if (UUID.size() > sizeof(Hdr.UUID))
    return createStringError(std::errc::invalid_argument,
                             "invalid UUID size %u",
                             static_cast<uint32_t>(UUID.size()));

  Hdr.Magic = GSYM_MAGIC;
  Hdr.Version = GSYM_VERSION;
  Hdr.AddrOffSize = getAddressOffsetSize();
  Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
  Hdr.BaseAddress = *BaseAddr;
  Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
  Hdr.StrtabOffset = 0; // We will fix this up later.
  Hdr.StrtabSize = 0;   // We will fix this up later.
  memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
  // Copy the UUID value if we have one.
  if (UUID.size() > 0)
    memcpy(Hdr.UUID, UUID.data(), UUID.size());

  // Write out the header.
  if (llvm::Error Err = Hdr.encode(O))
    return Err;

  const uint64_t MaxAddressOffset = getMaxAddressOffset();
  // Write out the address offsets.
  O.alignTo(Hdr.AddrOffSize);
  for (const auto &FuncInfo : Funcs) {
    const uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
    // Make sure we calculated the address offsets byte size correctly by
    // verifying the current address offset is within ranges. We have seen bugs
    // introduced when the code changes that can cause problems here so it is
    // good to catch this during testing.
    assert(AddrOffset <= MaxAddressOffset);
    (void)MaxAddressOffset;
    switch (Hdr.AddrOffSize) {
    case 1:
      O.writeU8(static_cast<uint8_t>(AddrOffset));
      break;
    case 2:
      O.writeU16(static_cast<uint16_t>(AddrOffset));
      break;
    case 4:
      O.writeU32(static_cast<uint32_t>(AddrOffset));
      break;
    case 8:
      O.writeU64(AddrOffset);
      break;
    }
  }

  // Write out all zeros for the AddrInfoOffsets. The real values are only
  // known once the function infos have been encoded, so they are patched in
  // at the end.
  O.alignTo(4);
  const uint64_t AddrInfoOffsetsOffset = O.tell();
  for (size_t I = 0, N = Funcs.size(); I < N; ++I)
    O.writeU32(0);

  // Write out the file table.
  O.alignTo(4);
  assert(!Files.empty());
  assert(Files[0].Dir == 0);
  assert(Files[0].Base == 0);
  const size_t NumFiles = Files.size();
  if (NumFiles > UINT32_MAX)
    return createStringError(std::errc::invalid_argument, "too many files");
  O.writeU32(static_cast<uint32_t>(NumFiles));
  for (const FileEntry &File : Files) {
    O.writeU32(File.Dir);
    O.writeU32(File.Base);
  }

  // Write out the string table.
  const uint64_t StrtabOffset = O.tell();
  StrTab.write(O.get_stream());
  const uint64_t StrtabSize = O.tell() - StrtabOffset;
  // The header stores both values in 32 bit fields; refuse to truncate them.
  if (StrtabOffset > UINT32_MAX || StrtabSize > UINT32_MAX)
    return createStringError(
        std::errc::invalid_argument,
        "string table offset or size doesn't fit in 32 bits");

  // Write out the address infos for each function info, remembering where
  // each one was written.
  std::vector<uint32_t> AddrInfoOffsets;
  AddrInfoOffsets.reserve(Funcs.size());
  for (const auto &FuncInfo : Funcs) {
    Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O);
    if (!OffsetOrErr)
      return OffsetOrErr.takeError();
    // Address info offsets are stored as 32 bit integers, so an offset that
    // doesn't fit would silently point at the wrong data.
    if (*OffsetOrErr > UINT32_MAX)
      return createStringError(
          std::errc::invalid_argument,
          "address info offset doesn't fit in 32 bits");
    AddrInfoOffsets.push_back(static_cast<uint32_t>(*OffsetOrErr));
  }

  // Fixup the string table offset and size in the header.
  O.fixup32(static_cast<uint32_t>(StrtabOffset),
            offsetof(Header, StrtabOffset));
  O.fixup32(static_cast<uint32_t>(StrtabSize), offsetof(Header, StrtabSize));

  // Fixup all address info offsets.
  uint64_t FixupOffset = AddrInfoOffsetsOffset;
  for (const uint32_t AddrInfoOffset : AddrInfoOffsets) {
    O.fixup32(AddrInfoOffset, FixupOffset);
    FixupOffset += 4;
  }
  return Error::success();
}
191
192
// Finalize the creator so that it can be encoded.
//
// Marks the creator as finalized (a second call returns an "already
// finalized" error) and finalizes the string table in insertion order. Unless
// this creator is a segment, the function infos are then sorted and pruned,
// the size of a trailing zero-sized entry is fixed up from the valid text
// ranges when possible, and a summary line is written to Out.
//
// Pruning walks the sorted function infos and compares each entry (Curr)
// against the last entry kept so far (Prev):
//   - Equal ranges: the two entries are coalesced into one. If they differ,
//     Curr takes Prev's place, and a warning is reported when both carry rich
//     info.
//   - Unequal but intersecting ranges: a warning is reported and Curr is kept
//     in addition to Prev.
//   - Non-intersecting ranges: Curr is kept, except that a zero-sized Prev
//     whose start address lies inside Curr is replaced by Curr.
llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
  std::lock_guard<std::mutex> Guard(Mutex);
  if (Finalized)
    return createStringError(std::errc::invalid_argument, "already finalized");
  Finalized = true;

  // Finalize in insertion order so the string table offsets already handed
  // out do not change.
  StrTab.finalizeInOrder();

  const auto NumBefore = Funcs.size();
  // A segment receives its function infos from a main GsymCreator that was
  // already finalized, so they arrive sorted and uniqued and there is nothing
  // left to do.
  if (IsSegment)
    return Error::success();

  if (NumBefore > 1) {
    // Sort function infos so we can emit sorted functions.
    llvm::sort(Funcs);
    std::vector<FunctionInfo> Kept;
    // Reserving up front also keeps references into Kept valid while entries
    // are appended below.
    Kept.reserve(Funcs.size());
    Kept.emplace_back(std::move(Funcs.front()));
    for (size_t Idx = 1; Idx < NumBefore; ++Idx) {
      FunctionInfo &Prev = Kept.back();
      FunctionInfo &Curr = Funcs[Idx];

      // Empty ranges never intersect, so identical ranges are detected
      // separately in order to coalesce multiple symbols at one address.
      if (Prev.Range == Curr.Range) {
        if (!(Prev == Curr)) {
          if (Prev.hasRichInfo() && Curr.hasRichInfo())
            Out.Report(
                "Duplicate address ranges with different debug info.",
                [&](raw_ostream &OS) {
                  OS << "warning: same address range contains "
                        "different debug "
                     << "info. Removing:\n"
                     << Prev << "\nIn favor of this one:\n"
                     << Curr << "\n";
                });

          // Prefer the later entry. This relies on the sort order of
          // FunctionInfo placing entries with more (or different) debug info
          // after entries with less for the same range; that ordering is
          // defined outside this function.
          std::swap(Prev, Curr);
        }
        continue;
      }

      if (Prev.Range.intersects(Curr.Range)) {
        Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
          // print warnings about overlaps
          OS << "warning: function ranges overlap:\n"
             << Prev << "\n"
             << Curr << "\n";
        });
        Kept.emplace_back(std::move(Curr));
        continue;
      }

      // Symbols on macOS don't have address ranges, so a zero-sized entry
      // whose address falls inside the current range is replaced by the
      // current function info instead of being kept next to it.
      if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
        std::swap(Prev, Curr);
        continue;
      }

      Kept.emplace_back(std::move(Curr));
    }
    std::swap(Funcs, Kept);
  }

  // If the last function info has no size and valid text ranges are known,
  // extend it to the end of the text range that contains it. Otherwise any
  // lookup of a high address could match this size-less last entry.
  if (ValidTextRanges && !Funcs.empty()) {
    FunctionInfo &Last = Funcs.back();
    if (Last.Range.size() == 0) {
      if (auto Containing =
              ValidTextRanges->getRangeThatContains(Last.Range.start()))
        Last.Range = {Last.Range.start(), Containing->end()};
    }
  }

  Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
      << Funcs.size() << " total\n";
  return Error::success();
}
305
306
uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
307
// String offset at zero is always the empty string, no copying needed.
308
if (StrOff == 0)
309
return 0;
310
return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
311
}
312
313
// Intern S in the string table and return its offset. An empty string always
// maps to offset zero and is not added.
//
// When Copy is true and the table does not already contain S, the characters
// are first copied into StringStorage so that the table refers to memory
// owned by this creator. When Copy is false the table refers to the caller's
// memory.
uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
  if (S.empty())
    return 0;

  // Hashing does not touch shared state, so do it before taking the lock.
  CachedHashStringRef Key(S);
  std::lock_guard<std::mutex> Guard(Mutex);
  // StringTableBuilder stores references to strings, so a string created by
  // code needs backing storage. Strings that come from a section in an object
  // file can be referenced directly, which keeps parsing DWARF and other
  // object files fast because most strings are never copied.
  if (Copy && !StrTab.contains(Key))
    Key = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
                              Key.hash()};

  const uint32_t Offset = StrTab.add(Key);
  // Remember which string lives at this offset in case the GSYM file is
  // segmented and strings must be copied to another string table. insert()
  // leaves an entry that is already present untouched.
  StringOffsetMap.insert(std::make_pair(Offset, Key));
  return Offset;
}
339
340
// Append FI to the list of function infos, taking ownership of its contents.
// Takes Mutex for the duration of the call.
void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
  std::lock_guard<std::mutex> Guard(Mutex);
  Funcs.push_back(std::move(FI));
}
344
345
void GsymCreator::forEachFunctionInfo(
346
std::function<bool(FunctionInfo &)> const &Callback) {
347
std::lock_guard<std::mutex> Guard(Mutex);
348
for (auto &FI : Funcs) {
349
if (!Callback(FI))
350
break;
351
}
352
}
353
354
void GsymCreator::forEachFunctionInfo(
355
std::function<bool(const FunctionInfo &)> const &Callback) const {
356
std::lock_guard<std::mutex> Guard(Mutex);
357
for (const auto &FI : Funcs) {
358
if (!Callback(FI))
359
break;
360
}
361
}
362
363
size_t GsymCreator::getNumFunctionInfos() const {
364
std::lock_guard<std::mutex> Guard(Mutex);
365
return Funcs.size();
366
}
367
368
bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
369
if (ValidTextRanges)
370
return ValidTextRanges->contains(Addr);
371
return true; // No valid text ranges has been set, so accept all ranges.
372
}
373
374
std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
375
// If we have finalized then Funcs are sorted. If we are a segment then
376
// Funcs will be sorted as well since function infos get added from an
377
// already finalized GsymCreator object where its functions were sorted and
378
// uniqued.
379
if ((Finalized || IsSegment) && !Funcs.empty())
380
return std::optional<uint64_t>(Funcs.front().startAddress());
381
return std::nullopt;
382
}
383
384
std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
385
// If we have finalized then Funcs are sorted. If we are a segment then
386
// Funcs will be sorted as well since function infos get added from an
387
// already finalized GsymCreator object where its functions were sorted and
388
// uniqued.
389
if ((Finalized || IsSegment) && !Funcs.empty())
390
return std::optional<uint64_t>(Funcs.back().startAddress());
391
return std::nullopt;
392
}
393
394
std::optional<uint64_t> GsymCreator::getBaseAddress() const {
395
if (BaseAddress)
396
return BaseAddress;
397
return getFirstFunctionAddress();
398
}
399
400
uint64_t GsymCreator::getMaxAddressOffset() const {
401
switch (getAddressOffsetSize()) {
402
case 1: return UINT8_MAX;
403
case 2: return UINT16_MAX;
404
case 4: return UINT32_MAX;
405
case 8: return UINT64_MAX;
406
}
407
llvm_unreachable("invalid address offset");
408
}
409
410
uint8_t GsymCreator::getAddressOffsetSize() const {
411
const std::optional<uint64_t> BaseAddress = getBaseAddress();
412
const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
413
if (BaseAddress && LastFuncAddr) {
414
const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
415
if (AddrDelta <= UINT8_MAX)
416
return 1;
417
else if (AddrDelta <= UINT16_MAX)
418
return 2;
419
else if (AddrDelta <= UINT32_MAX)
420
return 4;
421
return 8;
422
}
423
return 1;
424
}
425
426
uint64_t GsymCreator::calculateHeaderAndTableSize() const {
427
uint64_t Size = sizeof(Header);
428
const size_t NumFuncs = Funcs.size();
429
// Add size of address offset table
430
Size += NumFuncs * getAddressOffsetSize();
431
// Add size of address info offsets which are 32 bit integers in version 1.
432
Size += NumFuncs * sizeof(uint32_t);
433
// Add file table size
434
Size += Files.size() * sizeof(FileEntry);
435
// Add string table size
436
Size += StrTab.getSize();
437
438
return Size;
439
}
440
441
// This function takes a InlineInfo class that was copy constructed from an
442
// InlineInfo from the \a SrcGC and updates all members that point to strings
443
// and files to point to strings and files from this GsymCreator.
444
void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
445
II.Name = copyString(SrcGC, II.Name);
446
II.CallFile = copyFile(SrcGC, II.CallFile);
447
for (auto &ChildII: II.Children)
448
fixupInlineInfo(SrcGC, ChildII);
449
}
450
451
uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
452
// To copy a function info we need to copy any files and strings over into
453
// this GsymCreator and then copy the function info and update the string
454
// table offsets to match the new offsets.
455
const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
456
457
FunctionInfo DstFI;
458
DstFI.Range = SrcFI.Range;
459
DstFI.Name = copyString(SrcGC, SrcFI.Name);
460
// Copy the line table if there is one.
461
if (SrcFI.OptLineTable) {
462
// Copy the entire line table.
463
DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
464
// Fixup all LineEntry::File entries which are indexes in the the file table
465
// from SrcGC and must be converted to file indexes from this GsymCreator.
466
LineTable &DstLT = DstFI.OptLineTable.value();
467
const size_t NumLines = DstLT.size();
468
for (size_t I=0; I<NumLines; ++I) {
469
LineEntry &LE = DstLT.get(I);
470
LE.File = copyFile(SrcGC, LE.File);
471
}
472
}
473
// Copy the inline information if needed.
474
if (SrcFI.Inline) {
475
// Make a copy of the source inline information.
476
DstFI.Inline = SrcFI.Inline.value();
477
// Fixup all strings and files in the copied inline information.
478
fixupInlineInfo(SrcGC, *DstFI.Inline);
479
}
480
std::lock_guard<std::mutex> Guard(Mutex);
481
Funcs.emplace_back(DstFI);
482
return Funcs.back().cacheEncoding();
483
}
484
485
// Save this creator as a series of segment files.
//
// Segments are created one after another with createSegment() until all
// function infos have been consumed. Each segment is finalized and saved to a
// file named Path followed by "-" and the hex start address of the segment's
// first function. Returns an error if SegmentSize is zero or if creating,
// finalizing or saving a segment fails.
llvm::Error GsymCreator::saveSegments(StringRef Path,
                                      llvm::endianness ByteOrder,
                                      uint64_t SegmentSize) const {
  if (SegmentSize == 0)
    return createStringError(std::errc::invalid_argument,
                             "invalid segment size zero");

  const size_t NumFuncs = Funcs.size();
  // createSegment() advances FuncIdx past the functions it consumed.
  for (size_t FuncIdx = 0; FuncIdx < NumFuncs;) {
    llvm::Expected<std::unique_ptr<GsymCreator>> SegmentOrErr =
        createSegment(SegmentSize, FuncIdx);
    if (!SegmentOrErr)
      return SegmentOrErr.takeError();

    GsymCreator *Segment = SegmentOrErr->get();
    if (!Segment)
      break; // No more functions to encode.

    // Don't collect any messages at all.
    OutputAggregator Discard(nullptr);
    if (llvm::Error Err = Segment->finalize(Discard))
      return Err;

    // A segment without a first function address is not written.
    const std::optional<uint64_t> FirstFuncAddr =
        Segment->getFirstFunctionAddress();
    if (!FirstFuncAddr)
      continue;

    std::string SegmentPath;
    raw_string_ostream PathStrm(SegmentPath);
    PathStrm << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
    PathStrm.flush();
    if (llvm::Error Err = Segment->save(SegmentPath, ByteOrder, std::nullopt))
      return Err;
  }
  return Error::success();
}
522
523
// Create a new GsymCreator holding a run of function infos from this creator,
// starting at FuncIdx, sized to stay close to SegmentSize bytes.
//
// FuncIdx is advanced past every function info that was copied. Returns an
// empty unique pointer when FuncIdx is already past the last function info,
// and an error when not even one function info fits within SegmentSize.
llvm::Expected<std::unique_ptr<GsymCreator>>
GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
  const size_t NumFuncs = Funcs.size();
  // No function entries left, return an empty unique pointer.
  if (FuncIdx >= NumFuncs)
    return std::unique_ptr<GsymCreator>();

  auto Segment = std::make_unique<GsymCreator>(/*Quiet=*/true);

  // Mark the new creator as a segment and carry over the base address (if
  // one was set) and the UUID of this object.
  Segment->setIsSegment();
  if (BaseAddress)
    Segment->setBaseAddress(*BaseAddress);
  Segment->setUUID(UUID);

  // Running total of the function info sizes added to this segment. The
  // header and table sizes are cheap to compute, so they are recalculated on
  // every iteration.
  uint64_t FuncInfosSize = 0;
  while (FuncIdx < NumFuncs) {
    const uint64_t TablesSize = Segment->calculateHeaderAndTableSize();
    if (TablesSize + FuncInfosSize >= SegmentSize) {
      if (FuncInfosSize == 0)
        return createStringError(std::errc::invalid_argument,
                                 "a segment size of %" PRIu64 " is to small to "
                                 "fit any function infos, specify a larger value",
                                 SegmentSize);

      break;
    }
    FuncInfosSize += alignTo(Segment->copyFunctionInfo(*this, FuncIdx), 4);
    ++FuncIdx;
  }
  return std::move(Segment);
}
559
560