Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp
35233 views
1
//===--- DLangDemangle.cpp ------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file defines a demangler for the D programming language as specified
11
/// in the ABI specification, available at:
12
/// https://dlang.org/spec/abi.html#name_mangling
13
///
14
//===----------------------------------------------------------------------===//
15
16
#include "llvm/Demangle/Demangle.h"
17
#include "llvm/Demangle/StringViewExtras.h"
18
#include "llvm/Demangle/Utility.h"
19
20
#include <cctype>
21
#include <cstring>
22
#include <limits>
23
#include <string_view>
24
25
using namespace llvm;
26
using llvm::itanium_demangle::OutputBuffer;
27
using llvm::itanium_demangle::starts_with;
28
29
namespace {
30
31
/// Demangle information structure.
32
struct Demangler {
33
/// Initialize the information structure we use to pass around information.
34
///
35
/// \param Mangled String to demangle.
36
Demangler(std::string_view Mangled);
37
38
/// Extract and demangle the mangled symbol and append it to the output
39
/// string.
40
///
41
/// \param Demangled Output buffer to write the demangled name.
42
///
43
/// \return The remaining string on success or nullptr on failure.
44
///
45
/// \see https://dlang.org/spec/abi.html#name_mangling .
46
/// \see https://dlang.org/spec/abi.html#MangledName .
47
const char *parseMangle(OutputBuffer *Demangled);
48
49
private:
50
/// Extract and demangle a given mangled symbol and append it to the output
51
/// string.
52
///
53
/// \param Demangled output buffer to write the demangled name.
54
/// \param Mangled mangled symbol to be demangled.
55
///
56
/// \see https://dlang.org/spec/abi.html#name_mangling .
57
/// \see https://dlang.org/spec/abi.html#MangledName .
58
void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);
59
60
/// Extract the number from a given string.
61
///
62
/// \param Mangled string to extract the number.
63
/// \param Ret assigned result value.
64
///
65
/// \note Ret larger than UINT_MAX is considered a failure.
66
///
67
/// \see https://dlang.org/spec/abi.html#Number .
68
void decodeNumber(std::string_view &Mangled, unsigned long &Ret);
69
70
/// Extract the back reference position from a given string.
71
///
72
/// \param Mangled string to extract the back reference position.
73
/// \param Ret assigned result value.
74
///
75
/// \return true on success, false on error.
76
///
77
/// \note Ret is always >= 0 on success, and unspecified on failure
78
///
79
/// \see https://dlang.org/spec/abi.html#back_ref .
80
/// \see https://dlang.org/spec/abi.html#NumberBackRef .
81
bool decodeBackrefPos(std::string_view &Mangled, long &Ret);
82
83
/// Extract the symbol pointed by the back reference form a given string.
84
///
85
/// \param Mangled string to extract the back reference position.
86
/// \param Ret assigned result value.
87
///
88
/// \return true on success, false on error.
89
///
90
/// \see https://dlang.org/spec/abi.html#back_ref .
91
bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);
92
93
/// Extract and demangle backreferenced symbol from a given mangled symbol
94
/// and append it to the output string.
95
///
96
/// \param Demangled output buffer to write the demangled name.
97
/// \param Mangled mangled symbol to be demangled.
98
///
99
/// \see https://dlang.org/spec/abi.html#back_ref .
100
/// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
101
void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);
102
103
/// Extract and demangle backreferenced type from a given mangled symbol
104
/// and append it to the output string.
105
///
106
/// \param Mangled mangled symbol to be demangled.
107
///
108
/// \see https://dlang.org/spec/abi.html#back_ref .
109
/// \see https://dlang.org/spec/abi.html#TypeBackRef .
110
void parseTypeBackref(std::string_view &Mangled);
111
112
/// Check whether it is the beginning of a symbol name.
113
///
114
/// \param Mangled string to extract the symbol name.
115
///
116
/// \return true on success, false otherwise.
117
///
118
/// \see https://dlang.org/spec/abi.html#SymbolName .
119
bool isSymbolName(std::string_view Mangled);
120
121
/// Extract and demangle an identifier from a given mangled symbol append it
122
/// to the output string.
123
///
124
/// \param Demangled Output buffer to write the demangled name.
125
/// \param Mangled Mangled symbol to be demangled.
126
///
127
/// \see https://dlang.org/spec/abi.html#SymbolName .
128
void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);
129
130
/// Extract and demangle the plain identifier from a given mangled symbol and
131
/// prepend/append it to the output string, with a special treatment for some
132
/// magic compiler generated symbols.
133
///
134
/// \param Demangled Output buffer to write the demangled name.
135
/// \param Mangled Mangled symbol to be demangled.
136
/// \param Len Length of the mangled symbol name.
137
///
138
/// \see https://dlang.org/spec/abi.html#LName .
139
void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
140
unsigned long Len);
141
142
/// Extract and demangle the qualified symbol from a given mangled symbol
143
/// append it to the output string.
144
///
145
/// \param Demangled Output buffer to write the demangled name.
146
/// \param Mangled Mangled symbol to be demangled.
147
///
148
/// \see https://dlang.org/spec/abi.html#QualifiedName .
149
void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);
150
151
/// Extract and demangle a type from a given mangled symbol append it to
152
/// the output string.
153
///
154
/// \param Mangled mangled symbol to be demangled.
155
///
156
/// \return true on success, false on error.
157
///
158
/// \see https://dlang.org/spec/abi.html#Type .
159
bool parseType(std::string_view &Mangled);
160
161
/// An immutable view of the string we are demangling.
162
const std::string_view Str;
163
/// The index of the last back reference.
164
int LastBackref;
165
};
166
167
} // namespace
168
169
/// Decode a decimal Number from the front of \p Mangled.
///
/// On success the digits are consumed and the value is written to \p Ret.
/// On failure \p Mangled is cleared and \p Ret is left untouched. Failure
/// means: no leading digit, a value above UINT_MAX, or a number that is the
/// very last thing in the string.
void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {
  // std::isdigit has undefined behaviour for negative arguments, which plain
  // char can produce for bytes >= 0x80, so classify through unsigned char.
  const auto IsDigit = [](char C) {
    return std::isdigit(static_cast<unsigned char>(C)) != 0;
  };

  // Clear Mangled if trying to extract something that isn't a digit.
  if (Mangled.empty() || !IsDigit(Mangled.front())) {
    Mangled = {};
    return;
  }

  unsigned long Val = 0;

  do {
    const unsigned long Digit = Mangled.front() - '0';

    // Check for overflow: anything that would exceed UINT_MAX is rejected.
    if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) {
      Mangled = {};
      return;
    }

    Val = Val * 10 + Digit;
    Mangled.remove_prefix(1);
  } while (!Mangled.empty() && IsDigit(Mangled.front()));

  // A number must be followed by something; running off the end is an error.
  if (Mangled.empty()) {
    Mangled = {};
    return;
  }

  Ret = Val;
}
203
204
/// Decode the base-26 NumberBackRef at the front of \p Mangled.
///
/// \return true and stores a strictly positive value in \p Ret on success;
///         otherwise clears \p Mangled and returns false.
bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {
  // Any identifier or non-basic type that has been emitted to the mangled
  // symbol before will not be emitted again, but is referenced by a special
  // sequence encoding the relative position of the original occurrence in the
  // mangled symbol name.
  // Numbers in back references are encoded with base 26 by upper case letters
  // A-Z for higher digits but lower case letters a-z for the last digit.
  //    NumberBackRef:
  //        [a-z]
  //        [A-Z] NumberBackRef
  //        ^
  unsigned long Val = 0;

  // An empty input never enters the loop and falls through to the failure
  // path below.
  while (!Mangled.empty()) {
    // Classify with explicit ASCII ranges. std::isalpha on a plain char is
    // undefined for negative values and locale dependent, which could let a
    // non-ASCII byte be decoded as a base-26 digit.
    const char C = Mangled.front();
    const bool IsLast = C >= 'a' && C <= 'z';
    const bool IsHigher = C >= 'A' && C <= 'Z';
    if (!IsLast && !IsHigher)
      break;

    // Check for overflow
    if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
      break;

    Val *= 26;

    if (IsLast) {
      // A lower case letter terminates the number.
      Val += C - 'a';
      // Reject zero and anything that does not fit in a positive long.
      if (static_cast<long>(Val) <= 0)
        break;
      Ret = static_cast<long>(Val);
      Mangled.remove_prefix(1);
      return true;
    }

    Val += C - 'A';
    Mangled.remove_prefix(1);
  }

  Mangled = {};
  return false;
}
245
246
bool Demangler::decodeBackref(std::string_view &Mangled,
247
std::string_view &Ret) {
248
assert(!Mangled.empty() && Mangled.front() == 'Q' &&
249
"Invalid back reference!");
250
Ret = {};
251
252
// Position of 'Q'
253
const char *Qpos = Mangled.data();
254
long RefPos;
255
Mangled.remove_prefix(1);
256
257
if (!decodeBackrefPos(Mangled, RefPos)) {
258
Mangled = {};
259
return false;
260
}
261
262
if (RefPos > Qpos - Str.data()) {
263
Mangled = {};
264
return false;
265
}
266
267
// Set the position of the back reference.
268
Ret = Qpos - RefPos;
269
270
return true;
271
}
272
273
void Demangler::parseSymbolBackref(OutputBuffer *Demangled,
274
std::string_view &Mangled) {
275
// An identifier back reference always points to a digit 0 to 9.
276
// IdentifierBackRef:
277
// Q NumberBackRef
278
// ^
279
unsigned long Len;
280
281
// Get position of the back reference
282
std::string_view Backref;
283
if (!decodeBackref(Mangled, Backref)) {
284
Mangled = {};
285
return;
286
}
287
288
// Must point to a simple identifier
289
decodeNumber(Backref, Len);
290
if (Backref.empty() || Backref.length() < Len) {
291
Mangled = {};
292
return;
293
}
294
295
parseLName(Demangled, Backref, Len);
296
if (Backref.empty())
297
Mangled = {};
298
}
299
300
void Demangler::parseTypeBackref(std::string_view &Mangled) {
301
// A type back reference always points to a letter.
302
// TypeBackRef:
303
// Q NumberBackRef
304
// ^
305
306
// If we appear to be moving backwards through the mangle string, then
307
// bail as this may be a recursive back reference.
308
if (Mangled.data() - Str.data() >= LastBackref) {
309
Mangled = {};
310
return;
311
}
312
313
int SaveRefPos = LastBackref;
314
LastBackref = Mangled.data() - Str.data();
315
316
// Get position of the back reference.
317
std::string_view Backref;
318
if (!decodeBackref(Mangled, Backref)) {
319
Mangled = {};
320
return;
321
}
322
323
// Can't decode back reference.
324
if (Backref.empty()) {
325
Mangled = {};
326
return;
327
}
328
329
// TODO: Add support for function type back references.
330
if (!parseType(Backref))
331
Mangled = {};
332
333
LastBackref = SaveRefPos;
334
335
if (Backref.empty())
336
Mangled = {};
337
}
338
339
bool Demangler::isSymbolName(std::string_view Mangled) {
340
long Ret;
341
const char *Qref = Mangled.data();
342
343
if (std::isdigit(Mangled.front()))
344
return true;
345
346
// TODO: Handle template instances.
347
348
if (Mangled.front() != 'Q')
349
return false;
350
351
Mangled.remove_prefix(1);
352
bool Valid = decodeBackrefPos(Mangled, Ret);
353
if (!Valid || Ret > Qref - Str.data())
354
return false;
355
356
return std::isdigit(Qref[-Ret]);
357
}
358
359
void Demangler::parseMangle(OutputBuffer *Demangled,
360
std::string_view &Mangled) {
361
// A D mangled symbol is comprised of both scope and type information.
362
// MangleName:
363
// _D QualifiedName Type
364
// _D QualifiedName Z
365
// ^
366
// The caller should have guaranteed that the start pointer is at the
367
// above location.
368
// Note that type is never a function type, but only the return type of
369
// a function or the type of a variable.
370
Mangled.remove_prefix(2);
371
372
parseQualified(Demangled, Mangled);
373
374
if (Mangled.empty()) {
375
Mangled = {};
376
return;
377
}
378
379
// Artificial symbols end with 'Z' and have no type.
380
if (Mangled.front() == 'Z') {
381
Mangled.remove_prefix(1);
382
} else if (!parseType(Mangled))
383
Mangled = {};
384
}
385
386
void Demangler::parseQualified(OutputBuffer *Demangled,
387
std::string_view &Mangled) {
388
// Qualified names are identifiers separated by their encoded length.
389
// Nested functions also encode their argument types without specifying
390
// what they return.
391
// QualifiedName:
392
// SymbolFunctionName
393
// SymbolFunctionName QualifiedName
394
// ^
395
// SymbolFunctionName:
396
// SymbolName
397
// SymbolName TypeFunctionNoReturn
398
// SymbolName M TypeFunctionNoReturn
399
// SymbolName M TypeModifiers TypeFunctionNoReturn
400
// The start pointer should be at the above location.
401
402
// Whether it has more than one symbol
403
size_t NotFirst = false;
404
do {
405
// Skip over anonymous symbols.
406
if (!Mangled.empty() && Mangled.front() == '0') {
407
do
408
Mangled.remove_prefix(1);
409
while (!Mangled.empty() && Mangled.front() == '0');
410
411
continue;
412
}
413
414
if (NotFirst)
415
*Demangled << '.';
416
NotFirst = true;
417
418
parseIdentifier(Demangled, Mangled);
419
} while (!Mangled.empty() && isSymbolName(Mangled));
420
}
421
422
void Demangler::parseIdentifier(OutputBuffer *Demangled,
423
std::string_view &Mangled) {
424
if (Mangled.empty()) {
425
Mangled = {};
426
return;
427
}
428
429
if (Mangled.front() == 'Q')
430
return parseSymbolBackref(Demangled, Mangled);
431
432
// TODO: Parse lengthless template instances.
433
434
unsigned long Len;
435
decodeNumber(Mangled, Len);
436
437
if (Mangled.empty()) {
438
Mangled = {};
439
return;
440
}
441
if (!Len || Mangled.length() < Len) {
442
Mangled = {};
443
return;
444
}
445
446
// TODO: Parse template instances with a length prefix.
447
448
// There can be multiple different declarations in the same function that
449
// have the same mangled name. To make the mangled names unique, a fake
450
// parent in the form `__Sddd' is added to the symbol.
451
if (Len >= 4 && starts_with(Mangled, "__S")) {
452
const size_t SuffixLen = Mangled.length() - Len;
453
std::string_view P = Mangled.substr(3);
454
while (P.length() > SuffixLen && std::isdigit(P.front()))
455
P.remove_prefix(1);
456
if (P.length() == SuffixLen) {
457
// Skip over the fake parent.
458
Mangled.remove_prefix(Len);
459
return parseIdentifier(Demangled, Mangled);
460
}
461
462
// Else demangle it as a plain identifier.
463
}
464
465
parseLName(Demangled, Mangled, Len);
466
}
467
468
bool Demangler::parseType(std::string_view &Mangled) {
469
if (Mangled.empty()) {
470
Mangled = {};
471
return false;
472
}
473
474
switch (Mangled.front()) {
475
// TODO: Parse type qualifiers.
476
// TODO: Parse function types.
477
// TODO: Parse compound types.
478
// TODO: Parse delegate types.
479
// TODO: Parse tuple types.
480
481
// Basic types.
482
case 'i':
483
Mangled.remove_prefix(1);
484
// TODO: Add type name dumping
485
return true;
486
487
// TODO: Add support for the rest of the basic types.
488
489
// Back referenced type.
490
case 'Q': {
491
parseTypeBackref(Mangled);
492
return true;
493
}
494
495
default: // unhandled.
496
Mangled = {};
497
return false;
498
}
499
}
500
501
void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
502
unsigned long Len) {
503
switch (Len) {
504
case 6:
505
if (starts_with(Mangled, "__initZ")) {
506
// The static initializer for a given symbol.
507
Demangled->prepend("initializer for ");
508
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
509
Mangled.remove_prefix(Len);
510
return;
511
}
512
if (starts_with(Mangled, "__vtblZ")) {
513
// The vtable symbol for a given class.
514
Demangled->prepend("vtable for ");
515
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
516
Mangled.remove_prefix(Len);
517
return;
518
}
519
break;
520
521
case 7:
522
if (starts_with(Mangled, "__ClassZ")) {
523
// The classinfo symbol for a given class.
524
Demangled->prepend("ClassInfo for ");
525
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
526
Mangled.remove_prefix(Len);
527
return;
528
}
529
break;
530
531
case 11:
532
if (starts_with(Mangled, "__InterfaceZ")) {
533
// The interface symbol for a given class.
534
Demangled->prepend("Interface for ");
535
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
536
Mangled.remove_prefix(Len);
537
return;
538
}
539
break;
540
541
case 12:
542
if (starts_with(Mangled, "__ModuleInfoZ")) {
543
// The ModuleInfo symbol for a given module.
544
Demangled->prepend("ModuleInfo for ");
545
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
546
Mangled.remove_prefix(Len);
547
return;
548
}
549
break;
550
}
551
552
*Demangled << Mangled.substr(0, Len);
553
Mangled.remove_prefix(Len);
554
}
555
556
Demangler::Demangler(std::string_view Mangled)
557
: Str(Mangled), LastBackref(Mangled.length()) {}
558
559
/// Demangle the whole stored string into \p Demangled.
///
/// \return Pointer to whatever was left unparsed; null when parsing failed,
///         because the working view is reset to an empty view on failure.
const char *Demangler::parseMangle(OutputBuffer *Demangled) {
  // Work on a copy of the view so Str itself is never modified.
  std::string_view Remaining = Str;
  parseMangle(Demangled, Remaining);
  return Remaining.data();
}
564
565
char *llvm::dlangDemangle(std::string_view MangledName) {
566
if (MangledName.empty() || !starts_with(MangledName, "_D"))
567
return nullptr;
568
569
OutputBuffer Demangled;
570
if (MangledName == "_Dmain") {
571
Demangled << "D main";
572
} else {
573
574
Demangler D(MangledName);
575
const char *M = D.parseMangle(&Demangled);
576
577
// Check that the entire symbol was successfully demangled.
578
if (M == nullptr || *M != '\0') {
579
std::free(Demangled.getBuffer());
580
return nullptr;
581
}
582
}
583
584
// OutputBuffer's internal buffer is not null terminated and therefore we need
585
// to add it to comply with C null terminated strings.
586
if (Demangled.getCurrentPosition() > 0) {
587
Demangled << '\0';
588
Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
589
return Demangled.getBuffer();
590
}
591
592
std::free(Demangled.getBuffer());
593
return nullptr;
594
}
595
596