Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp
35232 views
1
//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines a demangler for Rust v0 mangled symbols as specified in
10
// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Demangle/Demangle.h"
15
#include "llvm/Demangle/StringViewExtras.h"
16
#include "llvm/Demangle/Utility.h"
17
18
#include <algorithm>
19
#include <cassert>
20
#include <cstdint>
21
#include <cstring>
22
#include <limits>
23
#include <string_view>
24
25
using namespace llvm;
26
27
using llvm::itanium_demangle::OutputBuffer;
28
using llvm::itanium_demangle::ScopedOverride;
29
using llvm::itanium_demangle::starts_with;
30
31
namespace {
32
33
struct Identifier {
34
std::string_view Name;
35
bool Punycode;
36
37
bool empty() const { return Name.empty(); }
38
};
39
40
enum class BasicType {
41
Bool,
42
Char,
43
I8,
44
I16,
45
I32,
46
I64,
47
I128,
48
ISize,
49
U8,
50
U16,
51
U32,
52
U64,
53
U128,
54
USize,
55
F32,
56
F64,
57
Str,
58
Placeholder,
59
Unit,
60
Variadic,
61
Never,
62
};
63
64
enum class IsInType {
65
No,
66
Yes,
67
};
68
69
enum class LeaveGenericsOpen {
70
No,
71
Yes,
72
};
73
74
class Demangler {
75
// Maximum recursion level. Used to avoid stack overflow.
76
size_t MaxRecursionLevel;
77
// Current recursion level.
78
size_t RecursionLevel;
79
size_t BoundLifetimes;
80
// Input string that is being demangled with "_R" prefix removed.
81
std::string_view Input;
82
// Position in the input string.
83
size_t Position;
84
// When true, print methods append the output to the stream.
85
// When false, the output is suppressed.
86
bool Print;
87
// True if an error occurred.
88
bool Error;
89
90
public:
91
// Demangled output.
92
OutputBuffer Output;
93
94
Demangler(size_t MaxRecursionLevel = 500);
95
96
bool demangle(std::string_view MangledName);
97
98
private:
99
bool demanglePath(IsInType Type,
100
LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);
101
void demangleImplPath(IsInType InType);
102
void demangleGenericArg();
103
void demangleType();
104
void demangleFnSig();
105
void demangleDynBounds();
106
void demangleDynTrait();
107
void demangleOptionalBinder();
108
void demangleConst();
109
void demangleConstInt();
110
void demangleConstBool();
111
void demangleConstChar();
112
113
template <typename Callable> void demangleBackref(Callable Demangler) {
114
uint64_t Backref = parseBase62Number();
115
if (Error || Backref >= Position) {
116
Error = true;
117
return;
118
}
119
120
if (!Print)
121
return;
122
123
ScopedOverride<size_t> SavePosition(Position, Position);
124
Position = Backref;
125
Demangler();
126
}
127
128
Identifier parseIdentifier();
129
uint64_t parseOptionalBase62Number(char Tag);
130
uint64_t parseBase62Number();
131
uint64_t parseDecimalNumber();
132
uint64_t parseHexNumber(std::string_view &HexDigits);
133
134
void print(char C);
135
void print(std::string_view S);
136
void printDecimalNumber(uint64_t N);
137
void printBasicType(BasicType);
138
void printLifetime(uint64_t Index);
139
void printIdentifier(Identifier Ident);
140
141
char look() const;
142
char consume();
143
bool consumeIf(char Prefix);
144
145
bool addAssign(uint64_t &A, uint64_t B);
146
bool mulAssign(uint64_t &A, uint64_t B);
147
};
148
149
} // namespace
150
151
char *llvm::rustDemangle(std::string_view MangledName) {
152
// Return early if mangled name doesn't look like a Rust symbol.
153
if (MangledName.empty() || !starts_with(MangledName, "_R"))
154
return nullptr;
155
156
Demangler D;
157
if (!D.demangle(MangledName)) {
158
std::free(D.Output.getBuffer());
159
return nullptr;
160
}
161
162
D.Output += '\0';
163
164
return D.Output.getBuffer();
165
}
166
167
Demangler::Demangler(size_t MaxRecursionLevel)
168
: MaxRecursionLevel(MaxRecursionLevel) {}
169
170
static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
171
172
static inline bool isHexDigit(const char C) {
173
return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');
174
}
175
176
static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
177
178
static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
179
180
/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
181
static inline bool isValid(const char C) {
182
return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
183
}
184
185
// Demangles Rust v0 mangled symbol. Returns true when successful, and false
186
// otherwise. The demangled symbol is stored in Output field. It is
187
// responsibility of the caller to free the memory behind the output stream.
188
//
189
// <symbol-name> = "_R" <path> [<instantiating-crate>]
190
bool Demangler::demangle(std::string_view Mangled) {
191
Position = 0;
192
Error = false;
193
Print = true;
194
RecursionLevel = 0;
195
BoundLifetimes = 0;
196
197
if (!starts_with(Mangled, "_R")) {
198
Error = true;
199
return false;
200
}
201
Mangled.remove_prefix(2);
202
size_t Dot = Mangled.find('.');
203
Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
204
205
demanglePath(IsInType::No);
206
207
if (Position != Input.size()) {
208
ScopedOverride<bool> SavePrint(Print, false);
209
demanglePath(IsInType::No);
210
}
211
212
if (Position != Input.size())
213
Error = true;
214
215
if (Dot != std::string_view::npos) {
216
print(" (");
217
print(Mangled.substr(Dot));
218
print(")");
219
}
220
221
return !Error;
222
}
223
224
// Demangles a path. InType indicates whether a path is inside a type. When
225
// LeaveOpen is true, a closing `>` after generic arguments is omitted from the
226
// output. Return value indicates whether generics arguments have been left
227
// open.
228
//
229
// <path> = "C" <identifier> // crate root
230
// | "M" <impl-path> <type> // <T> (inherent impl)
231
// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)
232
// | "Y" <type> <path> // <T as Trait> (trait definition)
233
// | "N" <ns> <path> <identifier> // ...::ident (nested path)
234
// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
235
// | <backref>
236
// <identifier> = [<disambiguator>] <undisambiguated-identifier>
237
// <ns> = "C" // closure
238
// | "S" // shim
239
// | <A-Z> // other special namespaces
240
// | <a-z> // internal namespaces
241
bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
242
if (Error || RecursionLevel >= MaxRecursionLevel) {
243
Error = true;
244
return false;
245
}
246
ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
247
248
switch (consume()) {
249
case 'C': {
250
parseOptionalBase62Number('s');
251
printIdentifier(parseIdentifier());
252
break;
253
}
254
case 'M': {
255
demangleImplPath(InType);
256
print("<");
257
demangleType();
258
print(">");
259
break;
260
}
261
case 'X': {
262
demangleImplPath(InType);
263
print("<");
264
demangleType();
265
print(" as ");
266
demanglePath(IsInType::Yes);
267
print(">");
268
break;
269
}
270
case 'Y': {
271
print("<");
272
demangleType();
273
print(" as ");
274
demanglePath(IsInType::Yes);
275
print(">");
276
break;
277
}
278
case 'N': {
279
char NS = consume();
280
if (!isLower(NS) && !isUpper(NS)) {
281
Error = true;
282
break;
283
}
284
demanglePath(InType);
285
286
uint64_t Disambiguator = parseOptionalBase62Number('s');
287
Identifier Ident = parseIdentifier();
288
289
if (isUpper(NS)) {
290
// Special namespaces
291
print("::{");
292
if (NS == 'C')
293
print("closure");
294
else if (NS == 'S')
295
print("shim");
296
else
297
print(NS);
298
if (!Ident.empty()) {
299
print(":");
300
printIdentifier(Ident);
301
}
302
print('#');
303
printDecimalNumber(Disambiguator);
304
print('}');
305
} else {
306
// Implementation internal namespaces.
307
if (!Ident.empty()) {
308
print("::");
309
printIdentifier(Ident);
310
}
311
}
312
break;
313
}
314
case 'I': {
315
demanglePath(InType);
316
// Omit "::" when in a type, where it is optional.
317
if (InType == IsInType::No)
318
print("::");
319
print("<");
320
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
321
if (I > 0)
322
print(", ");
323
demangleGenericArg();
324
}
325
if (LeaveOpen == LeaveGenericsOpen::Yes)
326
return true;
327
else
328
print(">");
329
break;
330
}
331
case 'B': {
332
bool IsOpen = false;
333
demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });
334
return IsOpen;
335
}
336
default:
337
Error = true;
338
break;
339
}
340
341
return false;
342
}
343
344
// <impl-path> = [<disambiguator>] <path>
345
// <disambiguator> = "s" <base-62-number>
346
void Demangler::demangleImplPath(IsInType InType) {
347
ScopedOverride<bool> SavePrint(Print, false);
348
parseOptionalBase62Number('s');
349
demanglePath(InType);
350
}
351
352
// <generic-arg> = <lifetime>
353
// | <type>
354
// | "K" <const>
355
// <lifetime> = "L" <base-62-number>
356
void Demangler::demangleGenericArg() {
357
if (consumeIf('L'))
358
printLifetime(parseBase62Number());
359
else if (consumeIf('K'))
360
demangleConst();
361
else
362
demangleType();
363
}
364
365
// <basic-type> = "a" // i8
366
// | "b" // bool
367
// | "c" // char
368
// | "d" // f64
369
// | "e" // str
370
// | "f" // f32
371
// | "h" // u8
372
// | "i" // isize
373
// | "j" // usize
374
// | "l" // i32
375
// | "m" // u32
376
// | "n" // i128
377
// | "o" // u128
378
// | "s" // i16
379
// | "t" // u16
380
// | "u" // ()
381
// | "v" // ...
382
// | "x" // i64
383
// | "y" // u64
384
// | "z" // !
385
// | "p" // placeholder (e.g. for generic params), shown as _
386
static bool parseBasicType(char C, BasicType &Type) {
387
switch (C) {
388
case 'a':
389
Type = BasicType::I8;
390
return true;
391
case 'b':
392
Type = BasicType::Bool;
393
return true;
394
case 'c':
395
Type = BasicType::Char;
396
return true;
397
case 'd':
398
Type = BasicType::F64;
399
return true;
400
case 'e':
401
Type = BasicType::Str;
402
return true;
403
case 'f':
404
Type = BasicType::F32;
405
return true;
406
case 'h':
407
Type = BasicType::U8;
408
return true;
409
case 'i':
410
Type = BasicType::ISize;
411
return true;
412
case 'j':
413
Type = BasicType::USize;
414
return true;
415
case 'l':
416
Type = BasicType::I32;
417
return true;
418
case 'm':
419
Type = BasicType::U32;
420
return true;
421
case 'n':
422
Type = BasicType::I128;
423
return true;
424
case 'o':
425
Type = BasicType::U128;
426
return true;
427
case 'p':
428
Type = BasicType::Placeholder;
429
return true;
430
case 's':
431
Type = BasicType::I16;
432
return true;
433
case 't':
434
Type = BasicType::U16;
435
return true;
436
case 'u':
437
Type = BasicType::Unit;
438
return true;
439
case 'v':
440
Type = BasicType::Variadic;
441
return true;
442
case 'x':
443
Type = BasicType::I64;
444
return true;
445
case 'y':
446
Type = BasicType::U64;
447
return true;
448
case 'z':
449
Type = BasicType::Never;
450
return true;
451
default:
452
return false;
453
}
454
}
455
456
void Demangler::printBasicType(BasicType Type) {
457
switch (Type) {
458
case BasicType::Bool:
459
print("bool");
460
break;
461
case BasicType::Char:
462
print("char");
463
break;
464
case BasicType::I8:
465
print("i8");
466
break;
467
case BasicType::I16:
468
print("i16");
469
break;
470
case BasicType::I32:
471
print("i32");
472
break;
473
case BasicType::I64:
474
print("i64");
475
break;
476
case BasicType::I128:
477
print("i128");
478
break;
479
case BasicType::ISize:
480
print("isize");
481
break;
482
case BasicType::U8:
483
print("u8");
484
break;
485
case BasicType::U16:
486
print("u16");
487
break;
488
case BasicType::U32:
489
print("u32");
490
break;
491
case BasicType::U64:
492
print("u64");
493
break;
494
case BasicType::U128:
495
print("u128");
496
break;
497
case BasicType::USize:
498
print("usize");
499
break;
500
case BasicType::F32:
501
print("f32");
502
break;
503
case BasicType::F64:
504
print("f64");
505
break;
506
case BasicType::Str:
507
print("str");
508
break;
509
case BasicType::Placeholder:
510
print("_");
511
break;
512
case BasicType::Unit:
513
print("()");
514
break;
515
case BasicType::Variadic:
516
print("...");
517
break;
518
case BasicType::Never:
519
print("!");
520
break;
521
}
522
}
523
524
// <type> = | <basic-type>
525
// | <path> // named type
526
// | "A" <type> <const> // [T; N]
527
// | "S" <type> // [T]
528
// | "T" {<type>} "E" // (T1, T2, T3, ...)
529
// | "R" [<lifetime>] <type> // &T
530
// | "Q" [<lifetime>] <type> // &mut T
531
// | "P" <type> // *const T
532
// | "O" <type> // *mut T
533
// | "F" <fn-sig> // fn(...) -> ...
534
// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a
535
// | <backref> // backref
536
void Demangler::demangleType() {
537
if (Error || RecursionLevel >= MaxRecursionLevel) {
538
Error = true;
539
return;
540
}
541
ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
542
543
size_t Start = Position;
544
char C = consume();
545
BasicType Type;
546
if (parseBasicType(C, Type))
547
return printBasicType(Type);
548
549
switch (C) {
550
case 'A':
551
print("[");
552
demangleType();
553
print("; ");
554
demangleConst();
555
print("]");
556
break;
557
case 'S':
558
print("[");
559
demangleType();
560
print("]");
561
break;
562
case 'T': {
563
print("(");
564
size_t I = 0;
565
for (; !Error && !consumeIf('E'); ++I) {
566
if (I > 0)
567
print(", ");
568
demangleType();
569
}
570
if (I == 1)
571
print(",");
572
print(")");
573
break;
574
}
575
case 'R':
576
case 'Q':
577
print('&');
578
if (consumeIf('L')) {
579
if (auto Lifetime = parseBase62Number()) {
580
printLifetime(Lifetime);
581
print(' ');
582
}
583
}
584
if (C == 'Q')
585
print("mut ");
586
demangleType();
587
break;
588
case 'P':
589
print("*const ");
590
demangleType();
591
break;
592
case 'O':
593
print("*mut ");
594
demangleType();
595
break;
596
case 'F':
597
demangleFnSig();
598
break;
599
case 'D':
600
demangleDynBounds();
601
if (consumeIf('L')) {
602
if (auto Lifetime = parseBase62Number()) {
603
print(" + ");
604
printLifetime(Lifetime);
605
}
606
} else {
607
Error = true;
608
}
609
break;
610
case 'B':
611
demangleBackref([&] { demangleType(); });
612
break;
613
default:
614
Position = Start;
615
demanglePath(IsInType::Yes);
616
break;
617
}
618
}
619
620
// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>
621
// <abi> = "C"
622
// | <undisambiguated-identifier>
623
void Demangler::demangleFnSig() {
624
ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
625
demangleOptionalBinder();
626
627
if (consumeIf('U'))
628
print("unsafe ");
629
630
if (consumeIf('K')) {
631
print("extern \"");
632
if (consumeIf('C')) {
633
print("C");
634
} else {
635
Identifier Ident = parseIdentifier();
636
if (Ident.Punycode)
637
Error = true;
638
for (char C : Ident.Name) {
639
// When mangling ABI string, the "-" is replaced with "_".
640
if (C == '_')
641
C = '-';
642
print(C);
643
}
644
}
645
print("\" ");
646
}
647
648
print("fn(");
649
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
650
if (I > 0)
651
print(", ");
652
demangleType();
653
}
654
print(")");
655
656
if (consumeIf('u')) {
657
// Skip the unit type from the output.
658
} else {
659
print(" -> ");
660
demangleType();
661
}
662
}
663
664
// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"
665
void Demangler::demangleDynBounds() {
666
ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);
667
print("dyn ");
668
demangleOptionalBinder();
669
for (size_t I = 0; !Error && !consumeIf('E'); ++I) {
670
if (I > 0)
671
print(" + ");
672
demangleDynTrait();
673
}
674
}
675
676
// <dyn-trait> = <path> {<dyn-trait-assoc-binding>}
677
// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>
678
void Demangler::demangleDynTrait() {
679
bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);
680
while (!Error && consumeIf('p')) {
681
if (!IsOpen) {
682
IsOpen = true;
683
print('<');
684
} else {
685
print(", ");
686
}
687
print(parseIdentifier().Name);
688
print(" = ");
689
demangleType();
690
}
691
if (IsOpen)
692
print(">");
693
}
694
695
// Demangles optional binder and updates the number of bound lifetimes.
696
//
697
// <binder> = "G" <base-62-number>
698
void Demangler::demangleOptionalBinder() {
699
uint64_t Binder = parseOptionalBase62Number('G');
700
if (Error || Binder == 0)
701
return;
702
703
// In valid inputs each bound lifetime is referenced later. Referencing a
704
// lifetime requires at least one byte of input. Reject inputs that are too
705
// short to reference all bound lifetimes. Otherwise demangling of invalid
706
// binders could generate excessive amounts of output.
707
if (Binder >= Input.size() - BoundLifetimes) {
708
Error = true;
709
return;
710
}
711
712
print("for<");
713
for (size_t I = 0; I != Binder; ++I) {
714
BoundLifetimes += 1;
715
if (I > 0)
716
print(", ");
717
printLifetime(1);
718
}
719
print("> ");
720
}
721
722
// <const> = <basic-type> <const-data>
723
// | "p" // placeholder
724
// | <backref>
725
void Demangler::demangleConst() {
726
if (Error || RecursionLevel >= MaxRecursionLevel) {
727
Error = true;
728
return;
729
}
730
ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);
731
732
char C = consume();
733
BasicType Type;
734
if (parseBasicType(C, Type)) {
735
switch (Type) {
736
case BasicType::I8:
737
case BasicType::I16:
738
case BasicType::I32:
739
case BasicType::I64:
740
case BasicType::I128:
741
case BasicType::ISize:
742
case BasicType::U8:
743
case BasicType::U16:
744
case BasicType::U32:
745
case BasicType::U64:
746
case BasicType::U128:
747
case BasicType::USize:
748
demangleConstInt();
749
break;
750
case BasicType::Bool:
751
demangleConstBool();
752
break;
753
case BasicType::Char:
754
demangleConstChar();
755
break;
756
case BasicType::Placeholder:
757
print('_');
758
break;
759
default:
760
Error = true;
761
break;
762
}
763
} else if (C == 'B') {
764
demangleBackref([&] { demangleConst(); });
765
} else {
766
Error = true;
767
}
768
}
769
770
// <const-data> = ["n"] <hex-number>
771
void Demangler::demangleConstInt() {
772
if (consumeIf('n'))
773
print('-');
774
775
std::string_view HexDigits;
776
uint64_t Value = parseHexNumber(HexDigits);
777
if (HexDigits.size() <= 16) {
778
printDecimalNumber(Value);
779
} else {
780
print("0x");
781
print(HexDigits);
782
}
783
}
784
785
// <const-data> = "0_" // false
786
// | "1_" // true
787
void Demangler::demangleConstBool() {
788
std::string_view HexDigits;
789
parseHexNumber(HexDigits);
790
if (HexDigits == "0")
791
print("false");
792
else if (HexDigits == "1")
793
print("true");
794
else
795
Error = true;
796
}
797
798
/// Returns true if CodePoint represents a printable ASCII character.
799
static bool isAsciiPrintable(uint64_t CodePoint) {
800
return 0x20 <= CodePoint && CodePoint <= 0x7e;
801
}
802
803
// <const-data> = <hex-number>
804
void Demangler::demangleConstChar() {
805
std::string_view HexDigits;
806
uint64_t CodePoint = parseHexNumber(HexDigits);
807
if (Error || HexDigits.size() > 6) {
808
Error = true;
809
return;
810
}
811
812
print("'");
813
switch (CodePoint) {
814
case '\t':
815
print(R"(\t)");
816
break;
817
case '\r':
818
print(R"(\r)");
819
break;
820
case '\n':
821
print(R"(\n)");
822
break;
823
case '\\':
824
print(R"(\\)");
825
break;
826
case '"':
827
print(R"(")");
828
break;
829
case '\'':
830
print(R"(\')");
831
break;
832
default:
833
if (isAsciiPrintable(CodePoint)) {
834
char C = CodePoint;
835
print(C);
836
} else {
837
print(R"(\u{)");
838
print(HexDigits);
839
print('}');
840
}
841
break;
842
}
843
print('\'');
844
}
845
846
// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
847
Identifier Demangler::parseIdentifier() {
848
bool Punycode = consumeIf('u');
849
uint64_t Bytes = parseDecimalNumber();
850
851
// Underscore resolves the ambiguity when identifier starts with a decimal
852
// digit or another underscore.
853
consumeIf('_');
854
855
if (Error || Bytes > Input.size() - Position) {
856
Error = true;
857
return {};
858
}
859
std::string_view S = Input.substr(Position, Bytes);
860
Position += Bytes;
861
862
if (!std::all_of(S.begin(), S.end(), isValid)) {
863
Error = true;
864
return {};
865
}
866
867
return {S, Punycode};
868
}
869
870
// Parses optional base 62 number. The presence of a number is determined using
871
// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise
872
//
873
// This function is indended for parsing disambiguators and binders which when
874
// not present have their value interpreted as 0, and otherwise as decoded
875
// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is
876
// 2. When "G" is absent value is 0.
877
uint64_t Demangler::parseOptionalBase62Number(char Tag) {
878
if (!consumeIf(Tag))
879
return 0;
880
881
uint64_t N = parseBase62Number();
882
if (Error || !addAssign(N, 1))
883
return 0;
884
885
return N;
886
}
887
888
// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
889
// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
890
// "1_" encodes 2, etc.
891
//
892
// <base-62-number> = {<0-9a-zA-Z>} "_"
893
uint64_t Demangler::parseBase62Number() {
894
if (consumeIf('_'))
895
return 0;
896
897
uint64_t Value = 0;
898
899
while (true) {
900
uint64_t Digit;
901
char C = consume();
902
903
if (C == '_') {
904
break;
905
} else if (isDigit(C)) {
906
Digit = C - '0';
907
} else if (isLower(C)) {
908
Digit = 10 + (C - 'a');
909
} else if (isUpper(C)) {
910
Digit = 10 + 26 + (C - 'A');
911
} else {
912
Error = true;
913
return 0;
914
}
915
916
if (!mulAssign(Value, 62))
917
return 0;
918
919
if (!addAssign(Value, Digit))
920
return 0;
921
}
922
923
if (!addAssign(Value, 1))
924
return 0;
925
926
return Value;
927
}
928
929
// Parses a decimal number that had been encoded without any leading zeros.
930
//
931
// <decimal-number> = "0"
932
// | <1-9> {<0-9>}
933
uint64_t Demangler::parseDecimalNumber() {
934
char C = look();
935
if (!isDigit(C)) {
936
Error = true;
937
return 0;
938
}
939
940
if (C == '0') {
941
consume();
942
return 0;
943
}
944
945
uint64_t Value = 0;
946
947
while (isDigit(look())) {
948
if (!mulAssign(Value, 10)) {
949
Error = true;
950
return 0;
951
}
952
953
uint64_t D = consume() - '0';
954
if (!addAssign(Value, D))
955
return 0;
956
}
957
958
return Value;
959
}
960
961
// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed
962
// value and stores hex digits in HexDigits. The return value is unspecified if
963
// HexDigits.size() > 16.
964
//
965
// <hex-number> = "0_"
966
// | <1-9a-f> {<0-9a-f>} "_"
967
uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
968
size_t Start = Position;
969
uint64_t Value = 0;
970
971
if (!isHexDigit(look()))
972
Error = true;
973
974
if (consumeIf('0')) {
975
if (!consumeIf('_'))
976
Error = true;
977
} else {
978
while (!Error && !consumeIf('_')) {
979
char C = consume();
980
Value *= 16;
981
if (isDigit(C))
982
Value += C - '0';
983
else if ('a' <= C && C <= 'f')
984
Value += 10 + (C - 'a');
985
else
986
Error = true;
987
}
988
}
989
990
if (Error) {
991
HexDigits = std::string_view();
992
return 0;
993
}
994
995
size_t End = Position - 1;
996
assert(Start < End);
997
HexDigits = Input.substr(Start, End - Start);
998
return Value;
999
}
1000
1001
void Demangler::print(char C) {
1002
if (Error || !Print)
1003
return;
1004
1005
Output += C;
1006
}
1007
1008
void Demangler::print(std::string_view S) {
1009
if (Error || !Print)
1010
return;
1011
1012
Output += S;
1013
}
1014
1015
void Demangler::printDecimalNumber(uint64_t N) {
1016
if (Error || !Print)
1017
return;
1018
1019
Output << N;
1020
}
1021
1022
// Prints a lifetime. An index 0 always represents an erased lifetime. Indices
1023
// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes
1024
// bound by one of the enclosing binders.
1025
void Demangler::printLifetime(uint64_t Index) {
1026
if (Index == 0) {
1027
print("'_");
1028
return;
1029
}
1030
1031
if (Index - 1 >= BoundLifetimes) {
1032
Error = true;
1033
return;
1034
}
1035
1036
uint64_t Depth = BoundLifetimes - Index;
1037
print('\'');
1038
if (Depth < 26) {
1039
char C = 'a' + Depth;
1040
print(C);
1041
} else {
1042
print('z');
1043
printDecimalNumber(Depth - 26 + 1);
1044
}
1045
}
1046
1047
static inline bool decodePunycodeDigit(char C, size_t &Value) {
1048
if (isLower(C)) {
1049
Value = C - 'a';
1050
return true;
1051
}
1052
1053
if (isDigit(C)) {
1054
Value = 26 + (C - '0');
1055
return true;
1056
}
1057
1058
return false;
1059
}
1060
1061
static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
1062
char *Buffer = Output.getBuffer();
1063
char *Start = Buffer + StartIdx;
1064
char *End = Buffer + Output.getCurrentPosition();
1065
Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
1066
}
1067
1068
// Encodes code point as UTF-8 and stores results in Output. Returns false if
1069
// CodePoint is not a valid unicode scalar value.
1070
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
1071
if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
1072
return false;
1073
1074
if (CodePoint <= 0x7F) {
1075
Output[0] = CodePoint;
1076
return true;
1077
}
1078
1079
if (CodePoint <= 0x7FF) {
1080
Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
1081
Output[1] = 0x80 | (CodePoint & 0x3F);
1082
return true;
1083
}
1084
1085
if (CodePoint <= 0xFFFF) {
1086
Output[0] = 0xE0 | (CodePoint >> 12);
1087
Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
1088
Output[2] = 0x80 | (CodePoint & 0x3F);
1089
return true;
1090
}
1091
1092
if (CodePoint <= 0x10FFFF) {
1093
Output[0] = 0xF0 | (CodePoint >> 18);
1094
Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
1095
Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
1096
Output[3] = 0x80 | (CodePoint & 0x3F);
1097
return true;
1098
}
1099
1100
return false;
1101
}
1102
1103
// Decodes string encoded using punycode and appends results to Output.
1104
// Returns true if decoding was successful.
1105
static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
1106
size_t OutputSize = Output.getCurrentPosition();
1107
size_t InputIdx = 0;
1108
1109
// Rust uses an underscore as a delimiter.
1110
size_t DelimiterPos = std::string_view::npos;
1111
for (size_t I = 0; I != Input.size(); ++I)
1112
if (Input[I] == '_')
1113
DelimiterPos = I;
1114
1115
if (DelimiterPos != std::string_view::npos) {
1116
// Copy basic code points before the last delimiter to the output.
1117
for (; InputIdx != DelimiterPos; ++InputIdx) {
1118
char C = Input[InputIdx];
1119
if (!isValid(C))
1120
return false;
1121
// Code points are padded with zeros while decoding is in progress.
1122
char UTF8[4] = {C};
1123
Output += std::string_view(UTF8, 4);
1124
}
1125
// Skip over the delimiter.
1126
++InputIdx;
1127
}
1128
1129
size_t Base = 36;
1130
size_t Skew = 38;
1131
size_t Bias = 72;
1132
size_t N = 0x80;
1133
size_t TMin = 1;
1134
size_t TMax = 26;
1135
size_t Damp = 700;
1136
1137
auto Adapt = [&](size_t Delta, size_t NumPoints) {
1138
Delta /= Damp;
1139
Delta += Delta / NumPoints;
1140
Damp = 2;
1141
1142
size_t K = 0;
1143
while (Delta > (Base - TMin) * TMax / 2) {
1144
Delta /= Base - TMin;
1145
K += Base;
1146
}
1147
return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
1148
};
1149
1150
// Main decoding loop.
1151
for (size_t I = 0; InputIdx != Input.size(); I += 1) {
1152
size_t OldI = I;
1153
size_t W = 1;
1154
size_t Max = std::numeric_limits<size_t>::max();
1155
for (size_t K = Base; true; K += Base) {
1156
if (InputIdx == Input.size())
1157
return false;
1158
char C = Input[InputIdx++];
1159
size_t Digit = 0;
1160
if (!decodePunycodeDigit(C, Digit))
1161
return false;
1162
1163
if (Digit > (Max - I) / W)
1164
return false;
1165
I += Digit * W;
1166
1167
size_t T;
1168
if (K <= Bias)
1169
T = TMin;
1170
else if (K >= Bias + TMax)
1171
T = TMax;
1172
else
1173
T = K - Bias;
1174
1175
if (Digit < T)
1176
break;
1177
1178
if (W > Max / (Base - T))
1179
return false;
1180
W *= (Base - T);
1181
}
1182
size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
1183
Bias = Adapt(I - OldI, NumPoints);
1184
1185
if (I / NumPoints > Max - N)
1186
return false;
1187
N += I / NumPoints;
1188
I = I % NumPoints;
1189
1190
// Insert N at position I in the output.
1191
char UTF8[4] = {};
1192
if (!encodeUTF8(N, UTF8))
1193
return false;
1194
Output.insert(OutputSize + I * 4, UTF8, 4);
1195
}
1196
1197
removeNullBytes(Output, OutputSize);
1198
return true;
1199
}
1200
1201
void Demangler::printIdentifier(Identifier Ident) {
1202
if (Error || !Print)
1203
return;
1204
1205
if (Ident.Punycode) {
1206
if (!decodePunycode(Ident.Name, Output))
1207
Error = true;
1208
} else {
1209
print(Ident.Name);
1210
}
1211
}
1212
1213
char Demangler::look() const {
1214
if (Error || Position >= Input.size())
1215
return 0;
1216
1217
return Input[Position];
1218
}
1219
1220
char Demangler::consume() {
1221
if (Error || Position >= Input.size()) {
1222
Error = true;
1223
return 0;
1224
}
1225
1226
return Input[Position++];
1227
}
1228
1229
bool Demangler::consumeIf(char Prefix) {
1230
if (Error || Position >= Input.size() || Input[Position] != Prefix)
1231
return false;
1232
1233
Position += 1;
1234
return true;
1235
}
1236
1237
/// Computes A + B. When computation wraps around sets the error and returns
1238
/// false. Otherwise assigns the result to A and returns true.
1239
bool Demangler::addAssign(uint64_t &A, uint64_t B) {
1240
if (A > std::numeric_limits<uint64_t>::max() - B) {
1241
Error = true;
1242
return false;
1243
}
1244
1245
A += B;
1246
return true;
1247
}
1248
1249
/// Computes A * B. When computation wraps around sets the error and returns
1250
/// false. Otherwise assigns the result to A and returns true.
1251
bool Demangler::mulAssign(uint64_t &A, uint64_t B) {
1252
if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
1253
Error = true;
1254
return false;
1255
}
1256
1257
A *= B;
1258
return true;
1259
}
1260
1261