Path: blob/main/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp
35232 views
//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines a demangler for Rust v0 mangled symbols as specified in9// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html10//11//===----------------------------------------------------------------------===//1213#include "llvm/Demangle/Demangle.h"14#include "llvm/Demangle/StringViewExtras.h"15#include "llvm/Demangle/Utility.h"1617#include <algorithm>18#include <cassert>19#include <cstdint>20#include <cstring>21#include <limits>22#include <string_view>2324using namespace llvm;2526using llvm::itanium_demangle::OutputBuffer;27using llvm::itanium_demangle::ScopedOverride;28using llvm::itanium_demangle::starts_with;2930namespace {3132struct Identifier {33std::string_view Name;34bool Punycode;3536bool empty() const { return Name.empty(); }37};3839enum class BasicType {40Bool,41Char,42I8,43I16,44I32,45I64,46I128,47ISize,48U8,49U16,50U32,51U64,52U128,53USize,54F32,55F64,56Str,57Placeholder,58Unit,59Variadic,60Never,61};6263enum class IsInType {64No,65Yes,66};6768enum class LeaveGenericsOpen {69No,70Yes,71};7273class Demangler {74// Maximum recursion level. Used to avoid stack overflow.75size_t MaxRecursionLevel;76// Current recursion level.77size_t RecursionLevel;78size_t BoundLifetimes;79// Input string that is being demangled with "_R" prefix removed.80std::string_view Input;81// Position in the input string.82size_t Position;83// When true, print methods append the output to the stream.84// When false, the output is suppressed.85bool Print;86// True if an error occurred.87bool Error;8889public:90// Demangled output.91OutputBuffer Output;9293Demangler(size_t MaxRecursionLevel = 500);9495bool demangle(std::string_view MangledName);9697private:98bool demanglePath(IsInType Type,99LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No);100void demangleImplPath(IsInType InType);101void demangleGenericArg();102void demangleType();103void demangleFnSig();104void demangleDynBounds();105void demangleDynTrait();106void demangleOptionalBinder();107void demangleConst();108void demangleConstInt();109void demangleConstBool();110void demangleConstChar();111112template <typename Callable> void demangleBackref(Callable Demangler) {113uint64_t Backref = parseBase62Number();114if (Error || Backref >= Position) {115Error = true;116return;117}118119if (!Print)120return;121122ScopedOverride<size_t> SavePosition(Position, Position);123Position = Backref;124Demangler();125}126127Identifier parseIdentifier();128uint64_t parseOptionalBase62Number(char Tag);129uint64_t parseBase62Number();130uint64_t parseDecimalNumber();131uint64_t parseHexNumber(std::string_view &HexDigits);132133void print(char C);134void print(std::string_view S);135void printDecimalNumber(uint64_t N);136void printBasicType(BasicType);137void printLifetime(uint64_t Index);138void printIdentifier(Identifier Ident);139140char look() const;141char consume();142bool consumeIf(char Prefix);143144bool addAssign(uint64_t &A, uint64_t B);145bool mulAssign(uint64_t &A, uint64_t B);146};147148} // namespace149150char *llvm::rustDemangle(std::string_view MangledName) {151// Return early if mangled name doesn't look like a Rust symbol.152if (MangledName.empty() || !starts_with(MangledName, "_R"))153return nullptr;154155Demangler D;156if (!D.demangle(MangledName)) {157std::free(D.Output.getBuffer());158return nullptr;159}160161D.Output += '\0';162163return D.Output.getBuffer();164}165166Demangler::Demangler(size_t MaxRecursionLevel)167: MaxRecursionLevel(MaxRecursionLevel) {}168169static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }170171static inline bool isHexDigit(const char C) {172return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f');173}174175static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }176177static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }178179/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.180static inline bool isValid(const char C) {181return isDigit(C) || isLower(C) || isUpper(C) || C == '_';182}183184// Demangles Rust v0 mangled symbol. Returns true when successful, and false185// otherwise. The demangled symbol is stored in Output field. It is186// responsibility of the caller to free the memory behind the output stream.187//188// <symbol-name> = "_R" <path> [<instantiating-crate>]189bool Demangler::demangle(std::string_view Mangled) {190Position = 0;191Error = false;192Print = true;193RecursionLevel = 0;194BoundLifetimes = 0;195196if (!starts_with(Mangled, "_R")) {197Error = true;198return false;199}200Mangled.remove_prefix(2);201size_t Dot = Mangled.find('.');202Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);203204demanglePath(IsInType::No);205206if (Position != Input.size()) {207ScopedOverride<bool> SavePrint(Print, false);208demanglePath(IsInType::No);209}210211if (Position != Input.size())212Error = true;213214if (Dot != std::string_view::npos) {215print(" (");216print(Mangled.substr(Dot));217print(")");218}219220return !Error;221}222223// Demangles a path. InType indicates whether a path is inside a type. When224// LeaveOpen is true, a closing `>` after generic arguments is omitted from the225// output. Return value indicates whether generics arguments have been left226// open.227//228// <path> = "C" <identifier> // crate root229// | "M" <impl-path> <type> // <T> (inherent impl)230// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl)231// | "Y" <type> <path> // <T as Trait> (trait definition)232// | "N" <ns> <path> <identifier> // ...::ident (nested path)233// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)234// | <backref>235// <identifier> = [<disambiguator>] <undisambiguated-identifier>236// <ns> = "C" // closure237// | "S" // shim238// | <A-Z> // other special namespaces239// | <a-z> // internal namespaces240bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {241if (Error || RecursionLevel >= MaxRecursionLevel) {242Error = true;243return false;244}245ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);246247switch (consume()) {248case 'C': {249parseOptionalBase62Number('s');250printIdentifier(parseIdentifier());251break;252}253case 'M': {254demangleImplPath(InType);255print("<");256demangleType();257print(">");258break;259}260case 'X': {261demangleImplPath(InType);262print("<");263demangleType();264print(" as ");265demanglePath(IsInType::Yes);266print(">");267break;268}269case 'Y': {270print("<");271demangleType();272print(" as ");273demanglePath(IsInType::Yes);274print(">");275break;276}277case 'N': {278char NS = consume();279if (!isLower(NS) && !isUpper(NS)) {280Error = true;281break;282}283demanglePath(InType);284285uint64_t Disambiguator = parseOptionalBase62Number('s');286Identifier Ident = parseIdentifier();287288if (isUpper(NS)) {289// Special namespaces290print("::{");291if (NS == 'C')292print("closure");293else if (NS == 'S')294print("shim");295else296print(NS);297if (!Ident.empty()) {298print(":");299printIdentifier(Ident);300}301print('#');302printDecimalNumber(Disambiguator);303print('}');304} else {305// Implementation internal namespaces.306if (!Ident.empty()) {307print("::");308printIdentifier(Ident);309}310}311break;312}313case 'I': {314demanglePath(InType);315// Omit "::" when in a type, where it is optional.316if (InType == IsInType::No)317print("::");318print("<");319for (size_t I = 0; !Error && !consumeIf('E'); ++I) {320if (I > 0)321print(", ");322demangleGenericArg();323}324if (LeaveOpen == LeaveGenericsOpen::Yes)325return true;326else327print(">");328break;329}330case 'B': {331bool IsOpen = false;332demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); });333return IsOpen;334}335default:336Error = true;337break;338}339340return false;341}342343// <impl-path> = [<disambiguator>] <path>344// <disambiguator> = "s" <base-62-number>345void Demangler::demangleImplPath(IsInType InType) {346ScopedOverride<bool> SavePrint(Print, false);347parseOptionalBase62Number('s');348demanglePath(InType);349}350351// <generic-arg> = <lifetime>352// | <type>353// | "K" <const>354// <lifetime> = "L" <base-62-number>355void Demangler::demangleGenericArg() {356if (consumeIf('L'))357printLifetime(parseBase62Number());358else if (consumeIf('K'))359demangleConst();360else361demangleType();362}363364// <basic-type> = "a" // i8365// | "b" // bool366// | "c" // char367// | "d" // f64368// | "e" // str369// | "f" // f32370// | "h" // u8371// | "i" // isize372// | "j" // usize373// | "l" // i32374// | "m" // u32375// | "n" // i128376// | "o" // u128377// | "s" // i16378// | "t" // u16379// | "u" // ()380// | "v" // ...381// | "x" // i64382// | "y" // u64383// | "z" // !384// | "p" // placeholder (e.g. for generic params), shown as _385static bool parseBasicType(char C, BasicType &Type) {386switch (C) {387case 'a':388Type = BasicType::I8;389return true;390case 'b':391Type = BasicType::Bool;392return true;393case 'c':394Type = BasicType::Char;395return true;396case 'd':397Type = BasicType::F64;398return true;399case 'e':400Type = BasicType::Str;401return true;402case 'f':403Type = BasicType::F32;404return true;405case 'h':406Type = BasicType::U8;407return true;408case 'i':409Type = BasicType::ISize;410return true;411case 'j':412Type = BasicType::USize;413return true;414case 'l':415Type = BasicType::I32;416return true;417case 'm':418Type = BasicType::U32;419return true;420case 'n':421Type = BasicType::I128;422return true;423case 'o':424Type = BasicType::U128;425return true;426case 'p':427Type = BasicType::Placeholder;428return true;429case 's':430Type = BasicType::I16;431return true;432case 't':433Type = BasicType::U16;434return true;435case 'u':436Type = BasicType::Unit;437return true;438case 'v':439Type = BasicType::Variadic;440return true;441case 'x':442Type = BasicType::I64;443return true;444case 'y':445Type = BasicType::U64;446return true;447case 'z':448Type = BasicType::Never;449return true;450default:451return false;452}453}454455void Demangler::printBasicType(BasicType Type) {456switch (Type) {457case BasicType::Bool:458print("bool");459break;460case BasicType::Char:461print("char");462break;463case BasicType::I8:464print("i8");465break;466case BasicType::I16:467print("i16");468break;469case BasicType::I32:470print("i32");471break;472case BasicType::I64:473print("i64");474break;475case BasicType::I128:476print("i128");477break;478case BasicType::ISize:479print("isize");480break;481case BasicType::U8:482print("u8");483break;484case BasicType::U16:485print("u16");486break;487case BasicType::U32:488print("u32");489break;490case BasicType::U64:491print("u64");492break;493case BasicType::U128:494print("u128");495break;496case BasicType::USize:497print("usize");498break;499case BasicType::F32:500print("f32");501break;502case BasicType::F64:503print("f64");504break;505case BasicType::Str:506print("str");507break;508case BasicType::Placeholder:509print("_");510break;511case BasicType::Unit:512print("()");513break;514case BasicType::Variadic:515print("...");516break;517case BasicType::Never:518print("!");519break;520}521}522523// <type> = | <basic-type>524// | <path> // named type525// | "A" <type> <const> // [T; N]526// | "S" <type> // [T]527// | "T" {<type>} "E" // (T1, T2, T3, ...)528// | "R" [<lifetime>] <type> // &T529// | "Q" [<lifetime>] <type> // &mut T530// | "P" <type> // *const T531// | "O" <type> // *mut T532// | "F" <fn-sig> // fn(...) -> ...533// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a534// | <backref> // backref535void Demangler::demangleType() {536if (Error || RecursionLevel >= MaxRecursionLevel) {537Error = true;538return;539}540ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);541542size_t Start = Position;543char C = consume();544BasicType Type;545if (parseBasicType(C, Type))546return printBasicType(Type);547548switch (C) {549case 'A':550print("[");551demangleType();552print("; ");553demangleConst();554print("]");555break;556case 'S':557print("[");558demangleType();559print("]");560break;561case 'T': {562print("(");563size_t I = 0;564for (; !Error && !consumeIf('E'); ++I) {565if (I > 0)566print(", ");567demangleType();568}569if (I == 1)570print(",");571print(")");572break;573}574case 'R':575case 'Q':576print('&');577if (consumeIf('L')) {578if (auto Lifetime = parseBase62Number()) {579printLifetime(Lifetime);580print(' ');581}582}583if (C == 'Q')584print("mut ");585demangleType();586break;587case 'P':588print("*const ");589demangleType();590break;591case 'O':592print("*mut ");593demangleType();594break;595case 'F':596demangleFnSig();597break;598case 'D':599demangleDynBounds();600if (consumeIf('L')) {601if (auto Lifetime = parseBase62Number()) {602print(" + ");603printLifetime(Lifetime);604}605} else {606Error = true;607}608break;609case 'B':610demangleBackref([&] { demangleType(); });611break;612default:613Position = Start;614demanglePath(IsInType::Yes);615break;616}617}618619// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type>620// <abi> = "C"621// | <undisambiguated-identifier>622void Demangler::demangleFnSig() {623ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);624demangleOptionalBinder();625626if (consumeIf('U'))627print("unsafe ");628629if (consumeIf('K')) {630print("extern \"");631if (consumeIf('C')) {632print("C");633} else {634Identifier Ident = parseIdentifier();635if (Ident.Punycode)636Error = true;637for (char C : Ident.Name) {638// When mangling ABI string, the "-" is replaced with "_".639if (C == '_')640C = '-';641print(C);642}643}644print("\" ");645}646647print("fn(");648for (size_t I = 0; !Error && !consumeIf('E'); ++I) {649if (I > 0)650print(", ");651demangleType();652}653print(")");654655if (consumeIf('u')) {656// Skip the unit type from the output.657} else {658print(" -> ");659demangleType();660}661}662663// <dyn-bounds> = [<binder>] {<dyn-trait>} "E"664void Demangler::demangleDynBounds() {665ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes);666print("dyn ");667demangleOptionalBinder();668for (size_t I = 0; !Error && !consumeIf('E'); ++I) {669if (I > 0)670print(" + ");671demangleDynTrait();672}673}674675// <dyn-trait> = <path> {<dyn-trait-assoc-binding>}676// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type>677void Demangler::demangleDynTrait() {678bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes);679while (!Error && consumeIf('p')) {680if (!IsOpen) {681IsOpen = true;682print('<');683} else {684print(", ");685}686print(parseIdentifier().Name);687print(" = ");688demangleType();689}690if (IsOpen)691print(">");692}693694// Demangles optional binder and updates the number of bound lifetimes.695//696// <binder> = "G" <base-62-number>697void Demangler::demangleOptionalBinder() {698uint64_t Binder = parseOptionalBase62Number('G');699if (Error || Binder == 0)700return;701702// In valid inputs each bound lifetime is referenced later. Referencing a703// lifetime requires at least one byte of input. Reject inputs that are too704// short to reference all bound lifetimes. Otherwise demangling of invalid705// binders could generate excessive amounts of output.706if (Binder >= Input.size() - BoundLifetimes) {707Error = true;708return;709}710711print("for<");712for (size_t I = 0; I != Binder; ++I) {713BoundLifetimes += 1;714if (I > 0)715print(", ");716printLifetime(1);717}718print("> ");719}720721// <const> = <basic-type> <const-data>722// | "p" // placeholder723// | <backref>724void Demangler::demangleConst() {725if (Error || RecursionLevel >= MaxRecursionLevel) {726Error = true;727return;728}729ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1);730731char C = consume();732BasicType Type;733if (parseBasicType(C, Type)) {734switch (Type) {735case BasicType::I8:736case BasicType::I16:737case BasicType::I32:738case BasicType::I64:739case BasicType::I128:740case BasicType::ISize:741case BasicType::U8:742case BasicType::U16:743case BasicType::U32:744case BasicType::U64:745case BasicType::U128:746case BasicType::USize:747demangleConstInt();748break;749case BasicType::Bool:750demangleConstBool();751break;752case BasicType::Char:753demangleConstChar();754break;755case BasicType::Placeholder:756print('_');757break;758default:759Error = true;760break;761}762} else if (C == 'B') {763demangleBackref([&] { demangleConst(); });764} else {765Error = true;766}767}768769// <const-data> = ["n"] <hex-number>770void Demangler::demangleConstInt() {771if (consumeIf('n'))772print('-');773774std::string_view HexDigits;775uint64_t Value = parseHexNumber(HexDigits);776if (HexDigits.size() <= 16) {777printDecimalNumber(Value);778} else {779print("0x");780print(HexDigits);781}782}783784// <const-data> = "0_" // false785// | "1_" // true786void Demangler::demangleConstBool() {787std::string_view HexDigits;788parseHexNumber(HexDigits);789if (HexDigits == "0")790print("false");791else if (HexDigits == "1")792print("true");793else794Error = true;795}796797/// Returns true if CodePoint represents a printable ASCII character.798static bool isAsciiPrintable(uint64_t CodePoint) {799return 0x20 <= CodePoint && CodePoint <= 0x7e;800}801802// <const-data> = <hex-number>803void Demangler::demangleConstChar() {804std::string_view HexDigits;805uint64_t CodePoint = parseHexNumber(HexDigits);806if (Error || HexDigits.size() > 6) {807Error = true;808return;809}810811print("'");812switch (CodePoint) {813case '\t':814print(R"(\t)");815break;816case '\r':817print(R"(\r)");818break;819case '\n':820print(R"(\n)");821break;822case '\\':823print(R"(\\)");824break;825case '"':826print(R"(")");827break;828case '\'':829print(R"(\')");830break;831default:832if (isAsciiPrintable(CodePoint)) {833char C = CodePoint;834print(C);835} else {836print(R"(\u{)");837print(HexDigits);838print('}');839}840break;841}842print('\'');843}844845// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>846Identifier Demangler::parseIdentifier() {847bool Punycode = consumeIf('u');848uint64_t Bytes = parseDecimalNumber();849850// Underscore resolves the ambiguity when identifier starts with a decimal851// digit or another underscore.852consumeIf('_');853854if (Error || Bytes > Input.size() - Position) {855Error = true;856return {};857}858std::string_view S = Input.substr(Position, Bytes);859Position += Bytes;860861if (!std::all_of(S.begin(), S.end(), isValid)) {862Error = true;863return {};864}865866return {S, Punycode};867}868869// Parses optional base 62 number. The presence of a number is determined using870// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise871//872// This function is indended for parsing disambiguators and binders which when873// not present have their value interpreted as 0, and otherwise as decoded874// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is875// 2. When "G" is absent value is 0.876uint64_t Demangler::parseOptionalBase62Number(char Tag) {877if (!consumeIf(Tag))878return 0;879880uint64_t N = parseBase62Number();881if (Error || !addAssign(N, 1))882return 0;883884return N;885}886887// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by888// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,889// "1_" encodes 2, etc.890//891// <base-62-number> = {<0-9a-zA-Z>} "_"892uint64_t Demangler::parseBase62Number() {893if (consumeIf('_'))894return 0;895896uint64_t Value = 0;897898while (true) {899uint64_t Digit;900char C = consume();901902if (C == '_') {903break;904} else if (isDigit(C)) {905Digit = C - '0';906} else if (isLower(C)) {907Digit = 10 + (C - 'a');908} else if (isUpper(C)) {909Digit = 10 + 26 + (C - 'A');910} else {911Error = true;912return 0;913}914915if (!mulAssign(Value, 62))916return 0;917918if (!addAssign(Value, Digit))919return 0;920}921922if (!addAssign(Value, 1))923return 0;924925return Value;926}927928// Parses a decimal number that had been encoded without any leading zeros.929//930// <decimal-number> = "0"931// | <1-9> {<0-9>}932uint64_t Demangler::parseDecimalNumber() {933char C = look();934if (!isDigit(C)) {935Error = true;936return 0;937}938939if (C == '0') {940consume();941return 0;942}943944uint64_t Value = 0;945946while (isDigit(look())) {947if (!mulAssign(Value, 10)) {948Error = true;949return 0;950}951952uint64_t D = consume() - '0';953if (!addAssign(Value, D))954return 0;955}956957return Value;958}959960// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed961// value and stores hex digits in HexDigits. The return value is unspecified if962// HexDigits.size() > 16.963//964// <hex-number> = "0_"965// | <1-9a-f> {<0-9a-f>} "_"966uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {967size_t Start = Position;968uint64_t Value = 0;969970if (!isHexDigit(look()))971Error = true;972973if (consumeIf('0')) {974if (!consumeIf('_'))975Error = true;976} else {977while (!Error && !consumeIf('_')) {978char C = consume();979Value *= 16;980if (isDigit(C))981Value += C - '0';982else if ('a' <= C && C <= 'f')983Value += 10 + (C - 'a');984else985Error = true;986}987}988989if (Error) {990HexDigits = std::string_view();991return 0;992}993994size_t End = Position - 1;995assert(Start < End);996HexDigits = Input.substr(Start, End - Start);997return Value;998}9991000void Demangler::print(char C) {1001if (Error || !Print)1002return;10031004Output += C;1005}10061007void Demangler::print(std::string_view S) {1008if (Error || !Print)1009return;10101011Output += S;1012}10131014void Demangler::printDecimalNumber(uint64_t N) {1015if (Error || !Print)1016return;10171018Output << N;1019}10201021// Prints a lifetime. An index 0 always represents an erased lifetime. Indices1022// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes1023// bound by one of the enclosing binders.1024void Demangler::printLifetime(uint64_t Index) {1025if (Index == 0) {1026print("'_");1027return;1028}10291030if (Index - 1 >= BoundLifetimes) {1031Error = true;1032return;1033}10341035uint64_t Depth = BoundLifetimes - Index;1036print('\'');1037if (Depth < 26) {1038char C = 'a' + Depth;1039print(C);1040} else {1041print('z');1042printDecimalNumber(Depth - 26 + 1);1043}1044}10451046static inline bool decodePunycodeDigit(char C, size_t &Value) {1047if (isLower(C)) {1048Value = C - 'a';1049return true;1050}10511052if (isDigit(C)) {1053Value = 26 + (C - '0');1054return true;1055}10561057return false;1058}10591060static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {1061char *Buffer = Output.getBuffer();1062char *Start = Buffer + StartIdx;1063char *End = Buffer + Output.getCurrentPosition();1064Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);1065}10661067// Encodes code point as UTF-8 and stores results in Output. Returns false if1068// CodePoint is not a valid unicode scalar value.1069static inline bool encodeUTF8(size_t CodePoint, char *Output) {1070if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)1071return false;10721073if (CodePoint <= 0x7F) {1074Output[0] = CodePoint;1075return true;1076}10771078if (CodePoint <= 0x7FF) {1079Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);1080Output[1] = 0x80 | (CodePoint & 0x3F);1081return true;1082}10831084if (CodePoint <= 0xFFFF) {1085Output[0] = 0xE0 | (CodePoint >> 12);1086Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);1087Output[2] = 0x80 | (CodePoint & 0x3F);1088return true;1089}10901091if (CodePoint <= 0x10FFFF) {1092Output[0] = 0xF0 | (CodePoint >> 18);1093Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);1094Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);1095Output[3] = 0x80 | (CodePoint & 0x3F);1096return true;1097}10981099return false;1100}11011102// Decodes string encoded using punycode and appends results to Output.1103// Returns true if decoding was successful.1104static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {1105size_t OutputSize = Output.getCurrentPosition();1106size_t InputIdx = 0;11071108// Rust uses an underscore as a delimiter.1109size_t DelimiterPos = std::string_view::npos;1110for (size_t I = 0; I != Input.size(); ++I)1111if (Input[I] == '_')1112DelimiterPos = I;11131114if (DelimiterPos != std::string_view::npos) {1115// Copy basic code points before the last delimiter to the output.1116for (; InputIdx != DelimiterPos; ++InputIdx) {1117char C = Input[InputIdx];1118if (!isValid(C))1119return false;1120// Code points are padded with zeros while decoding is in progress.1121char UTF8[4] = {C};1122Output += std::string_view(UTF8, 4);1123}1124// Skip over the delimiter.1125++InputIdx;1126}11271128size_t Base = 36;1129size_t Skew = 38;1130size_t Bias = 72;1131size_t N = 0x80;1132size_t TMin = 1;1133size_t TMax = 26;1134size_t Damp = 700;11351136auto Adapt = [&](size_t Delta, size_t NumPoints) {1137Delta /= Damp;1138Delta += Delta / NumPoints;1139Damp = 2;11401141size_t K = 0;1142while (Delta > (Base - TMin) * TMax / 2) {1143Delta /= Base - TMin;1144K += Base;1145}1146return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));1147};11481149// Main decoding loop.1150for (size_t I = 0; InputIdx != Input.size(); I += 1) {1151size_t OldI = I;1152size_t W = 1;1153size_t Max = std::numeric_limits<size_t>::max();1154for (size_t K = Base; true; K += Base) {1155if (InputIdx == Input.size())1156return false;1157char C = Input[InputIdx++];1158size_t Digit = 0;1159if (!decodePunycodeDigit(C, Digit))1160return false;11611162if (Digit > (Max - I) / W)1163return false;1164I += Digit * W;11651166size_t T;1167if (K <= Bias)1168T = TMin;1169else if (K >= Bias + TMax)1170T = TMax;1171else1172T = K - Bias;11731174if (Digit < T)1175break;11761177if (W > Max / (Base - T))1178return false;1179W *= (Base - T);1180}1181size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;1182Bias = Adapt(I - OldI, NumPoints);11831184if (I / NumPoints > Max - N)1185return false;1186N += I / NumPoints;1187I = I % NumPoints;11881189// Insert N at position I in the output.1190char UTF8[4] = {};1191if (!encodeUTF8(N, UTF8))1192return false;1193Output.insert(OutputSize + I * 4, UTF8, 4);1194}11951196removeNullBytes(Output, OutputSize);1197return true;1198}11991200void Demangler::printIdentifier(Identifier Ident) {1201if (Error || !Print)1202return;12031204if (Ident.Punycode) {1205if (!decodePunycode(Ident.Name, Output))1206Error = true;1207} else {1208print(Ident.Name);1209}1210}12111212char Demangler::look() const {1213if (Error || Position >= Input.size())1214return 0;12151216return Input[Position];1217}12181219char Demangler::consume() {1220if (Error || Position >= Input.size()) {1221Error = true;1222return 0;1223}12241225return Input[Position++];1226}12271228bool Demangler::consumeIf(char Prefix) {1229if (Error || Position >= Input.size() || Input[Position] != Prefix)1230return false;12311232Position += 1;1233return true;1234}12351236/// Computes A + B. When computation wraps around sets the error and returns1237/// false. Otherwise assigns the result to A and returns true.1238bool Demangler::addAssign(uint64_t &A, uint64_t B) {1239if (A > std::numeric_limits<uint64_t>::max() - B) {1240Error = true;1241return false;1242}12431244A += B;1245return true;1246}12471248/// Computes A * B. When computation wraps around sets the error and returns1249/// false. Otherwise assigns the result to A and returns true.1250bool Demangler::mulAssign(uint64_t &A, uint64_t B) {1251if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {1252Error = true;1253return false;1254}12551256A *= B;1257return true;1258}125912601261