Path: blob/main/contrib/llvm-project/llvm/lib/Demangle/DLangDemangle.cpp
35233 views
//===--- DLangDemangle.cpp ------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This file defines a demangler for the D programming language as specified10/// in the ABI specification, available at:11/// https://dlang.org/spec/abi.html#name_mangling12///13//===----------------------------------------------------------------------===//1415#include "llvm/Demangle/Demangle.h"16#include "llvm/Demangle/StringViewExtras.h"17#include "llvm/Demangle/Utility.h"1819#include <cctype>20#include <cstring>21#include <limits>22#include <string_view>2324using namespace llvm;25using llvm::itanium_demangle::OutputBuffer;26using llvm::itanium_demangle::starts_with;2728namespace {2930/// Demangle information structure.31struct Demangler {32/// Initialize the information structure we use to pass around information.33///34/// \param Mangled String to demangle.35Demangler(std::string_view Mangled);3637/// Extract and demangle the mangled symbol and append it to the output38/// string.39///40/// \param Demangled Output buffer to write the demangled name.41///42/// \return The remaining string on success or nullptr on failure.43///44/// \see https://dlang.org/spec/abi.html#name_mangling .45/// \see https://dlang.org/spec/abi.html#MangledName .46const char *parseMangle(OutputBuffer *Demangled);4748private:49/// Extract and demangle a given mangled symbol and append it to the output50/// string.51///52/// \param Demangled output buffer to write the demangled name.53/// \param Mangled mangled symbol to be demangled.54///55/// \see https://dlang.org/spec/abi.html#name_mangling .56/// \see https://dlang.org/spec/abi.html#MangledName .57void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);5859/// Extract the number from a given string.60///61/// \param Mangled string to extract the number.62/// \param Ret assigned result value.63///64/// \note Ret larger than UINT_MAX is considered a failure.65///66/// \see https://dlang.org/spec/abi.html#Number .67void decodeNumber(std::string_view &Mangled, unsigned long &Ret);6869/// Extract the back reference position from a given string.70///71/// \param Mangled string to extract the back reference position.72/// \param Ret assigned result value.73///74/// \return true on success, false on error.75///76/// \note Ret is always >= 0 on success, and unspecified on failure77///78/// \see https://dlang.org/spec/abi.html#back_ref .79/// \see https://dlang.org/spec/abi.html#NumberBackRef .80bool decodeBackrefPos(std::string_view &Mangled, long &Ret);8182/// Extract the symbol pointed by the back reference form a given string.83///84/// \param Mangled string to extract the back reference position.85/// \param Ret assigned result value.86///87/// \return true on success, false on error.88///89/// \see https://dlang.org/spec/abi.html#back_ref .90bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);9192/// Extract and demangle backreferenced symbol from a given mangled symbol93/// and append it to the output string.94///95/// \param Demangled output buffer to write the demangled name.96/// \param Mangled mangled symbol to be demangled.97///98/// \see https://dlang.org/spec/abi.html#back_ref .99/// \see https://dlang.org/spec/abi.html#IdentifierBackRef .100void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);101102/// Extract and demangle backreferenced type from a given mangled symbol103/// and append it to the output string.104///105/// \param Mangled mangled symbol to be demangled.106///107/// \see https://dlang.org/spec/abi.html#back_ref .108/// \see https://dlang.org/spec/abi.html#TypeBackRef .109void parseTypeBackref(std::string_view &Mangled);110111/// Check whether it is the beginning of a symbol name.112///113/// \param Mangled string to extract the symbol name.114///115/// \return true on success, false otherwise.116///117/// \see https://dlang.org/spec/abi.html#SymbolName .118bool isSymbolName(std::string_view Mangled);119120/// Extract and demangle an identifier from a given mangled symbol append it121/// to the output string.122///123/// \param Demangled Output buffer to write the demangled name.124/// \param Mangled Mangled symbol to be demangled.125///126/// \see https://dlang.org/spec/abi.html#SymbolName .127void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);128129/// Extract and demangle the plain identifier from a given mangled symbol and130/// prepend/append it to the output string, with a special treatment for some131/// magic compiler generated symbols.132///133/// \param Demangled Output buffer to write the demangled name.134/// \param Mangled Mangled symbol to be demangled.135/// \param Len Length of the mangled symbol name.136///137/// \see https://dlang.org/spec/abi.html#LName .138void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,139unsigned long Len);140141/// Extract and demangle the qualified symbol from a given mangled symbol142/// append it to the output string.143///144/// \param Demangled Output buffer to write the demangled name.145/// \param Mangled Mangled symbol to be demangled.146///147/// \see https://dlang.org/spec/abi.html#QualifiedName .148void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);149150/// Extract and demangle a type from a given mangled symbol append it to151/// the output string.152///153/// \param Mangled mangled symbol to be demangled.154///155/// \return true on success, false on error.156///157/// \see https://dlang.org/spec/abi.html#Type .158bool parseType(std::string_view &Mangled);159160/// An immutable view of the string we are demangling.161const std::string_view Str;162/// The index of the last back reference.163int LastBackref;164};165166} // namespace167168void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {169// Clear Mangled if trying to extract something that isn't a digit.170if (Mangled.empty()) {171Mangled = {};172return;173}174175if (!std::isdigit(Mangled.front())) {176Mangled = {};177return;178}179180unsigned long Val = 0;181182do {183unsigned long Digit = Mangled[0] - '0';184185// Check for overflow.186if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) {187Mangled = {};188return;189}190191Val = Val * 10 + Digit;192Mangled.remove_prefix(1);193} while (!Mangled.empty() && std::isdigit(Mangled.front()));194195if (Mangled.empty()) {196Mangled = {};197return;198}199200Ret = Val;201}202203bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {204// Return nullptr if trying to extract something that isn't a digit205if (Mangled.empty()) {206Mangled = {};207return false;208}209// Any identifier or non-basic type that has been emitted to the mangled210// symbol before will not be emitted again, but is referenced by a special211// sequence encoding the relative position of the original occurrence in the212// mangled symbol name.213// Numbers in back references are encoded with base 26 by upper case letters214// A-Z for higher digits but lower case letters a-z for the last digit.215// NumberBackRef:216// [a-z]217// [A-Z] NumberBackRef218// ^219unsigned long Val = 0;220221while (!Mangled.empty() && std::isalpha(Mangled.front())) {222// Check for overflow223if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)224break;225226Val *= 26;227228if (Mangled[0] >= 'a' && Mangled[0] <= 'z') {229Val += Mangled[0] - 'a';230if ((long)Val <= 0)231break;232Ret = Val;233Mangled.remove_prefix(1);234return true;235}236237Val += Mangled[0] - 'A';238Mangled.remove_prefix(1);239}240241Mangled = {};242return false;243}244245bool Demangler::decodeBackref(std::string_view &Mangled,246std::string_view &Ret) {247assert(!Mangled.empty() && Mangled.front() == 'Q' &&248"Invalid back reference!");249Ret = {};250251// Position of 'Q'252const char *Qpos = Mangled.data();253long RefPos;254Mangled.remove_prefix(1);255256if (!decodeBackrefPos(Mangled, RefPos)) {257Mangled = {};258return false;259}260261if (RefPos > Qpos - Str.data()) {262Mangled = {};263return false;264}265266// Set the position of the back reference.267Ret = Qpos - RefPos;268269return true;270}271272void Demangler::parseSymbolBackref(OutputBuffer *Demangled,273std::string_view &Mangled) {274// An identifier back reference always points to a digit 0 to 9.275// IdentifierBackRef:276// Q NumberBackRef277// ^278unsigned long Len;279280// Get position of the back reference281std::string_view Backref;282if (!decodeBackref(Mangled, Backref)) {283Mangled = {};284return;285}286287// Must point to a simple identifier288decodeNumber(Backref, Len);289if (Backref.empty() || Backref.length() < Len) {290Mangled = {};291return;292}293294parseLName(Demangled, Backref, Len);295if (Backref.empty())296Mangled = {};297}298299void Demangler::parseTypeBackref(std::string_view &Mangled) {300// A type back reference always points to a letter.301// TypeBackRef:302// Q NumberBackRef303// ^304305// If we appear to be moving backwards through the mangle string, then306// bail as this may be a recursive back reference.307if (Mangled.data() - Str.data() >= LastBackref) {308Mangled = {};309return;310}311312int SaveRefPos = LastBackref;313LastBackref = Mangled.data() - Str.data();314315// Get position of the back reference.316std::string_view Backref;317if (!decodeBackref(Mangled, Backref)) {318Mangled = {};319return;320}321322// Can't decode back reference.323if (Backref.empty()) {324Mangled = {};325return;326}327328// TODO: Add support for function type back references.329if (!parseType(Backref))330Mangled = {};331332LastBackref = SaveRefPos;333334if (Backref.empty())335Mangled = {};336}337338bool Demangler::isSymbolName(std::string_view Mangled) {339long Ret;340const char *Qref = Mangled.data();341342if (std::isdigit(Mangled.front()))343return true;344345// TODO: Handle template instances.346347if (Mangled.front() != 'Q')348return false;349350Mangled.remove_prefix(1);351bool Valid = decodeBackrefPos(Mangled, Ret);352if (!Valid || Ret > Qref - Str.data())353return false;354355return std::isdigit(Qref[-Ret]);356}357358void Demangler::parseMangle(OutputBuffer *Demangled,359std::string_view &Mangled) {360// A D mangled symbol is comprised of both scope and type information.361// MangleName:362// _D QualifiedName Type363// _D QualifiedName Z364// ^365// The caller should have guaranteed that the start pointer is at the366// above location.367// Note that type is never a function type, but only the return type of368// a function or the type of a variable.369Mangled.remove_prefix(2);370371parseQualified(Demangled, Mangled);372373if (Mangled.empty()) {374Mangled = {};375return;376}377378// Artificial symbols end with 'Z' and have no type.379if (Mangled.front() == 'Z') {380Mangled.remove_prefix(1);381} else if (!parseType(Mangled))382Mangled = {};383}384385void Demangler::parseQualified(OutputBuffer *Demangled,386std::string_view &Mangled) {387// Qualified names are identifiers separated by their encoded length.388// Nested functions also encode their argument types without specifying389// what they return.390// QualifiedName:391// SymbolFunctionName392// SymbolFunctionName QualifiedName393// ^394// SymbolFunctionName:395// SymbolName396// SymbolName TypeFunctionNoReturn397// SymbolName M TypeFunctionNoReturn398// SymbolName M TypeModifiers TypeFunctionNoReturn399// The start pointer should be at the above location.400401// Whether it has more than one symbol402size_t NotFirst = false;403do {404// Skip over anonymous symbols.405if (!Mangled.empty() && Mangled.front() == '0') {406do407Mangled.remove_prefix(1);408while (!Mangled.empty() && Mangled.front() == '0');409410continue;411}412413if (NotFirst)414*Demangled << '.';415NotFirst = true;416417parseIdentifier(Demangled, Mangled);418} while (!Mangled.empty() && isSymbolName(Mangled));419}420421void Demangler::parseIdentifier(OutputBuffer *Demangled,422std::string_view &Mangled) {423if (Mangled.empty()) {424Mangled = {};425return;426}427428if (Mangled.front() == 'Q')429return parseSymbolBackref(Demangled, Mangled);430431// TODO: Parse lengthless template instances.432433unsigned long Len;434decodeNumber(Mangled, Len);435436if (Mangled.empty()) {437Mangled = {};438return;439}440if (!Len || Mangled.length() < Len) {441Mangled = {};442return;443}444445// TODO: Parse template instances with a length prefix.446447// There can be multiple different declarations in the same function that448// have the same mangled name. To make the mangled names unique, a fake449// parent in the form `__Sddd' is added to the symbol.450if (Len >= 4 && starts_with(Mangled, "__S")) {451const size_t SuffixLen = Mangled.length() - Len;452std::string_view P = Mangled.substr(3);453while (P.length() > SuffixLen && std::isdigit(P.front()))454P.remove_prefix(1);455if (P.length() == SuffixLen) {456// Skip over the fake parent.457Mangled.remove_prefix(Len);458return parseIdentifier(Demangled, Mangled);459}460461// Else demangle it as a plain identifier.462}463464parseLName(Demangled, Mangled, Len);465}466467bool Demangler::parseType(std::string_view &Mangled) {468if (Mangled.empty()) {469Mangled = {};470return false;471}472473switch (Mangled.front()) {474// TODO: Parse type qualifiers.475// TODO: Parse function types.476// TODO: Parse compound types.477// TODO: Parse delegate types.478// TODO: Parse tuple types.479480// Basic types.481case 'i':482Mangled.remove_prefix(1);483// TODO: Add type name dumping484return true;485486// TODO: Add support for the rest of the basic types.487488// Back referenced type.489case 'Q': {490parseTypeBackref(Mangled);491return true;492}493494default: // unhandled.495Mangled = {};496return false;497}498}499500void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,501unsigned long Len) {502switch (Len) {503case 6:504if (starts_with(Mangled, "__initZ")) {505// The static initializer for a given symbol.506Demangled->prepend("initializer for ");507Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);508Mangled.remove_prefix(Len);509return;510}511if (starts_with(Mangled, "__vtblZ")) {512// The vtable symbol for a given class.513Demangled->prepend("vtable for ");514Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);515Mangled.remove_prefix(Len);516return;517}518break;519520case 7:521if (starts_with(Mangled, "__ClassZ")) {522// The classinfo symbol for a given class.523Demangled->prepend("ClassInfo for ");524Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);525Mangled.remove_prefix(Len);526return;527}528break;529530case 11:531if (starts_with(Mangled, "__InterfaceZ")) {532// The interface symbol for a given class.533Demangled->prepend("Interface for ");534Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);535Mangled.remove_prefix(Len);536return;537}538break;539540case 12:541if (starts_with(Mangled, "__ModuleInfoZ")) {542// The ModuleInfo symbol for a given module.543Demangled->prepend("ModuleInfo for ");544Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);545Mangled.remove_prefix(Len);546return;547}548break;549}550551*Demangled << Mangled.substr(0, Len);552Mangled.remove_prefix(Len);553}554555Demangler::Demangler(std::string_view Mangled)556: Str(Mangled), LastBackref(Mangled.length()) {}557558const char *Demangler::parseMangle(OutputBuffer *Demangled) {559std::string_view M(this->Str);560parseMangle(Demangled, M);561return M.data();562}563564char *llvm::dlangDemangle(std::string_view MangledName) {565if (MangledName.empty() || !starts_with(MangledName, "_D"))566return nullptr;567568OutputBuffer Demangled;569if (MangledName == "_Dmain") {570Demangled << "D main";571} else {572573Demangler D(MangledName);574const char *M = D.parseMangle(&Demangled);575576// Check that the entire symbol was successfully demangled.577if (M == nullptr || *M != '\0') {578std::free(Demangled.getBuffer());579return nullptr;580}581}582583// OutputBuffer's internal buffer is not null terminated and therefore we need584// to add it to comply with C null terminated strings.585if (Demangled.getCurrentPosition() > 0) {586Demangled << '\0';587Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);588return Demangled.getBuffer();589}590591std::free(Demangled.getBuffer());592return nullptr;593}594595596