Path: blob/main/contrib/llvm-project/lldb/source/Core/Mangled.cpp
39587 views
//===-- Mangled.cpp -------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "lldb/Core/Mangled.h"910#include "lldb/Core/DataFileCache.h"11#include "lldb/Core/RichManglingContext.h"12#include "lldb/Target/Language.h"13#include "lldb/Utility/ConstString.h"14#include "lldb/Utility/DataEncoder.h"15#include "lldb/Utility/LLDBLog.h"16#include "lldb/Utility/Log.h"17#include "lldb/Utility/RegularExpression.h"18#include "lldb/Utility/Stream.h"19#include "lldb/lldb-enumerations.h"2021#include "llvm/ADT/StringExtras.h"22#include "llvm/ADT/StringRef.h"23#include "llvm/Demangle/Demangle.h"24#include "llvm/Support/Compiler.h"2526#include <mutex>27#include <string>28#include <string_view>29#include <utility>3031#include <cstdlib>32#include <cstring>33using namespace lldb_private;3435static inline bool cstring_is_mangled(llvm::StringRef s) {36return Mangled::GetManglingScheme(s) != Mangled::eManglingSchemeNone;37}3839#pragma mark Mangled4041Mangled::ManglingScheme Mangled::GetManglingScheme(llvm::StringRef const name) {42if (name.empty())43return Mangled::eManglingSchemeNone;4445if (name.starts_with("?"))46return Mangled::eManglingSchemeMSVC;4748if (name.starts_with("_R"))49return Mangled::eManglingSchemeRustV0;5051if (name.starts_with("_D")) {52// A dlang mangled name begins with `_D`, followed by a numeric length. One53// known exception is the symbol `_Dmain`.54// See `SymbolName` and `LName` in55// https://dlang.org/spec/abi.html#name_mangling56llvm::StringRef buf = name.drop_front(2);57if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))58return Mangled::eManglingSchemeD;59}6061if (name.starts_with("_Z"))62return Mangled::eManglingSchemeItanium;6364// ___Z is a clang extension of block invocations65if (name.starts_with("___Z"))66return Mangled::eManglingSchemeItanium;6768// Swift's older style of mangling used "_T" as a mangling prefix. This can69// lead to false positives with other symbols that just so happen to start70// with "_T". To minimize the chance of that happening, we only return true71// for select old-style swift mangled names. The known cases are ObjC classes72// and protocols. Classes are either prefixed with "_TtC" or "_TtGC".73// Protocols are prefixed with "_TtP".74if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||75name.starts_with("_TtP"))76return Mangled::eManglingSchemeSwift;7778// Swift 4.2 used "$S" and "_$S".79// Swift 5 and onward uses "$s" and "_$s".80// Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.81if (name.starts_with("$S") || name.starts_with("_$S") ||82name.starts_with("$s") || name.starts_with("_$s") ||83name.starts_with("@__swiftmacro_"))84return Mangled::eManglingSchemeSwift;8586return Mangled::eManglingSchemeNone;87}8889Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {90if (s)91SetValue(s);92}9394Mangled::Mangled(llvm::StringRef name) {95if (!name.empty())96SetValue(ConstString(name));97}9899// Convert to bool operator. This allows code to check any Mangled objects100// to see if they contain anything valid using code such as:101//102// Mangled mangled(...);103// if (mangled)104// { ...105Mangled::operator bool() const { return m_mangled || m_demangled; }106107// Clear the mangled and demangled values.108void Mangled::Clear() {109m_mangled.Clear();110m_demangled.Clear();111}112113// Compare the string values.114int Mangled::Compare(const Mangled &a, const Mangled &b) {115return ConstString::Compare(a.GetName(ePreferMangled),116b.GetName(ePreferMangled));117}118119void Mangled::SetValue(ConstString name) {120if (name) {121if (cstring_is_mangled(name.GetStringRef())) {122m_demangled.Clear();123m_mangled = name;124} else {125m_demangled = name;126m_mangled.Clear();127}128} else {129m_demangled.Clear();130m_mangled.Clear();131}132}133134// Local helpers for different demangling implementations.135static char *GetMSVCDemangledStr(llvm::StringRef M) {136char *demangled_cstr = llvm::microsoftDemangle(137M, nullptr, nullptr,138llvm::MSDemangleFlags(139llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |140llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));141142if (Log *log = GetLog(LLDBLog::Demangle)) {143if (demangled_cstr && demangled_cstr[0])144LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);145else146LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());147}148149return demangled_cstr;150}151152static char *GetItaniumDemangledStr(const char *M) {153char *demangled_cstr = nullptr;154155llvm::ItaniumPartialDemangler ipd;156bool err = ipd.partialDemangle(M);157if (!err) {158// Default buffer and size (will realloc in case it's too small).159size_t demangled_size = 80;160demangled_cstr = static_cast<char *>(std::malloc(demangled_size));161demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);162163assert(demangled_cstr &&164"finishDemangle must always succeed if partialDemangle did");165assert(demangled_cstr[demangled_size - 1] == '\0' &&166"Expected demangled_size to return length including trailing null");167}168169if (Log *log = GetLog(LLDBLog::Demangle)) {170if (demangled_cstr)171LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);172else173LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);174}175176return demangled_cstr;177}178179static char *GetRustV0DemangledStr(llvm::StringRef M) {180char *demangled_cstr = llvm::rustDemangle(M);181182if (Log *log = GetLog(LLDBLog::Demangle)) {183if (demangled_cstr && demangled_cstr[0])184LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);185else186LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",187static_cast<std::string_view>(M));188}189190return demangled_cstr;191}192193static char *GetDLangDemangledStr(llvm::StringRef M) {194char *demangled_cstr = llvm::dlangDemangle(M);195196if (Log *log = GetLog(LLDBLog::Demangle)) {197if (demangled_cstr && demangled_cstr[0])198LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);199else200LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",201static_cast<std::string_view>(M));202}203204return demangled_cstr;205}206207// Explicit demangling for scheduled requests during batch processing. This208// makes use of ItaniumPartialDemangler's rich demangle info209bool Mangled::GetRichManglingInfo(RichManglingContext &context,210SkipMangledNameFn *skip_mangled_name) {211// Others are not meant to arrive here. ObjC names or C's main() for example212// have their names stored in m_demangled, while m_mangled is empty.213assert(m_mangled);214215// Check whether or not we are interested in this name at all.216ManglingScheme scheme = GetManglingScheme(m_mangled.GetStringRef());217if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))218return false;219220switch (scheme) {221case eManglingSchemeNone:222// The current mangled_name_filter would allow llvm_unreachable here.223return false;224225case eManglingSchemeItanium:226// We want the rich mangling info here, so we don't care whether or not227// there is a demangled string in the pool already.228return context.FromItaniumName(m_mangled);229230case eManglingSchemeMSVC: {231// We have no rich mangling for MSVC-mangled names yet, so first try to232// demangle it if necessary.233if (!m_demangled && !m_mangled.GetMangledCounterpart(m_demangled)) {234if (char *d = GetMSVCDemangledStr(m_mangled)) {235// Without the rich mangling info we have to demangle the full name.236// Copy it to string pool and connect the counterparts to accelerate237// later access in GetDemangledName().238m_demangled.SetStringWithMangledCounterpart(llvm::StringRef(d),239m_mangled);240::free(d);241} else {242m_demangled.SetCString("");243}244}245246if (m_demangled.IsEmpty()) {247// Cannot demangle it, so don't try parsing.248return false;249} else {250// Demangled successfully, we can try and parse it with251// CPlusPlusLanguage::MethodName.252return context.FromCxxMethodName(m_demangled);253}254}255256case eManglingSchemeRustV0:257case eManglingSchemeD:258case eManglingSchemeSwift:259// Rich demangling scheme is not supported260return false;261}262llvm_unreachable("Fully covered switch above!");263}264265// Generate the demangled name on demand using this accessor. Code in this266// class will need to use this accessor if it wishes to decode the demangled267// name. The result is cached and will be kept until a new string value is268// supplied to this object, or until the end of the object's lifetime.269ConstString Mangled::GetDemangledName() const {270// Check to make sure we have a valid mangled name and that we haven't271// already decoded our mangled name.272if (m_mangled && m_demangled.IsNull()) {273// Don't bother running anything that isn't mangled274const char *mangled_name = m_mangled.GetCString();275ManglingScheme mangling_scheme =276GetManglingScheme(m_mangled.GetStringRef());277if (mangling_scheme != eManglingSchemeNone &&278!m_mangled.GetMangledCounterpart(m_demangled)) {279// We didn't already mangle this name, demangle it and if all goes well280// add it to our map.281char *demangled_name = nullptr;282switch (mangling_scheme) {283case eManglingSchemeMSVC:284demangled_name = GetMSVCDemangledStr(mangled_name);285break;286case eManglingSchemeItanium: {287demangled_name = GetItaniumDemangledStr(mangled_name);288break;289}290case eManglingSchemeRustV0:291demangled_name = GetRustV0DemangledStr(m_mangled);292break;293case eManglingSchemeD:294demangled_name = GetDLangDemangledStr(m_mangled);295break;296case eManglingSchemeSwift:297// Demangling a swift name requires the swift compiler. This is298// explicitly unsupported on llvm.org.299break;300case eManglingSchemeNone:301llvm_unreachable("eManglingSchemeNone was handled already");302}303if (demangled_name) {304m_demangled.SetStringWithMangledCounterpart(305llvm::StringRef(demangled_name), m_mangled);306free(demangled_name);307}308}309if (m_demangled.IsNull()) {310// Set the demangled string to the empty string to indicate we tried to311// parse it once and failed.312m_demangled.SetCString("");313}314}315316return m_demangled;317}318319ConstString Mangled::GetDisplayDemangledName() const {320if (Language *lang = Language::FindPlugin(GuessLanguage()))321return lang->GetDisplayDemangledName(*this);322return GetDemangledName();323}324325bool Mangled::NameMatches(const RegularExpression ®ex) const {326if (m_mangled && regex.Execute(m_mangled.GetStringRef()))327return true;328329ConstString demangled = GetDemangledName();330return demangled && regex.Execute(demangled.GetStringRef());331}332333// Get the demangled name if there is one, else return the mangled name.334ConstString Mangled::GetName(Mangled::NamePreference preference) const {335if (preference == ePreferMangled && m_mangled)336return m_mangled;337338// Call the accessor to make sure we get a demangled name in case it hasn't339// been demangled yet...340ConstString demangled = GetDemangledName();341342if (preference == ePreferDemangledWithoutArguments) {343if (Language *lang = Language::FindPlugin(GuessLanguage())) {344return lang->GetDemangledFunctionNameWithoutArguments(*this);345}346}347if (preference == ePreferDemangled) {348if (demangled)349return demangled;350return m_mangled;351}352return demangled;353}354355// Dump a Mangled object to stream "s". We don't force our demangled name to be356// computed currently (we don't use the accessor).357void Mangled::Dump(Stream *s) const {358if (m_mangled) {359*s << ", mangled = " << m_mangled;360}361if (m_demangled) {362const char *demangled = m_demangled.AsCString();363s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");364}365}366367// Dumps a debug version of this string with extra object and state information368// to stream "s".369void Mangled::DumpDebug(Stream *s) const {370s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),371static_cast<const void *>(this));372m_mangled.DumpDebug(s);373s->Printf(", demangled = ");374m_demangled.DumpDebug(s);375}376377// Return the size in byte that this object takes in memory. The size includes378// the size of the objects it owns, and not the strings that it references379// because they are shared strings.380size_t Mangled::MemorySize() const {381return m_mangled.MemorySize() + m_demangled.MemorySize();382}383384// We "guess" the language because we can't determine a symbol's language from385// it's name. For example, a Pascal symbol can be mangled using the C++386// Itanium scheme, and defined in a compilation unit within the same module as387// other C++ units. In addition, different targets could have different ways388// of mangling names from a given language, likewise the compilation units389// within those targets.390lldb::LanguageType Mangled::GuessLanguage() const {391lldb::LanguageType result = lldb::eLanguageTypeUnknown;392// Ask each language plugin to check if the mangled name belongs to it.393Language::ForEach([this, &result](Language *l) {394if (l->SymbolNameFitsToLanguage(*this)) {395result = l->GetLanguageType();396return false;397}398return true;399});400return result;401}402403// Dump OBJ to the supplied stream S.404Stream &operator<<(Stream &s, const Mangled &obj) {405if (obj.GetMangledName())406s << "mangled = '" << obj.GetMangledName() << "'";407408ConstString demangled = obj.GetDemangledName();409if (demangled)410s << ", demangled = '" << demangled << '\'';411else412s << ", demangled = <error>";413return s;414}415416// When encoding Mangled objects we can get away with encoding as little417// information as is required. The enumeration below helps us to efficiently418// encode Mangled objects.419enum MangledEncoding {420/// If the Mangled object has neither a mangled name or demangled name we can421/// encode the object with one zero byte using the Empty enumeration.422Empty = 0u,423/// If the Mangled object has only a demangled name and no mangled named, we424/// can encode only the demangled name.425DemangledOnly = 1u,426/// If the mangle name can calculate the demangled name (it is the427/// mangled/demangled counterpart), then we only need to encode the mangled428/// name as the demangled name can be recomputed.429MangledOnly = 2u,430/// If we have a Mangled object with two different names that are not related431/// then we need to save both strings. This can happen if we have a name that432/// isn't a true mangled name, but we want to be able to lookup a symbol by433/// name and type in the symbol table. We do this for Objective C symbols like434/// "OBJC_CLASS_$_NSValue" where the mangled named will be set to435/// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to436/// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it437/// would fail, but in these cases we want these unrelated names to be438/// preserved.439MangledAndDemangled = 3u440};441442bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,443const StringTableReader &strtab) {444m_mangled.Clear();445m_demangled.Clear();446MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);447switch (encoding) {448case Empty:449return true;450451case DemangledOnly:452m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));453return true;454455case MangledOnly:456m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));457return true;458459case MangledAndDemangled:460m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));461m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));462return true;463}464return false;465}466/// The encoding format for the Mangled object is as follows:467///468/// uint8_t encoding;469/// char str1[]; (only if DemangledOnly, MangledOnly)470/// char str2[]; (only if MangledAndDemangled)471///472/// The strings are stored as NULL terminated UTF8 strings and str1 and str2473/// are only saved if we need them based on the encoding.474///475/// Some mangled names have a mangled name that can be demangled by the built476/// in demanglers. These kinds of mangled objects know when the mangled and477/// demangled names are the counterparts for each other. This is done because478/// demangling is very expensive and avoiding demangling the same name twice479/// saves us a lot of compute time. For these kinds of names we only need to480/// save the mangled name and have the encoding set to "MangledOnly".481///482/// If a mangled obejct has only a demangled name, then we save only that string483/// and have the encoding set to "DemangledOnly".484///485/// Some mangled objects have both mangled and demangled names, but the486/// demangled name can not be computed from the mangled name. This is often used487/// for runtime named, like Objective C runtime V2 and V3 names. Both these488/// names must be saved and the encoding is set to "MangledAndDemangled".489///490/// For a Mangled object with no names, we only need to set the encoding to491/// "Empty" and not store any string values.492void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {493MangledEncoding encoding = Empty;494if (m_mangled) {495encoding = MangledOnly;496if (m_demangled) {497// We have both mangled and demangled names. If the demangled name is the498// counterpart of the mangled name, then we only need to save the mangled499// named. If they are different, we need to save both.500ConstString s;501if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled))502encoding = MangledAndDemangled;503}504} else if (m_demangled) {505encoding = DemangledOnly;506}507file.AppendU8(encoding);508switch (encoding) {509case Empty:510break;511case DemangledOnly:512file.AppendU32(strtab.Add(m_demangled));513break;514case MangledOnly:515file.AppendU32(strtab.Add(m_mangled));516break;517case MangledAndDemangled:518file.AppendU32(strtab.Add(m_mangled));519file.AppendU32(strtab.Add(m_demangled));520break;521}522}523524525