Path: blob/main/contrib/llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
39642 views
//===-- CPlusPlusNameParser.cpp -------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "CPlusPlusNameParser.h"910#include "clang/Basic/IdentifierTable.h"11#include "clang/Basic/TokenKinds.h"12#include "llvm/ADT/StringMap.h"13#include "llvm/Support/Threading.h"14#include <optional>1516using namespace lldb;17using namespace lldb_private;18using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;19using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;20namespace tok = clang::tok;2122std::optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {23m_next_token_index = 0;24std::optional<ParsedFunction> result(std::nullopt);2526// Try to parse the name as function without a return type specified e.g.27// main(int, char*[])28{29Bookmark start_position = SetBookmark();30result = ParseFunctionImpl(false);31if (result && !HasMoreTokens())32return result;33}3435// Try to parse the name as function with function pointer return type e.g.36// void (*get_func(const char*))()37result = ParseFuncPtr(true);38if (result)39return result;4041// Finally try to parse the name as a function with non-function return type42// e.g. int main(int, char*[])43result = ParseFunctionImpl(true);44if (HasMoreTokens())45return std::nullopt;46return result;47}4849std::optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {50m_next_token_index = 0;51std::optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();52if (!name_ranges)53return std::nullopt;54if (HasMoreTokens())55return std::nullopt;56ParsedName result;57result.basename = GetTextForRange(name_ranges->basename_range);58result.context = GetTextForRange(name_ranges->context_range);59return result;60}6162bool CPlusPlusNameParser::HasMoreTokens() {63return m_next_token_index < m_tokens.size();64}6566void CPlusPlusNameParser::Advance() { ++m_next_token_index; }6768void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }6970bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {71if (!HasMoreTokens())72return false;7374if (!Peek().is(kind))75return false;7677Advance();78return true;79}8081template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {82if (!HasMoreTokens())83return false;8485if (!Peek().isOneOf(kinds...))86return false;8788Advance();89return true;90}9192CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {93return Bookmark(m_next_token_index);94}9596size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }9798clang::Token &CPlusPlusNameParser::Peek() {99assert(HasMoreTokens());100return m_tokens[m_next_token_index];101}102103std::optional<ParsedFunction>104CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {105Bookmark start_position = SetBookmark();106107ParsedFunction result;108if (expect_return_type) {109size_t return_start = GetCurrentPosition();110// Consume return type if it's expected.111if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())112return std::nullopt;113114size_t return_end = GetCurrentPosition();115result.return_type = GetTextForRange(Range(return_start, return_end));116}117118auto maybe_name = ParseFullNameImpl();119if (!maybe_name) {120return std::nullopt;121}122123size_t argument_start = GetCurrentPosition();124if (!ConsumeArguments()) {125return std::nullopt;126}127128size_t qualifiers_start = GetCurrentPosition();129SkipFunctionQualifiers();130size_t end_position = GetCurrentPosition();131132result.name.basename = GetTextForRange(maybe_name->basename_range);133result.name.context = GetTextForRange(maybe_name->context_range);134result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));135result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));136start_position.Remove();137return result;138}139140std::optional<ParsedFunction>141CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {142// This function parses a function definition143// that returns a pointer type.144// E.g., double (*(*func(long))(int))(float)145146// Step 1:147// Remove the return type of the innermost148// function pointer type.149//150// Leaves us with:151// (*(*func(long))(int))(float)152Bookmark start_position = SetBookmark();153if (expect_return_type) {154// Consume return type.155if (!ConsumeTypename())156return std::nullopt;157}158159// Step 2:160//161// Skip a pointer and parenthesis pair.162//163// Leaves us with:164// (*func(long))(int))(float)165if (!ConsumeToken(tok::l_paren))166return std::nullopt;167if (!ConsumePtrsAndRefs())168return std::nullopt;169170// Step 3:171//172// Consume inner function name. This will fail unless173// we stripped all the pointers on the left hand side174// of the function name.175{176Bookmark before_inner_function_pos = SetBookmark();177auto maybe_inner_function_name = ParseFunctionImpl(false);178if (maybe_inner_function_name)179if (ConsumeToken(tok::r_paren))180if (ConsumeArguments()) {181SkipFunctionQualifiers();182start_position.Remove();183before_inner_function_pos.Remove();184return maybe_inner_function_name;185}186}187188// Step 4:189//190// Parse the remaining string as a function pointer again.191// This time don't consume the inner-most typename since192// we're left with pointers only. This will strip another193// layer of pointers until we're left with the innermost194// function name/argument. I.e., func(long))(int))(float)195//196// Once we successfully stripped all pointers and gotten197// the innermost function name from ParseFunctionImpl above,198// we consume a single ')' and the arguments '(...)' that follows.199//200// Leaves us with:201// )(float)202//203// This is the remnant of the outer function pointers' arguments.204// Unwinding the recursive calls will remove the remaining205// arguments.206auto maybe_inner_function_ptr_name = ParseFuncPtr(false);207if (maybe_inner_function_ptr_name)208if (ConsumeToken(tok::r_paren))209if (ConsumeArguments()) {210SkipFunctionQualifiers();211start_position.Remove();212return maybe_inner_function_ptr_name;213}214215return std::nullopt;216}217218bool CPlusPlusNameParser::ConsumeArguments() {219return ConsumeBrackets(tok::l_paren, tok::r_paren);220}221222bool CPlusPlusNameParser::ConsumeTemplateArgs() {223Bookmark start_position = SetBookmark();224if (!HasMoreTokens() || Peek().getKind() != tok::less)225return false;226Advance();227228// Consuming template arguments is a bit trickier than consuming function229// arguments, because '<' '>' brackets are not always trivially balanced. In230// some rare cases tokens '<' and '>' can appear inside template arguments as231// arithmetic or shift operators not as template brackets. Examples:232// std::enable_if<(10u)<(64), bool>233// f<A<operator<(X,Y)::Subclass>>234// Good thing that compiler makes sure that really ambiguous cases of '>'235// usage should be enclosed within '()' brackets.236int template_counter = 1;237bool can_open_template = false;238while (HasMoreTokens() && template_counter > 0) {239tok::TokenKind kind = Peek().getKind();240switch (kind) {241case tok::greatergreater:242template_counter -= 2;243can_open_template = false;244Advance();245break;246case tok::greater:247--template_counter;248can_open_template = false;249Advance();250break;251case tok::less:252// '<' is an attempt to open a subteamplte253// check if parser is at the point where it's actually possible,254// otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No255// need to do the same for '>' because compiler actually makes sure that256// '>' always surrounded by brackets to avoid ambiguity.257if (can_open_template)258++template_counter;259can_open_template = false;260Advance();261break;262case tok::kw_operator: // C++ operator overloading.263if (!ConsumeOperator())264return false;265can_open_template = true;266break;267case tok::raw_identifier:268can_open_template = true;269Advance();270break;271case tok::l_square:272// Handle templates tagged with an ABI tag.273// An example demangled/prettified version is:274// func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)275if (ConsumeAbiTag())276can_open_template = true;277else if (ConsumeBrackets(tok::l_square, tok::r_square))278can_open_template = false;279else280return false;281break;282case tok::l_paren:283if (!ConsumeArguments())284return false;285can_open_template = false;286break;287default:288can_open_template = false;289Advance();290break;291}292}293294if (template_counter != 0) {295return false;296}297start_position.Remove();298return true;299}300301bool CPlusPlusNameParser::ConsumeAbiTag() {302Bookmark start_position = SetBookmark();303if (!ConsumeToken(tok::l_square))304return false;305306if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&307Peek().getRawIdentifier() == "abi")308Advance();309else310return false;311312if (!ConsumeToken(tok::colon))313return false;314315// Consume the actual tag string (and allow some special characters)316while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,317tok::numeric_constant))318;319320if (!ConsumeToken(tok::r_square))321return false;322323start_position.Remove();324return true;325}326327bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {328Bookmark start_position = SetBookmark();329if (!ConsumeToken(tok::l_paren)) {330return false;331}332constexpr llvm::StringLiteral g_anonymous("anonymous");333if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&334Peek().getRawIdentifier() == g_anonymous) {335Advance();336} else {337return false;338}339340if (!ConsumeToken(tok::kw_namespace)) {341return false;342}343344if (!ConsumeToken(tok::r_paren)) {345return false;346}347start_position.Remove();348return true;349}350351bool CPlusPlusNameParser::ConsumeLambda() {352Bookmark start_position = SetBookmark();353if (!ConsumeToken(tok::l_brace)) {354return false;355}356constexpr llvm::StringLiteral g_lambda("lambda");357if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&358Peek().getRawIdentifier() == g_lambda) {359// Put the matched brace back so we can use ConsumeBrackets360TakeBack();361} else {362return false;363}364365if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {366return false;367}368369start_position.Remove();370return true;371}372373bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,374tok::TokenKind right) {375Bookmark start_position = SetBookmark();376if (!HasMoreTokens() || Peek().getKind() != left)377return false;378Advance();379380int counter = 1;381while (HasMoreTokens() && counter > 0) {382tok::TokenKind kind = Peek().getKind();383if (kind == right)384--counter;385else if (kind == left)386++counter;387Advance();388}389390assert(counter >= 0);391if (counter > 0) {392return false;393}394start_position.Remove();395return true;396}397398bool CPlusPlusNameParser::ConsumeOperator() {399Bookmark start_position = SetBookmark();400if (!ConsumeToken(tok::kw_operator))401return false;402403if (!HasMoreTokens()) {404return false;405}406407const auto &token = Peek();408409// When clang generates debug info it adds template parameters to names.410// Since clang doesn't add a space between the name and the template parameter411// in some cases we are not generating valid C++ names e.g.:412//413// operator<<A::B>414//415// In some of these cases we will not parse them correctly. This fixes the416// issue by detecting this case and inserting tok::less in place of417// tok::lessless and returning successfully that we consumed the operator.418if (token.getKind() == tok::lessless) {419// Make sure we have more tokens before attempting to look ahead one more.420if (m_next_token_index + 1 < m_tokens.size()) {421// Look ahead two tokens.422clang::Token n_token = m_tokens[m_next_token_index + 1];423// If we find ( or < then this is indeed operator<< no need for fix.424if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {425clang::Token tmp_tok;426tmp_tok.startToken();427tmp_tok.setLength(1);428tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));429tmp_tok.setKind(tok::less);430431m_tokens[m_next_token_index] = tmp_tok;432433start_position.Remove();434return true;435}436}437}438439switch (token.getKind()) {440case tok::kw_new:441case tok::kw_delete:442// This is 'new' or 'delete' operators.443Advance();444// Check for array new/delete.445if (HasMoreTokens() && Peek().is(tok::l_square)) {446// Consume the '[' and ']'.447if (!ConsumeBrackets(tok::l_square, tok::r_square))448return false;449}450break;451452#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \453case tok::Token: \454Advance(); \455break;456#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)457#include "clang/Basic/OperatorKinds.def"458#undef OVERLOADED_OPERATOR459#undef OVERLOADED_OPERATOR_MULTI460461case tok::l_paren:462// Call operator consume '(' ... ')'.463if (ConsumeBrackets(tok::l_paren, tok::r_paren))464break;465return false;466467case tok::l_square:468// This is a [] operator.469// Consume the '[' and ']'.470if (ConsumeBrackets(tok::l_square, tok::r_square))471break;472return false;473474default:475// This might be a cast operator.476if (ConsumeTypename())477break;478return false;479}480start_position.Remove();481return true;482}483484void CPlusPlusNameParser::SkipTypeQualifiers() {485while (ConsumeToken(tok::kw_const, tok::kw_volatile))486;487}488489void CPlusPlusNameParser::SkipFunctionQualifiers() {490while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))491;492}493494bool CPlusPlusNameParser::ConsumeBuiltinType() {495bool result = false;496bool continue_parsing = true;497// Built-in types can be made of a few keywords like 'unsigned long long498// int'. This function consumes all built-in type keywords without checking499// if they make sense like 'unsigned char void'.500while (continue_parsing && HasMoreTokens()) {501switch (Peek().getKind()) {502case tok::kw_short:503case tok::kw_long:504case tok::kw___int64:505case tok::kw___int128:506case tok::kw_signed:507case tok::kw_unsigned:508case tok::kw_void:509case tok::kw_char:510case tok::kw_int:511case tok::kw_half:512case tok::kw_float:513case tok::kw_double:514case tok::kw___float128:515case tok::kw_wchar_t:516case tok::kw_bool:517case tok::kw_char16_t:518case tok::kw_char32_t:519result = true;520Advance();521break;522default:523continue_parsing = false;524break;525}526}527return result;528}529530void CPlusPlusNameParser::SkipPtrsAndRefs() {531// Ignoring result.532ConsumePtrsAndRefs();533}534535bool CPlusPlusNameParser::ConsumePtrsAndRefs() {536bool found = false;537SkipTypeQualifiers();538while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,539tok::kw_volatile)) {540found = true;541SkipTypeQualifiers();542}543return found;544}545546bool CPlusPlusNameParser::ConsumeDecltype() {547Bookmark start_position = SetBookmark();548if (!ConsumeToken(tok::kw_decltype))549return false;550551if (!ConsumeArguments())552return false;553554start_position.Remove();555return true;556}557558bool CPlusPlusNameParser::ConsumeTypename() {559Bookmark start_position = SetBookmark();560SkipTypeQualifiers();561if (!ConsumeBuiltinType() && !ConsumeDecltype()) {562if (!ParseFullNameImpl())563return false;564}565SkipPtrsAndRefs();566start_position.Remove();567return true;568}569570std::optional<CPlusPlusNameParser::ParsedNameRanges>571CPlusPlusNameParser::ParseFullNameImpl() {572// Name parsing state machine.573enum class State {574Beginning, // start of the name575AfterTwoColons, // right after ::576AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)577AfterTemplate, // right after template brackets (<something>)578AfterOperator, // right after name of C++ operator579};580581Bookmark start_position = SetBookmark();582State state = State::Beginning;583bool continue_parsing = true;584std::optional<size_t> last_coloncolon_position;585586while (continue_parsing && HasMoreTokens()) {587const auto &token = Peek();588switch (token.getKind()) {589case tok::raw_identifier: // Just a name.590if (state != State::Beginning && state != State::AfterTwoColons) {591continue_parsing = false;592break;593}594Advance();595state = State::AfterIdentifier;596break;597case tok::l_square: {598// Handles types or functions that were tagged599// with, e.g.,600// [[gnu::abi_tag("tag1","tag2")]] func()601// and demangled/prettified into:602// func[abi:tag1][abi:tag2]()603604// ABI tags only appear after a method or type name605const bool valid_state =606state == State::AfterIdentifier || state == State::AfterOperator;607if (!valid_state || !ConsumeAbiTag()) {608continue_parsing = false;609}610611break;612}613case tok::l_paren: {614if (state == State::Beginning || state == State::AfterTwoColons) {615// (anonymous namespace)616if (ConsumeAnonymousNamespace()) {617state = State::AfterIdentifier;618break;619}620}621622// Type declared inside a function 'func()::Type'623if (state != State::AfterIdentifier && state != State::AfterTemplate &&624state != State::AfterOperator) {625continue_parsing = false;626break;627}628Bookmark l_paren_position = SetBookmark();629// Consume the '(' ... ') [const]'.630if (!ConsumeArguments()) {631continue_parsing = false;632break;633}634SkipFunctionQualifiers();635636// Consume '::'637size_t coloncolon_position = GetCurrentPosition();638if (!ConsumeToken(tok::coloncolon)) {639continue_parsing = false;640break;641}642l_paren_position.Remove();643last_coloncolon_position = coloncolon_position;644state = State::AfterTwoColons;645break;646}647case tok::l_brace:648if (state == State::Beginning || state == State::AfterTwoColons) {649if (ConsumeLambda()) {650state = State::AfterIdentifier;651break;652}653}654continue_parsing = false;655break;656case tok::coloncolon: // Type nesting delimiter.657if (state != State::Beginning && state != State::AfterIdentifier &&658state != State::AfterTemplate) {659continue_parsing = false;660break;661}662last_coloncolon_position = GetCurrentPosition();663Advance();664state = State::AfterTwoColons;665break;666case tok::less: // Template brackets.667if (state != State::AfterIdentifier && state != State::AfterOperator) {668continue_parsing = false;669break;670}671if (!ConsumeTemplateArgs()) {672continue_parsing = false;673break;674}675state = State::AfterTemplate;676break;677case tok::kw_operator: // C++ operator overloading.678if (state != State::Beginning && state != State::AfterTwoColons) {679continue_parsing = false;680break;681}682if (!ConsumeOperator()) {683continue_parsing = false;684break;685}686state = State::AfterOperator;687break;688case tok::tilde: // Destructor.689if (state != State::Beginning && state != State::AfterTwoColons) {690continue_parsing = false;691break;692}693Advance();694if (ConsumeToken(tok::raw_identifier)) {695state = State::AfterIdentifier;696} else {697TakeBack();698continue_parsing = false;699}700break;701default:702continue_parsing = false;703break;704}705}706707if (state == State::AfterIdentifier || state == State::AfterOperator ||708state == State::AfterTemplate) {709ParsedNameRanges result;710if (last_coloncolon_position) {711result.context_range =712Range(start_position.GetSavedPosition(), *last_coloncolon_position);713result.basename_range =714Range(*last_coloncolon_position + 1, GetCurrentPosition());715} else {716result.basename_range =717Range(start_position.GetSavedPosition(), GetCurrentPosition());718}719start_position.Remove();720return result;721} else {722return std::nullopt;723}724}725726llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {727if (range.empty())728return llvm::StringRef();729assert(range.begin_index < range.end_index);730assert(range.begin_index < m_tokens.size());731assert(range.end_index <= m_tokens.size());732clang::Token &first_token = m_tokens[range.begin_index];733clang::Token &last_token = m_tokens[range.end_index - 1];734clang::SourceLocation start_loc = first_token.getLocation();735clang::SourceLocation end_loc = last_token.getLocation();736unsigned start_pos = start_loc.getRawEncoding();737unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();738return m_text.take_front(end_pos).drop_front(start_pos);739}740741static const clang::LangOptions &GetLangOptions() {742static clang::LangOptions g_options;743static llvm::once_flag g_once_flag;744llvm::call_once(g_once_flag, []() {745g_options.LineComment = true;746g_options.C99 = true;747g_options.C11 = true;748g_options.CPlusPlus = true;749g_options.CPlusPlus11 = true;750g_options.CPlusPlus14 = true;751g_options.CPlusPlus17 = true;752g_options.CPlusPlus20 = true;753});754return g_options;755}756757static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {758static llvm::StringMap<tok::TokenKind> g_map{759#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},760#include "clang/Basic/TokenKinds.def"761#undef KEYWORD762};763return g_map;764}765766void CPlusPlusNameParser::ExtractTokens() {767if (m_text.empty())768return;769clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),770m_text.data(), m_text.data() + m_text.size());771const auto &kw_map = GetKeywordsMap();772clang::Token token;773for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);774lexer.LexFromRawLexer(token)) {775if (token.is(clang::tok::raw_identifier)) {776auto it = kw_map.find(token.getRawIdentifier());777if (it != kw_map.end()) {778token.setKind(it->getValue());779}780}781782m_tokens.push_back(token);783}784}785786787