Path: blob/main/contrib/llvm-project/clang/lib/Tooling/Syntax/BuildTree.cpp
35271 views
//===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7#include "clang/Tooling/Syntax/BuildTree.h"8#include "clang/AST/ASTFwd.h"9#include "clang/AST/Decl.h"10#include "clang/AST/DeclBase.h"11#include "clang/AST/DeclCXX.h"12#include "clang/AST/DeclarationName.h"13#include "clang/AST/Expr.h"14#include "clang/AST/ExprCXX.h"15#include "clang/AST/IgnoreExpr.h"16#include "clang/AST/OperationKinds.h"17#include "clang/AST/RecursiveASTVisitor.h"18#include "clang/AST/Stmt.h"19#include "clang/AST/TypeLoc.h"20#include "clang/AST/TypeLocVisitor.h"21#include "clang/Basic/LLVM.h"22#include "clang/Basic/SourceLocation.h"23#include "clang/Basic/SourceManager.h"24#include "clang/Basic/Specifiers.h"25#include "clang/Basic/TokenKinds.h"26#include "clang/Lex/Lexer.h"27#include "clang/Lex/LiteralSupport.h"28#include "clang/Tooling/Syntax/Nodes.h"29#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"30#include "clang/Tooling/Syntax/Tokens.h"31#include "clang/Tooling/Syntax/Tree.h"32#include "llvm/ADT/ArrayRef.h"33#include "llvm/ADT/DenseMap.h"34#include "llvm/ADT/PointerUnion.h"35#include "llvm/ADT/STLExtras.h"36#include "llvm/ADT/ScopeExit.h"37#include "llvm/ADT/SmallVector.h"38#include "llvm/Support/Allocator.h"39#include "llvm/Support/Casting.h"40#include "llvm/Support/Compiler.h"41#include "llvm/Support/FormatVariadic.h"42#include "llvm/Support/MemoryBuffer.h"43#include "llvm/Support/raw_ostream.h"44#include <cstddef>45#include <map>4647using namespace clang;4849// Ignores the implicit `CXXConstructExpr` for copy/move constructor calls50// generated by the compiler, as well as in implicit conversions like the one51// wrapping `1` in `X x = 1;`.52static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) {53if (auto *C = dyn_cast<CXXConstructExpr>(E)) {54auto NumArgs = C->getNumArgs();55if (NumArgs == 1 || (NumArgs > 1 && isa<CXXDefaultArgExpr>(C->getArg(1)))) {56Expr *A = C->getArg(0);57if (C->getParenOrBraceRange().isInvalid())58return A;59}60}61return E;62}6364// In:65// struct X {66// X(int)67// };68// X x = X(1);69// Ignores the implicit `CXXFunctionalCastExpr` that wraps70// `CXXConstructExpr X(1)`.71static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) {72if (auto *F = dyn_cast<CXXFunctionalCastExpr>(E)) {73if (F->getCastKind() == CK_ConstructorConversion)74return F->getSubExpr();75}76return E;77}7879static Expr *IgnoreImplicit(Expr *E) {80return IgnoreExprNodes(E, IgnoreImplicitSingleStep,81IgnoreImplicitConstructorSingleStep,82IgnoreCXXFunctionalCastExprWrappingConstructor);83}8485LLVM_ATTRIBUTE_UNUSED86static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; }8788namespace {89/// Get start location of the Declarator from the TypeLoc.90/// E.g.:91/// loc of `(` in `int (a)`92/// loc of `*` in `int *(a)`93/// loc of the first `(` in `int (*a)(int)`94/// loc of the `*` in `int *(a)(int)`95/// loc of the first `*` in `const int *const *volatile a;`96///97/// It is non-trivial to get the start location because TypeLocs are stored98/// inside out. In the example above `*volatile` is the TypeLoc returned99/// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()`100/// returns.101struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> {102SourceLocation VisitParenTypeLoc(ParenTypeLoc T) {103auto L = Visit(T.getInnerLoc());104if (L.isValid())105return L;106return T.getLParenLoc();107}108109// Types spelled in the prefix part of the declarator.110SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) {111return HandlePointer(T);112}113114SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) {115return HandlePointer(T);116}117118SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) {119return HandlePointer(T);120}121122SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) {123return HandlePointer(T);124}125126SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) {127return HandlePointer(T);128}129130// All other cases are not important, as they are either part of declaration131// specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on132// existing declarators (e.g. QualifiedTypeLoc). They cannot start the133// declarator themselves, but their underlying type can.134SourceLocation VisitTypeLoc(TypeLoc T) {135auto N = T.getNextTypeLoc();136if (!N)137return SourceLocation();138return Visit(N);139}140141SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) {142if (T.getTypePtr()->hasTrailingReturn())143return SourceLocation(); // avoid recursing into the suffix of declarator.144return VisitTypeLoc(T);145}146147private:148template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) {149auto L = Visit(T.getPointeeLoc());150if (L.isValid())151return L;152return T.getLocalSourceRange().getBegin();153}154};155} // namespace156157static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) {158auto FirstDefaultArg =159llvm::find_if(Args, [](auto It) { return isa<CXXDefaultArgExpr>(It); });160return llvm::make_range(Args.begin(), FirstDefaultArg);161}162163static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) {164switch (E.getOperator()) {165// Comparison166case OO_EqualEqual:167case OO_ExclaimEqual:168case OO_Greater:169case OO_GreaterEqual:170case OO_Less:171case OO_LessEqual:172case OO_Spaceship:173// Assignment174case OO_Equal:175case OO_SlashEqual:176case OO_PercentEqual:177case OO_CaretEqual:178case OO_PipeEqual:179case OO_LessLessEqual:180case OO_GreaterGreaterEqual:181case OO_PlusEqual:182case OO_MinusEqual:183case OO_StarEqual:184case OO_AmpEqual:185// Binary computation186case OO_Slash:187case OO_Percent:188case OO_Caret:189case OO_Pipe:190case OO_LessLess:191case OO_GreaterGreater:192case OO_AmpAmp:193case OO_PipePipe:194case OO_ArrowStar:195case OO_Comma:196return syntax::NodeKind::BinaryOperatorExpression;197case OO_Tilde:198case OO_Exclaim:199return syntax::NodeKind::PrefixUnaryOperatorExpression;200// Prefix/Postfix increment/decrement201case OO_PlusPlus:202case OO_MinusMinus:203switch (E.getNumArgs()) {204case 1:205return syntax::NodeKind::PrefixUnaryOperatorExpression;206case 2:207return syntax::NodeKind::PostfixUnaryOperatorExpression;208default:209llvm_unreachable("Invalid number of arguments for operator");210}211// Operators that can be unary or binary212case OO_Plus:213case OO_Minus:214case OO_Star:215case OO_Amp:216switch (E.getNumArgs()) {217case 1:218return syntax::NodeKind::PrefixUnaryOperatorExpression;219case 2:220return syntax::NodeKind::BinaryOperatorExpression;221default:222llvm_unreachable("Invalid number of arguments for operator");223}224return syntax::NodeKind::BinaryOperatorExpression;225// Not yet supported by SyntaxTree226case OO_New:227case OO_Delete:228case OO_Array_New:229case OO_Array_Delete:230case OO_Coawait:231case OO_Subscript:232case OO_Arrow:233return syntax::NodeKind::UnknownExpression;234case OO_Call:235return syntax::NodeKind::CallExpression;236case OO_Conditional: // not overloadable237case NUM_OVERLOADED_OPERATORS:238case OO_None:239llvm_unreachable("Not an overloadable operator");240}241llvm_unreachable("Unknown OverloadedOperatorKind enum");242}243244/// Get the start of the qualified name. In the examples below it gives the245/// location of the `^`:246/// `int ^a;`247/// `int *^a;`248/// `int ^a::S::f(){}`249static SourceLocation getQualifiedNameStart(NamedDecl *D) {250assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) &&251"only DeclaratorDecl and TypedefNameDecl are supported.");252253auto DN = D->getDeclName();254bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo();255if (IsAnonymous)256return SourceLocation();257258if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) {259if (DD->getQualifierLoc()) {260return DD->getQualifierLoc().getBeginLoc();261}262}263264return D->getLocation();265}266267/// Gets the range of the initializer inside an init-declarator C++ [dcl.decl].268/// `int a;` -> range of ``,269/// `int *a = nullptr` -> range of `= nullptr`.270/// `int a{}` -> range of `{}`.271/// `int a()` -> range of `()`.272static SourceRange getInitializerRange(Decl *D) {273if (auto *V = dyn_cast<VarDecl>(D)) {274auto *I = V->getInit();275// Initializers in range-based-for are not part of the declarator276if (I && !V->isCXXForRangeDecl())277return I->getSourceRange();278}279280return SourceRange();281}282283/// Gets the range of declarator as defined by the C++ grammar. E.g.284/// `int a;` -> range of `a`,285/// `int *a;` -> range of `*a`,286/// `int a[10];` -> range of `a[10]`,287/// `int a[1][2][3];` -> range of `a[1][2][3]`,288/// `int *a = nullptr` -> range of `*a = nullptr`.289/// `int S::f(){}` -> range of `S::f()`.290/// FIXME: \p Name must be a source range.291static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T,292SourceLocation Name,293SourceRange Initializer) {294SourceLocation Start = GetStartLoc().Visit(T);295SourceLocation End = T.getEndLoc();296if (Name.isValid()) {297if (Start.isInvalid())298Start = Name;299// End of TypeLoc could be invalid if the type is invalid, fallback to the300// NameLoc.301if (End.isInvalid() || SM.isBeforeInTranslationUnit(End, Name))302End = Name;303}304if (Initializer.isValid()) {305auto InitializerEnd = Initializer.getEnd();306assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) ||307End == InitializerEnd);308End = InitializerEnd;309}310return SourceRange(Start, End);311}312313namespace {314/// All AST hierarchy roots that can be represented as pointers.315using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>;316/// Maintains a mapping from AST to syntax tree nodes. This class will get more317/// complicated as we support more kinds of AST nodes, e.g. TypeLocs.318/// FIXME: expose this as public API.319class ASTToSyntaxMapping {320public:321void add(ASTPtr From, syntax::Tree *To) {322assert(To != nullptr);323assert(!From.isNull());324325bool Added = Nodes.insert({From, To}).second;326(void)Added;327assert(Added && "mapping added twice");328}329330void add(NestedNameSpecifierLoc From, syntax::Tree *To) {331assert(To != nullptr);332assert(From.hasQualifier());333334bool Added = NNSNodes.insert({From, To}).second;335(void)Added;336assert(Added && "mapping added twice");337}338339syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); }340341syntax::Tree *find(NestedNameSpecifierLoc P) const {342return NNSNodes.lookup(P);343}344345private:346llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes;347llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes;348};349} // namespace350351/// A helper class for constructing the syntax tree while traversing a clang352/// AST.353///354/// At each point of the traversal we maintain a list of pending nodes.355/// Initially all tokens are added as pending nodes. When processing a clang AST356/// node, the clients need to:357/// - create a corresponding syntax node,358/// - assign roles to all pending child nodes with 'markChild' and359/// 'markChildToken',360/// - replace the child nodes with the new syntax node in the pending list361/// with 'foldNode'.362///363/// Note that all children are expected to be processed when building a node.364///365/// Call finalize() to finish building the tree and consume the root node.366class syntax::TreeBuilder {367public:368TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM)369: Arena(Arena),370TBTM(TBTM),371Pending(Arena, TBTM.tokenBuffer()) {372for (const auto &T : TBTM.tokenBuffer().expandedTokens())373LocationToToken.insert({T.location(), &T});374}375376llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); }377const SourceManager &sourceManager() const {378return TBTM.sourceManager();379}380381/// Populate children for \p New node, assuming it covers tokens from \p382/// Range.383void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) {384assert(New);385Pending.foldChildren(TBTM.tokenBuffer(), Range, New);386if (From)387Mapping.add(From, New);388}389390void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) {391// FIXME: add mapping for TypeLocs392foldNode(Range, New, nullptr);393}394395void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,396NestedNameSpecifierLoc From) {397assert(New);398Pending.foldChildren(TBTM.tokenBuffer(), Range, New);399if (From)400Mapping.add(From, New);401}402403/// Populate children for \p New list, assuming it covers tokens from a404/// subrange of \p SuperRange.405void foldList(ArrayRef<syntax::Token> SuperRange, syntax::List *New,406ASTPtr From) {407assert(New);408auto ListRange = Pending.shrinkToFitList(SuperRange);409Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New);410if (From)411Mapping.add(From, New);412}413414/// Notifies that we should not consume trailing semicolon when computing415/// token range of \p D.416void noticeDeclWithoutSemicolon(Decl *D);417418/// Mark the \p Child node with a corresponding \p Role. All marked children419/// should be consumed by foldNode.420/// When called on expressions (clang::Expr is derived from clang::Stmt),421/// wraps expressions into expression statement.422void markStmtChild(Stmt *Child, NodeRole Role);423/// Should be called for expressions in non-statement position to avoid424/// wrapping into expression statement.425void markExprChild(Expr *Child, NodeRole Role);426/// Set role for a token starting at \p Loc.427void markChildToken(SourceLocation Loc, NodeRole R);428/// Set role for \p T.429void markChildToken(const syntax::Token *T, NodeRole R);430431/// Set role for \p N.432void markChild(syntax::Node *N, NodeRole R);433/// Set role for the syntax node matching \p N.434void markChild(ASTPtr N, NodeRole R);435/// Set role for the syntax node matching \p N.436void markChild(NestedNameSpecifierLoc N, NodeRole R);437438/// Finish building the tree and consume the root node.439syntax::TranslationUnit *finalize() && {440auto Tokens = TBTM.tokenBuffer().expandedTokens();441assert(!Tokens.empty());442assert(Tokens.back().kind() == tok::eof);443444// Build the root of the tree, consuming all the children.445Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(),446new (Arena.getAllocator()) syntax::TranslationUnit);447448auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());449TU->assertInvariantsRecursive();450return TU;451}452453/// Finds a token starting at \p L. The token must exist if \p L is valid.454const syntax::Token *findToken(SourceLocation L) const;455456/// Finds the syntax tokens corresponding to the \p SourceRange.457ArrayRef<syntax::Token> getRange(SourceRange Range) const {458assert(Range.isValid());459return getRange(Range.getBegin(), Range.getEnd());460}461462/// Finds the syntax tokens corresponding to the passed source locations.463/// \p First is the start position of the first token and \p Last is the start464/// position of the last token.465ArrayRef<syntax::Token> getRange(SourceLocation First,466SourceLocation Last) const {467assert(First.isValid());468assert(Last.isValid());469assert(First == Last ||470TBTM.sourceManager().isBeforeInTranslationUnit(First, Last));471return llvm::ArrayRef(findToken(First), std::next(findToken(Last)));472}473474ArrayRef<syntax::Token>475getTemplateRange(const ClassTemplateSpecializationDecl *D) const {476auto Tokens = getRange(D->getSourceRange());477return maybeAppendSemicolon(Tokens, D);478}479480/// Returns true if \p D is the last declarator in a chain and is thus481/// reponsible for creating SimpleDeclaration for the whole chain.482bool isResponsibleForCreatingDeclaration(const Decl *D) const {483assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) &&484"only DeclaratorDecl and TypedefNameDecl are supported.");485486const Decl *Next = D->getNextDeclInContext();487488// There's no next sibling, this one is responsible.489if (Next == nullptr) {490return true;491}492493// Next sibling is not the same type, this one is responsible.494if (D->getKind() != Next->getKind()) {495return true;496}497// Next sibling doesn't begin at the same loc, it must be a different498// declaration, so this declarator is responsible.499if (Next->getBeginLoc() != D->getBeginLoc()) {500return true;501}502503// NextT is a member of the same declaration, and we need the last member to504// create declaration. This one is not responsible.505return false;506}507508ArrayRef<syntax::Token> getDeclarationRange(Decl *D) {509ArrayRef<syntax::Token> Tokens;510// We want to drop the template parameters for specializations.511if (const auto *S = dyn_cast<TagDecl>(D))512Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc());513else514Tokens = getRange(D->getSourceRange());515return maybeAppendSemicolon(Tokens, D);516}517518ArrayRef<syntax::Token> getExprRange(const Expr *E) const {519return getRange(E->getSourceRange());520}521522/// Find the adjusted range for the statement, consuming the trailing523/// semicolon when needed.524ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const {525auto Tokens = getRange(S->getSourceRange());526if (isa<CompoundStmt>(S))527return Tokens;528529// Some statements miss a trailing semicolon, e.g. 'return', 'continue' and530// all statements that end with those. Consume this semicolon here.531if (Tokens.back().kind() == tok::semi)532return Tokens;533return withTrailingSemicolon(Tokens);534}535536private:537ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens,538const Decl *D) const {539if (isa<NamespaceDecl>(D))540return Tokens;541if (DeclsWithoutSemicolons.count(D))542return Tokens;543// FIXME: do not consume trailing semicolon on function definitions.544// Most declarations own a semicolon in syntax trees, but not in clang AST.545return withTrailingSemicolon(Tokens);546}547548ArrayRef<syntax::Token>549withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const {550assert(!Tokens.empty());551assert(Tokens.back().kind() != tok::eof);552// We never consume 'eof', so looking at the next token is ok.553if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)554return llvm::ArrayRef(Tokens.begin(), Tokens.end() + 1);555return Tokens;556}557558void setRole(syntax::Node *N, NodeRole R) {559assert(N->getRole() == NodeRole::Detached);560N->setRole(R);561}562563/// A collection of trees covering the input tokens.564/// When created, each tree corresponds to a single token in the file.565/// Clients call 'foldChildren' to attach one or more subtrees to a parent566/// node and update the list of trees accordingly.567///568/// Ensures that added nodes properly nest and cover the whole token stream.569struct Forest {570Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) {571assert(!TB.expandedTokens().empty());572assert(TB.expandedTokens().back().kind() == tok::eof);573// Create all leaf nodes.574// Note that we do not have 'eof' in the tree.575for (const auto &T : TB.expandedTokens().drop_back()) {576auto *L = new (A.getAllocator())577syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T));578L->Original = true;579L->CanModify = TB.spelledForExpanded(T).has_value();580Trees.insert(Trees.end(), {&T, L});581}582}583584void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) {585assert(!Range.empty());586auto It = Trees.lower_bound(Range.begin());587assert(It != Trees.end() && "no node found");588assert(It->first == Range.begin() && "no child with the specified range");589assert((std::next(It) == Trees.end() ||590std::next(It)->first == Range.end()) &&591"no child with the specified range");592assert(It->second->getRole() == NodeRole::Detached &&593"re-assigning role for a child");594It->second->setRole(Role);595}596597/// Shrink \p Range to a subrange that only contains tokens of a list.598/// List elements and delimiters should already have correct roles.599ArrayRef<syntax::Token> shrinkToFitList(ArrayRef<syntax::Token> Range) {600auto BeginChildren = Trees.lower_bound(Range.begin());601assert((BeginChildren == Trees.end() ||602BeginChildren->first == Range.begin()) &&603"Range crosses boundaries of existing subtrees");604605auto EndChildren = Trees.lower_bound(Range.end());606assert(607(EndChildren == Trees.end() || EndChildren->first == Range.end()) &&608"Range crosses boundaries of existing subtrees");609610auto BelongsToList = [](decltype(Trees)::value_type KV) {611auto Role = KV.second->getRole();612return Role == syntax::NodeRole::ListElement ||613Role == syntax::NodeRole::ListDelimiter;614};615616auto BeginListChildren =617std::find_if(BeginChildren, EndChildren, BelongsToList);618619auto EndListChildren =620std::find_if_not(BeginListChildren, EndChildren, BelongsToList);621622return ArrayRef<syntax::Token>(BeginListChildren->first,623EndListChildren->first);624}625626/// Add \p Node to the forest and attach child nodes based on \p Tokens.627void foldChildren(const syntax::TokenBuffer &TB,628ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) {629// Attach children to `Node`.630assert(Node->getFirstChild() == nullptr && "node already has children");631632auto *FirstToken = Tokens.begin();633auto BeginChildren = Trees.lower_bound(FirstToken);634635assert((BeginChildren == Trees.end() ||636BeginChildren->first == FirstToken) &&637"fold crosses boundaries of existing subtrees");638auto EndChildren = Trees.lower_bound(Tokens.end());639assert(640(EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&641"fold crosses boundaries of existing subtrees");642643for (auto It = BeginChildren; It != EndChildren; ++It) {644auto *C = It->second;645if (C->getRole() == NodeRole::Detached)646C->setRole(NodeRole::Unknown);647Node->appendChildLowLevel(C);648}649650// Mark that this node came from the AST and is backed by the source code.651Node->Original = true;652Node->CanModify =653TB.spelledForExpanded(Tokens).has_value();654655Trees.erase(BeginChildren, EndChildren);656Trees.insert({FirstToken, Node});657}658659// EXPECTS: all tokens were consumed and are owned by a single root node.660syntax::Node *finalize() && {661assert(Trees.size() == 1);662auto *Root = Trees.begin()->second;663Trees = {};664return Root;665}666667std::string str(const syntax::TokenBufferTokenManager &STM) const {668std::string R;669for (auto It = Trees.begin(); It != Trees.end(); ++It) {670unsigned CoveredTokens =671It != Trees.end()672? (std::next(It)->first - It->first)673: STM.tokenBuffer().expandedTokens().end() - It->first;674675R += std::string(676formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(),677It->first->text(STM.sourceManager()), CoveredTokens));678R += It->second->dump(STM);679}680return R;681}682683private:684/// Maps from the start token to a subtree starting at that token.685/// Keys in the map are pointers into the array of expanded tokens, so686/// pointer order corresponds to the order of preprocessor tokens.687std::map<const syntax::Token *, syntax::Node *> Trees;688};689690/// For debugging purposes.691std::string str() { return Pending.str(TBTM); }692693syntax::Arena &Arena;694TokenBufferTokenManager& TBTM;695/// To quickly find tokens by their start location.696llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken;697Forest Pending;698llvm::DenseSet<Decl *> DeclsWithoutSemicolons;699ASTToSyntaxMapping Mapping;700};701702namespace {703class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {704public:705explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder)706: Builder(Builder), Context(Context) {}707708bool shouldTraversePostOrder() const { return true; }709710bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) {711return processDeclaratorAndDeclaration(DD);712}713714bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) {715return processDeclaratorAndDeclaration(TD);716}717718bool VisitDecl(Decl *D) {719assert(!D->isImplicit());720Builder.foldNode(Builder.getDeclarationRange(D),721new (allocator()) syntax::UnknownDeclaration(), D);722return true;723}724725// RAV does not call WalkUpFrom* on explicit instantiations, so we have to726// override Traverse.727// FIXME: make RAV call WalkUpFrom* instead.728bool729TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) {730if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C))731return false;732if (C->isExplicitSpecialization())733return true; // we are only interested in explicit instantiations.734auto *Declaration =735cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C));736foldExplicitTemplateInstantiation(737Builder.getTemplateRange(C),738Builder.findToken(C->getExternKeywordLoc()),739Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C);740return true;741}742743bool WalkUpFromTemplateDecl(TemplateDecl *S) {744foldTemplateDeclaration(745Builder.getDeclarationRange(S),746Builder.findToken(S->getTemplateParameters()->getTemplateLoc()),747Builder.getDeclarationRange(S->getTemplatedDecl()), S);748return true;749}750751bool WalkUpFromTagDecl(TagDecl *C) {752// FIXME: build the ClassSpecifier node.753if (!C->isFreeStanding()) {754assert(C->getNumTemplateParameterLists() == 0);755return true;756}757handleFreeStandingTagDecl(C);758return true;759}760761syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) {762assert(C->isFreeStanding());763// Class is a declaration specifier and needs a spanning declaration node.764auto DeclarationRange = Builder.getDeclarationRange(C);765syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration;766Builder.foldNode(DeclarationRange, Result, nullptr);767768// Build TemplateDeclaration nodes if we had template parameters.769auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) {770const auto *TemplateKW = Builder.findToken(L.getTemplateLoc());771auto R = llvm::ArrayRef(TemplateKW, DeclarationRange.end());772Result =773foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr);774DeclarationRange = R;775};776if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C))777ConsumeTemplateParameters(*S->getTemplateParameters());778for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I)779ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1));780return Result;781}782783bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {784// We do not want to call VisitDecl(), the declaration for translation785// unit is built by finalize().786return true;787}788789bool WalkUpFromCompoundStmt(CompoundStmt *S) {790using NodeRole = syntax::NodeRole;791792Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen);793for (auto *Child : S->body())794Builder.markStmtChild(Child, NodeRole::Statement);795Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen);796797Builder.foldNode(Builder.getStmtRange(S),798new (allocator()) syntax::CompoundStatement, S);799return true;800}801802// Some statements are not yet handled by syntax trees.803bool WalkUpFromStmt(Stmt *S) {804Builder.foldNode(Builder.getStmtRange(S),805new (allocator()) syntax::UnknownStatement, S);806return true;807}808809bool TraverseIfStmt(IfStmt *S) {810bool Result = [&, this]() {811if (S->getInit() && !TraverseStmt(S->getInit())) {812return false;813}814// In cases where the condition is an initialized declaration in a815// statement, we want to preserve the declaration and ignore the816// implicit condition expression in the syntax tree.817if (S->hasVarStorage()) {818if (!TraverseStmt(S->getConditionVariableDeclStmt()))819return false;820} else if (S->getCond() && !TraverseStmt(S->getCond()))821return false;822823if (S->getThen() && !TraverseStmt(S->getThen()))824return false;825if (S->getElse() && !TraverseStmt(S->getElse()))826return false;827return true;828}();829WalkUpFromIfStmt(S);830return Result;831}832833bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {834// We override to traverse range initializer as VarDecl.835// RAV traverses it as a statement, we produce invalid node kinds in that836// case.837// FIXME: should do this in RAV instead?838bool Result = [&, this]() {839if (S->getInit() && !TraverseStmt(S->getInit()))840return false;841if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable()))842return false;843if (S->getRangeInit() && !TraverseStmt(S->getRangeInit()))844return false;845if (S->getBody() && !TraverseStmt(S->getBody()))846return false;847return true;848}();849WalkUpFromCXXForRangeStmt(S);850return Result;851}852853bool TraverseStmt(Stmt *S) {854if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) {855// We want to consume the semicolon, make sure SimpleDeclaration does not.856for (auto *D : DS->decls())857Builder.noticeDeclWithoutSemicolon(D);858} else if (auto *E = dyn_cast_or_null<Expr>(S)) {859return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E));860}861return RecursiveASTVisitor::TraverseStmt(S);862}863864bool TraverseOpaqueValueExpr(OpaqueValueExpr *VE) {865// OpaqueValue doesn't correspond to concrete syntax, ignore it.866return true;867}868869// Some expressions are not yet handled by syntax trees.870bool WalkUpFromExpr(Expr *E) {871assert(!isImplicitExpr(E) && "should be handled by TraverseStmt");872Builder.foldNode(Builder.getExprRange(E),873new (allocator()) syntax::UnknownExpression, E);874return true;875}876877bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) {878// The semantic AST node `UserDefinedLiteral` (UDL) may have one child node879// referencing the location of the UDL suffix (`_w` in `1.2_w`). The880// UDL suffix location does not point to the beginning of a token, so we881// can't represent the UDL suffix as a separate syntax tree node.882883return WalkUpFromUserDefinedLiteral(S);884}885886syntax::UserDefinedLiteralExpression *887buildUserDefinedLiteral(UserDefinedLiteral *S) {888switch (S->getLiteralOperatorKind()) {889case UserDefinedLiteral::LOK_Integer:890return new (allocator()) syntax::IntegerUserDefinedLiteralExpression;891case UserDefinedLiteral::LOK_Floating:892return new (allocator()) syntax::FloatUserDefinedLiteralExpression;893case UserDefinedLiteral::LOK_Character:894return new (allocator()) syntax::CharUserDefinedLiteralExpression;895case UserDefinedLiteral::LOK_String:896return new (allocator()) syntax::StringUserDefinedLiteralExpression;897case UserDefinedLiteral::LOK_Raw:898case UserDefinedLiteral::LOK_Template:899// For raw literal operator and numeric literal operator template we900// cannot get the type of the operand in the semantic AST. We get this901// information from the token. As integer and floating point have the same902// token kind, we run `NumericLiteralParser` again to distinguish them.903auto TokLoc = S->getBeginLoc();904auto TokSpelling =905Builder.findToken(TokLoc)->text(Context.getSourceManager());906auto Literal =907NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(),908Context.getLangOpts(), Context.getTargetInfo(),909Context.getDiagnostics());910if (Literal.isIntegerLiteral())911return new (allocator()) syntax::IntegerUserDefinedLiteralExpression;912else {913assert(Literal.isFloatingLiteral());914return new (allocator()) syntax::FloatUserDefinedLiteralExpression;915}916}917llvm_unreachable("Unknown literal operator kind.");918}919920bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) {921Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);922Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S);923return true;924}925926// FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the927// `DependentTemplateSpecializationType` case.928/// Given a nested-name-specifier return the range for the last name929/// specifier.930///931/// e.g. `std::T::template X<U>::` => `template X<U>::`932SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) {933auto SR = NNSLoc.getLocalSourceRange();934935// The method `NestedNameSpecifierLoc::getLocalSourceRange` *should*936// return the desired `SourceRange`, but there is a corner case. For a937// `DependentTemplateSpecializationType` this method returns its938// qualifiers as well, in other words in the example above this method939// returns `T::template X<U>::` instead of only `template X<U>::`940if (auto TL = NNSLoc.getTypeLoc()) {941if (auto DependentTL =942TL.getAs<DependentTemplateSpecializationTypeLoc>()) {943// The 'template' keyword is always present in dependent template944// specializations. Except in the case of incorrect code945// TODO: Treat the case of incorrect code.946SR.setBegin(DependentTL.getTemplateKeywordLoc());947}948}949950return SR;951}952953syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) {954switch (NNS.getKind()) {955case NestedNameSpecifier::Global:956return syntax::NodeKind::GlobalNameSpecifier;957case NestedNameSpecifier::Namespace:958case NestedNameSpecifier::NamespaceAlias:959case NestedNameSpecifier::Identifier:960return syntax::NodeKind::IdentifierNameSpecifier;961case NestedNameSpecifier::TypeSpecWithTemplate:962return syntax::NodeKind::SimpleTemplateNameSpecifier;963case NestedNameSpecifier::TypeSpec: {964const auto *NNSType = NNS.getAsType();965assert(NNSType);966if (isa<DecltypeType>(NNSType))967return syntax::NodeKind::DecltypeNameSpecifier;968if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>(969NNSType))970return syntax::NodeKind::SimpleTemplateNameSpecifier;971return syntax::NodeKind::IdentifierNameSpecifier;972}973default:974// FIXME: Support Microsoft's __super975llvm::report_fatal_error("We don't yet support the __super specifier",976true);977}978}979980syntax::NameSpecifier *981buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) {982assert(NNSLoc.hasQualifier());983auto NameSpecifierTokens =984Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back();985switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) {986case syntax::NodeKind::GlobalNameSpecifier:987return new (allocator()) syntax::GlobalNameSpecifier;988case syntax::NodeKind::IdentifierNameSpecifier: {989assert(NameSpecifierTokens.size() == 1);990Builder.markChildToken(NameSpecifierTokens.begin(),991syntax::NodeRole::Unknown);992auto *NS = new (allocator()) syntax::IdentifierNameSpecifier;993Builder.foldNode(NameSpecifierTokens, NS, nullptr);994return NS;995}996case syntax::NodeKind::SimpleTemplateNameSpecifier: {997// TODO: Build `SimpleTemplateNameSpecifier` children and implement998// accessors to them.999// Be aware, we cannot do that simply by calling `TraverseTypeLoc`,1000// some `TypeLoc`s have inside them the previous name specifier and1001// we want to treat them independently.1002auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier;1003Builder.foldNode(NameSpecifierTokens, NS, nullptr);1004return NS;1005}1006case syntax::NodeKind::DecltypeNameSpecifier: {1007const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>();1008if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL))1009return nullptr;1010auto *NS = new (allocator()) syntax::DecltypeNameSpecifier;1011// TODO: Implement accessor to `DecltypeNameSpecifier` inner1012// `DecltypeTypeLoc`.1013// For that add mapping from `TypeLoc` to `syntax::Node*` then:1014// Builder.markChild(TypeLoc, syntax::NodeRole);1015Builder.foldNode(NameSpecifierTokens, NS, nullptr);1016return NS;1017}1018default:1019llvm_unreachable("getChildKind() does not return this value");1020}1021}10221023// To build syntax tree nodes for NestedNameSpecifierLoc we override1024// Traverse instead of WalkUpFrom because we want to traverse the children1025// ourselves and build a list instead of a nested tree of name specifier1026// prefixes.1027bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) {1028if (!QualifierLoc)1029return true;1030for (auto It = QualifierLoc; It; It = It.getPrefix()) {1031auto *NS = buildNameSpecifier(It);1032if (!NS)1033return false;1034Builder.markChild(NS, syntax::NodeRole::ListElement);1035Builder.markChildToken(It.getEndLoc(), syntax::NodeRole::ListDelimiter);1036}1037Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()),1038new (allocator()) syntax::NestedNameSpecifier,1039QualifierLoc);1040return true;1041}10421043syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc,1044SourceLocation TemplateKeywordLoc,1045SourceRange UnqualifiedIdLoc,1046ASTPtr From) {1047if (QualifierLoc) {1048Builder.markChild(QualifierLoc, syntax::NodeRole::Qualifier);1049if (TemplateKeywordLoc.isValid())1050Builder.markChildToken(TemplateKeywordLoc,1051syntax::NodeRole::TemplateKeyword);1052}10531054auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId;1055Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId,1056nullptr);1057Builder.markChild(TheUnqualifiedId, syntax::NodeRole::UnqualifiedId);10581059auto IdExpressionBeginLoc =1060QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin();10611062auto *TheIdExpression = new (allocator()) syntax::IdExpression;1063Builder.foldNode(1064Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()),1065TheIdExpression, From);10661067return TheIdExpression;1068}10691070bool WalkUpFromMemberExpr(MemberExpr *S) {1071// For `MemberExpr` with implicit `this->` we generate a simple1072// `id-expression` syntax node, beacuse an implicit `member-expression` is1073// syntactically undistinguishable from an `id-expression`1074if (S->isImplicitAccess()) {1075buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(),1076SourceRange(S->getMemberLoc(), S->getEndLoc()), S);1077return true;1078}10791080auto *TheIdExpression = buildIdExpression(1081S->getQualifierLoc(), S->getTemplateKeywordLoc(),1082SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr);10831084Builder.markChild(TheIdExpression, syntax::NodeRole::Member);10851086Builder.markExprChild(S->getBase(), syntax::NodeRole::Object);1087Builder.markChildToken(S->getOperatorLoc(), syntax::NodeRole::AccessToken);10881089Builder.foldNode(Builder.getExprRange(S),1090new (allocator()) syntax::MemberExpression, S);1091return true;1092}10931094bool WalkUpFromDeclRefExpr(DeclRefExpr *S) {1095buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(),1096SourceRange(S->getLocation(), S->getEndLoc()), S);10971098return true;1099}11001101// Same logic as DeclRefExpr.1102bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) {1103buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(),1104SourceRange(S->getLocation(), S->getEndLoc()), S);11051106return true;1107}11081109bool WalkUpFromCXXThisExpr(CXXThisExpr *S) {1110if (!S->isImplicit()) {1111Builder.markChildToken(S->getLocation(),1112syntax::NodeRole::IntroducerKeyword);1113Builder.foldNode(Builder.getExprRange(S),1114new (allocator()) syntax::ThisExpression, S);1115}1116return true;1117}11181119bool WalkUpFromParenExpr(ParenExpr *S) {1120Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen);1121Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::SubExpression);1122Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen);1123Builder.foldNode(Builder.getExprRange(S),1124new (allocator()) syntax::ParenExpression, S);1125return true;1126}11271128bool WalkUpFromIntegerLiteral(IntegerLiteral *S) {1129Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);1130Builder.foldNode(Builder.getExprRange(S),1131new (allocator()) syntax::IntegerLiteralExpression, S);1132return true;1133}11341135bool WalkUpFromCharacterLiteral(CharacterLiteral *S) {1136Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);1137Builder.foldNode(Builder.getExprRange(S),1138new (allocator()) syntax::CharacterLiteralExpression, S);1139return true;1140}11411142bool WalkUpFromFloatingLiteral(FloatingLiteral *S) {1143Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);1144Builder.foldNode(Builder.getExprRange(S),1145new (allocator()) syntax::FloatingLiteralExpression, S);1146return true;1147}11481149bool WalkUpFromStringLiteral(StringLiteral *S) {1150Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken);1151Builder.foldNode(Builder.getExprRange(S),1152new (allocator()) syntax::StringLiteralExpression, S);1153return true;1154}11551156bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) {1157Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);1158Builder.foldNode(Builder.getExprRange(S),1159new (allocator()) syntax::BoolLiteralExpression, S);1160return true;1161}11621163bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) {1164Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken);1165Builder.foldNode(Builder.getExprRange(S),1166new (allocator()) syntax::CxxNullPtrExpression, S);1167return true;1168}11691170bool WalkUpFromUnaryOperator(UnaryOperator *S) {1171Builder.markChildToken(S->getOperatorLoc(),1172syntax::NodeRole::OperatorToken);1173Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::Operand);11741175if (S->isPostfix())1176Builder.foldNode(Builder.getExprRange(S),1177new (allocator()) syntax::PostfixUnaryOperatorExpression,1178S);1179else1180Builder.foldNode(Builder.getExprRange(S),1181new (allocator()) syntax::PrefixUnaryOperatorExpression,1182S);11831184return true;1185}11861187bool WalkUpFromBinaryOperator(BinaryOperator *S) {1188Builder.markExprChild(S->getLHS(), syntax::NodeRole::LeftHandSide);1189Builder.markChildToken(S->getOperatorLoc(),1190syntax::NodeRole::OperatorToken);1191Builder.markExprChild(S->getRHS(), syntax::NodeRole::RightHandSide);1192Builder.foldNode(Builder.getExprRange(S),1193new (allocator()) syntax::BinaryOperatorExpression, S);1194return true;1195}11961197/// Builds `CallArguments` syntax node from arguments that appear in source1198/// code, i.e. not default arguments.1199syntax::CallArguments *1200buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) {1201auto Args = dropDefaultArgs(ArgsAndDefaultArgs);1202for (auto *Arg : Args) {1203Builder.markExprChild(Arg, syntax::NodeRole::ListElement);1204const auto *DelimiterToken =1205std::next(Builder.findToken(Arg->getEndLoc()));1206if (DelimiterToken->kind() == clang::tok::TokenKind::comma)1207Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter);1208}12091210auto *Arguments = new (allocator()) syntax::CallArguments;1211if (!Args.empty())1212Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(),1213(*(Args.end() - 1))->getEndLoc()),1214Arguments, nullptr);12151216return Arguments;1217}12181219bool WalkUpFromCallExpr(CallExpr *S) {1220Builder.markExprChild(S->getCallee(), syntax::NodeRole::Callee);12211222const auto *LParenToken =1223std::next(Builder.findToken(S->getCallee()->getEndLoc()));1224// FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed1225// the test on decltype desctructors.1226if (LParenToken->kind() == clang::tok::l_paren)1227Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen);12281229Builder.markChild(buildCallArguments(S->arguments()),1230syntax::NodeRole::Arguments);12311232Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen);12331234Builder.foldNode(Builder.getRange(S->getSourceRange()),1235new (allocator()) syntax::CallExpression, S);1236return true;1237}12381239bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) {1240// Ignore the implicit calls to default constructors.1241if ((S->getNumArgs() == 0 || isa<CXXDefaultArgExpr>(S->getArg(0))) &&1242S->getParenOrBraceRange().isInvalid())1243return true;1244return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S);1245}12461247bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) {1248// To construct a syntax tree of the same shape for calls to built-in and1249// user-defined operators, ignore the `DeclRefExpr` that refers to the1250// operator and treat it as a simple token. Do that by traversing1251// arguments instead of children.1252for (auto *child : S->arguments()) {1253// A postfix unary operator is declared as taking two operands. The1254// second operand is used to distinguish from its prefix counterpart. In1255// the semantic AST this "phantom" operand is represented as a1256// `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this1257// operand because it does not correspond to anything written in source1258// code.1259if (child->getSourceRange().isInvalid()) {1260assert(getOperatorNodeKind(*S) ==1261syntax::NodeKind::PostfixUnaryOperatorExpression);1262continue;1263}1264if (!TraverseStmt(child))1265return false;1266}1267return WalkUpFromCXXOperatorCallExpr(S);1268}12691270bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) {1271switch (getOperatorNodeKind(*S)) {1272case syntax::NodeKind::BinaryOperatorExpression:1273Builder.markExprChild(S->getArg(0), syntax::NodeRole::LeftHandSide);1274Builder.markChildToken(S->getOperatorLoc(),1275syntax::NodeRole::OperatorToken);1276Builder.markExprChild(S->getArg(1), syntax::NodeRole::RightHandSide);1277Builder.foldNode(Builder.getExprRange(S),1278new (allocator()) syntax::BinaryOperatorExpression, S);1279return true;1280case syntax::NodeKind::PrefixUnaryOperatorExpression:1281Builder.markChildToken(S->getOperatorLoc(),1282syntax::NodeRole::OperatorToken);1283Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand);1284Builder.foldNode(Builder.getExprRange(S),1285new (allocator()) syntax::PrefixUnaryOperatorExpression,1286S);1287return true;1288case syntax::NodeKind::PostfixUnaryOperatorExpression:1289Builder.markChildToken(S->getOperatorLoc(),1290syntax::NodeRole::OperatorToken);1291Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand);1292Builder.foldNode(Builder.getExprRange(S),1293new (allocator()) syntax::PostfixUnaryOperatorExpression,1294S);1295return true;1296case syntax::NodeKind::CallExpression: {1297Builder.markExprChild(S->getArg(0), syntax::NodeRole::Callee);12981299const auto *LParenToken =1300std::next(Builder.findToken(S->getArg(0)->getEndLoc()));1301// FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have1302// fixed the test on decltype desctructors.1303if (LParenToken->kind() == clang::tok::l_paren)1304Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen);13051306Builder.markChild(buildCallArguments(CallExpr::arg_range(1307S->arg_begin() + 1, S->arg_end())),1308syntax::NodeRole::Arguments);13091310Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen);13111312Builder.foldNode(Builder.getRange(S->getSourceRange()),1313new (allocator()) syntax::CallExpression, S);1314return true;1315}1316case syntax::NodeKind::UnknownExpression:1317return WalkUpFromExpr(S);1318default:1319llvm_unreachable("getOperatorNodeKind() does not return this value");1320}1321}13221323bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; }13241325bool WalkUpFromNamespaceDecl(NamespaceDecl *S) {1326auto Tokens = Builder.getDeclarationRange(S);1327if (Tokens.front().kind() == tok::coloncolon) {1328// Handle nested namespace definitions. Those start at '::' token, e.g.1329// namespace a^::b {}1330// FIXME: build corresponding nodes for the name of this namespace.1331return true;1332}1333Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S);1334return true;1335}13361337// FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test1338// results. Find test coverage or remove it.1339bool TraverseParenTypeLoc(ParenTypeLoc L) {1340// We reverse order of traversal to get the proper syntax structure.1341if (!WalkUpFromParenTypeLoc(L))1342return false;1343return TraverseTypeLoc(L.getInnerLoc());1344}13451346bool WalkUpFromParenTypeLoc(ParenTypeLoc L) {1347Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);1348Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);1349Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()),1350new (allocator()) syntax::ParenDeclarator, L);1351return true;1352}13531354// Declarator chunks, they are produced by type locs and some clang::Decls.1355bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) {1356Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen);1357Builder.markExprChild(L.getSizeExpr(), syntax::NodeRole::Size);1358Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen);1359Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()),1360new (allocator()) syntax::ArraySubscript, L);1361return true;1362}13631364syntax::ParameterDeclarationList *1365buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) {1366for (auto *P : Params) {1367Builder.markChild(P, syntax::NodeRole::ListElement);1368const auto *DelimiterToken = std::next(Builder.findToken(P->getEndLoc()));1369if (DelimiterToken->kind() == clang::tok::TokenKind::comma)1370Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter);1371}1372auto *Parameters = new (allocator()) syntax::ParameterDeclarationList;1373if (!Params.empty())1374Builder.foldNode(Builder.getRange(Params.front()->getBeginLoc(),1375Params.back()->getEndLoc()),1376Parameters, nullptr);1377return Parameters;1378}13791380bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) {1381Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen);13821383Builder.markChild(buildParameterDeclarationList(L.getParams()),1384syntax::NodeRole::Parameters);13851386Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen);1387Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()),1388new (allocator()) syntax::ParametersAndQualifiers, L);1389return true;1390}13911392bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) {1393if (!L.getTypePtr()->hasTrailingReturn())1394return WalkUpFromFunctionTypeLoc(L);13951396auto *TrailingReturnTokens = buildTrailingReturn(L);1397// Finish building the node for parameters.1398Builder.markChild(TrailingReturnTokens, syntax::NodeRole::TrailingReturn);1399return WalkUpFromFunctionTypeLoc(L);1400}14011402bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) {1403// In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds1404// to "Y::*" but it points to a `ParenTypeLoc` that corresponds to1405// "(Y::*mp)" We thus reverse the order of traversal to get the proper1406// syntax structure.1407if (!WalkUpFromMemberPointerTypeLoc(L))1408return false;1409return TraverseTypeLoc(L.getPointeeLoc());1410}14111412bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) {1413auto SR = L.getLocalSourceRange();1414Builder.foldNode(Builder.getRange(SR),1415new (allocator()) syntax::MemberPointer, L);1416return true;1417}14181419// The code below is very regular, it could even be generated with some1420// preprocessor magic. We merely assign roles to the corresponding children1421// and fold resulting nodes.1422bool WalkUpFromDeclStmt(DeclStmt *S) {1423Builder.foldNode(Builder.getStmtRange(S),1424new (allocator()) syntax::DeclarationStatement, S);1425return true;1426}14271428bool WalkUpFromNullStmt(NullStmt *S) {1429Builder.foldNode(Builder.getStmtRange(S),1430new (allocator()) syntax::EmptyStatement, S);1431return true;1432}14331434bool WalkUpFromSwitchStmt(SwitchStmt *S) {1435Builder.markChildToken(S->getSwitchLoc(),1436syntax::NodeRole::IntroducerKeyword);1437Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);1438Builder.foldNode(Builder.getStmtRange(S),1439new (allocator()) syntax::SwitchStatement, S);1440return true;1441}14421443bool WalkUpFromCaseStmt(CaseStmt *S) {1444Builder.markChildToken(S->getKeywordLoc(),1445syntax::NodeRole::IntroducerKeyword);1446Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseValue);1447Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);1448Builder.foldNode(Builder.getStmtRange(S),1449new (allocator()) syntax::CaseStatement, S);1450return true;1451}14521453bool WalkUpFromDefaultStmt(DefaultStmt *S) {1454Builder.markChildToken(S->getKeywordLoc(),1455syntax::NodeRole::IntroducerKeyword);1456Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement);1457Builder.foldNode(Builder.getStmtRange(S),1458new (allocator()) syntax::DefaultStatement, S);1459return true;1460}14611462bool WalkUpFromIfStmt(IfStmt *S) {1463Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword);1464Stmt *ConditionStatement = S->getCond();1465if (S->hasVarStorage())1466ConditionStatement = S->getConditionVariableDeclStmt();1467Builder.markStmtChild(ConditionStatement, syntax::NodeRole::Condition);1468Builder.markStmtChild(S->getThen(), syntax::NodeRole::ThenStatement);1469Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::ElseKeyword);1470Builder.markStmtChild(S->getElse(), syntax::NodeRole::ElseStatement);1471Builder.foldNode(Builder.getStmtRange(S),1472new (allocator()) syntax::IfStatement, S);1473return true;1474}14751476bool WalkUpFromForStmt(ForStmt *S) {1477Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);1478Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);1479Builder.foldNode(Builder.getStmtRange(S),1480new (allocator()) syntax::ForStatement, S);1481return true;1482}14831484bool WalkUpFromWhileStmt(WhileStmt *S) {1485Builder.markChildToken(S->getWhileLoc(),1486syntax::NodeRole::IntroducerKeyword);1487Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);1488Builder.foldNode(Builder.getStmtRange(S),1489new (allocator()) syntax::WhileStatement, S);1490return true;1491}14921493bool WalkUpFromContinueStmt(ContinueStmt *S) {1494Builder.markChildToken(S->getContinueLoc(),1495syntax::NodeRole::IntroducerKeyword);1496Builder.foldNode(Builder.getStmtRange(S),1497new (allocator()) syntax::ContinueStatement, S);1498return true;1499}15001501bool WalkUpFromBreakStmt(BreakStmt *S) {1502Builder.markChildToken(S->getBreakLoc(),1503syntax::NodeRole::IntroducerKeyword);1504Builder.foldNode(Builder.getStmtRange(S),1505new (allocator()) syntax::BreakStatement, S);1506return true;1507}15081509bool WalkUpFromReturnStmt(ReturnStmt *S) {1510Builder.markChildToken(S->getReturnLoc(),1511syntax::NodeRole::IntroducerKeyword);1512Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnValue);1513Builder.foldNode(Builder.getStmtRange(S),1514new (allocator()) syntax::ReturnStatement, S);1515return true;1516}15171518bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) {1519Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword);1520Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement);1521Builder.foldNode(Builder.getStmtRange(S),1522new (allocator()) syntax::RangeBasedForStatement, S);1523return true;1524}15251526bool WalkUpFromEmptyDecl(EmptyDecl *S) {1527Builder.foldNode(Builder.getDeclarationRange(S),1528new (allocator()) syntax::EmptyDeclaration, S);1529return true;1530}15311532bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) {1533Builder.markExprChild(S->getAssertExpr(), syntax::NodeRole::Condition);1534Builder.markExprChild(S->getMessage(), syntax::NodeRole::Message);1535Builder.foldNode(Builder.getDeclarationRange(S),1536new (allocator()) syntax::StaticAssertDeclaration, S);1537return true;1538}15391540bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) {1541Builder.foldNode(Builder.getDeclarationRange(S),1542new (allocator()) syntax::LinkageSpecificationDeclaration,1543S);1544return true;1545}15461547bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) {1548Builder.foldNode(Builder.getDeclarationRange(S),1549new (allocator()) syntax::NamespaceAliasDefinition, S);1550return true;1551}15521553bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) {1554Builder.foldNode(Builder.getDeclarationRange(S),1555new (allocator()) syntax::UsingNamespaceDirective, S);1556return true;1557}15581559bool WalkUpFromUsingDecl(UsingDecl *S) {1560Builder.foldNode(Builder.getDeclarationRange(S),1561new (allocator()) syntax::UsingDeclaration, S);1562return true;1563}15641565bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) {1566Builder.foldNode(Builder.getDeclarationRange(S),1567new (allocator()) syntax::UsingDeclaration, S);1568return true;1569}15701571bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) {1572Builder.foldNode(Builder.getDeclarationRange(S),1573new (allocator()) syntax::UsingDeclaration, S);1574return true;1575}15761577bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) {1578Builder.foldNode(Builder.getDeclarationRange(S),1579new (allocator()) syntax::TypeAliasDeclaration, S);1580return true;1581}15821583private:1584/// Folds SimpleDeclarator node (if present) and in case this is the last1585/// declarator in the chain it also folds SimpleDeclaration node.1586template <class T> bool processDeclaratorAndDeclaration(T *D) {1587auto Range = getDeclaratorRange(1588Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(),1589getQualifiedNameStart(D), getInitializerRange(D));15901591// There doesn't have to be a declarator (e.g. `void foo(int)` only has1592// declaration, but no declarator).1593if (!Range.getBegin().isValid()) {1594Builder.markChild(new (allocator()) syntax::DeclaratorList,1595syntax::NodeRole::Declarators);1596Builder.foldNode(Builder.getDeclarationRange(D),1597new (allocator()) syntax::SimpleDeclaration, D);1598return true;1599}16001601auto *N = new (allocator()) syntax::SimpleDeclarator;1602Builder.foldNode(Builder.getRange(Range), N, nullptr);1603Builder.markChild(N, syntax::NodeRole::ListElement);16041605if (!Builder.isResponsibleForCreatingDeclaration(D)) {1606// If this is not the last declarator in the declaration we expect a1607// delimiter after it.1608const auto *DelimiterToken = std::next(Builder.findToken(Range.getEnd()));1609if (DelimiterToken->kind() == clang::tok::TokenKind::comma)1610Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter);1611} else {1612auto *DL = new (allocator()) syntax::DeclaratorList;1613auto DeclarationRange = Builder.getDeclarationRange(D);1614Builder.foldList(DeclarationRange, DL, nullptr);16151616Builder.markChild(DL, syntax::NodeRole::Declarators);1617Builder.foldNode(DeclarationRange,1618new (allocator()) syntax::SimpleDeclaration, D);1619}1620return true;1621}16221623/// Returns the range of the built node.1624syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) {1625assert(L.getTypePtr()->hasTrailingReturn());16261627auto ReturnedType = L.getReturnLoc();1628// Build node for the declarator, if any.1629auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(ReturnedType),1630ReturnedType.getEndLoc());1631syntax::SimpleDeclarator *ReturnDeclarator = nullptr;1632if (ReturnDeclaratorRange.isValid()) {1633ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator;1634Builder.foldNode(Builder.getRange(ReturnDeclaratorRange),1635ReturnDeclarator, nullptr);1636}16371638// Build node for trailing return type.1639auto Return = Builder.getRange(ReturnedType.getSourceRange());1640const auto *Arrow = Return.begin() - 1;1641assert(Arrow->kind() == tok::arrow);1642auto Tokens = llvm::ArrayRef(Arrow, Return.end());1643Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken);1644if (ReturnDeclarator)1645Builder.markChild(ReturnDeclarator, syntax::NodeRole::Declarator);1646auto *R = new (allocator()) syntax::TrailingReturnType;1647Builder.foldNode(Tokens, R, L);1648return R;1649}16501651void foldExplicitTemplateInstantiation(1652ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW,1653const syntax::Token *TemplateKW,1654syntax::SimpleDeclaration *InnerDeclaration, Decl *From) {1655assert(!ExternKW || ExternKW->kind() == tok::kw_extern);1656assert(TemplateKW && TemplateKW->kind() == tok::kw_template);1657Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword);1658Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);1659Builder.markChild(InnerDeclaration, syntax::NodeRole::Declaration);1660Builder.foldNode(1661Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From);1662}16631664syntax::TemplateDeclaration *foldTemplateDeclaration(1665ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW,1666ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) {1667assert(TemplateKW && TemplateKW->kind() == tok::kw_template);1668Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword);16691670auto *N = new (allocator()) syntax::TemplateDeclaration;1671Builder.foldNode(Range, N, From);1672Builder.markChild(N, syntax::NodeRole::Declaration);1673return N;1674}16751676/// A small helper to save some typing.1677llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }16781679syntax::TreeBuilder &Builder;1680const ASTContext &Context;1681};1682} // namespace16831684void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) {1685DeclsWithoutSemicolons.insert(D);1686}16871688void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {1689if (Loc.isInvalid())1690return;1691Pending.assignRole(*findToken(Loc), Role);1692}16931694void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) {1695if (!T)1696return;1697Pending.assignRole(*T, R);1698}16991700void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) {1701assert(N);1702setRole(N, R);1703}17041705void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) {1706auto *SN = Mapping.find(N);1707assert(SN != nullptr);1708setRole(SN, R);1709}1710void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) {1711auto *SN = Mapping.find(NNSLoc);1712assert(SN != nullptr);1713setRole(SN, R);1714}17151716void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) {1717if (!Child)1718return;17191720syntax::Tree *ChildNode;1721if (Expr *ChildExpr = dyn_cast<Expr>(Child)) {1722// This is an expression in a statement position, consume the trailing1723// semicolon and form an 'ExpressionStatement' node.1724markExprChild(ChildExpr, NodeRole::Expression);1725ChildNode = new (allocator()) syntax::ExpressionStatement;1726// (!) 'getStmtRange()' ensures this covers a trailing semicolon.1727Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode);1728} else {1729ChildNode = Mapping.find(Child);1730}1731assert(ChildNode != nullptr);1732setRole(ChildNode, Role);1733}17341735void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) {1736if (!Child)1737return;1738Child = IgnoreImplicit(Child);17391740syntax::Tree *ChildNode = Mapping.find(Child);1741assert(ChildNode != nullptr);1742setRole(ChildNode, Role);1743}17441745const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {1746if (L.isInvalid())1747return nullptr;1748auto It = LocationToToken.find(L);1749assert(It != LocationToToken.end());1750return It->second;1751}17521753syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A,1754TokenBufferTokenManager& TBTM,1755ASTContext &Context) {1756TreeBuilder Builder(A, TBTM);1757BuildTreeVisitor(Context, Builder).TraverseAST(Context);1758return std::move(Builder).finalize();1759}176017611762