Path: blob/main/contrib/llvm-project/clang/lib/Format/Macros.h
35233 views
//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This file contains the main building blocks of macro support in10/// clang-format.11///12/// In order to not violate the requirement that clang-format can format files13/// in isolation, clang-format's macro support uses expansions users provide14/// as part of clang-format's style configuration.15///16/// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support17/// one level of expansion (\see MacroExpander for a full description of what18/// is supported).19///20/// As part of parsing, clang-format uses the MacroExpander to expand the21/// spelled token streams into expanded token streams when it encounters a22/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines23/// from the expanded token stream.24/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor25/// matches the spelled token stream into unwrapped lines that best resemble the26/// structure of the expanded unwrapped lines. These reconstructed unwrapped27/// lines are aliasing the tokens in the expanded token stream, so that token28/// annotations will be reused when formatting the spelled macro calls.29///30/// When formatting, clang-format annotates and formats the expanded unwrapped31/// lines first, determining the token types. Next, it formats the spelled32/// unwrapped lines, keeping the token types fixed, while allowing other33/// formatting decisions to change.34///35//===----------------------------------------------------------------------===//3637#ifndef CLANG_LIB_FORMAT_MACROS_H38#define CLANG_LIB_FORMAT_MACROS_H3940#include <list>4142#include "FormatToken.h"43#include "llvm/ADT/DenseMap.h"4445namespace clang {46namespace format {4748struct UnwrappedLine;49struct UnwrappedLineNode;5051/// Takes a set of macro definitions as strings and allows expanding calls to52/// those macros.53///54/// For example:55/// Definition: A(x, y)=x + y56/// Call : A(int a = 1, 2)57/// Expansion : int a = 1 + 258///59/// Expansion does not check arity of the definition.60/// If fewer arguments than expected are provided, the remaining parameters61/// are considered empty:62/// Call : A(a)63/// Expansion: a +64/// If more arguments than expected are provided, they will be discarded.65///66/// The expander does not support:67/// - recursive expansion68/// - stringification69/// - concatenation70/// - variadic macros71///72/// Furthermore, only a single expansion of each macro argument is supported,73/// so that we cannot get conflicting formatting decisions from different74/// expansions.75/// Definition: A(x)=x+x76/// Call : A(id)77/// Expansion : id+x78///79class MacroExpander {80public:81using ArgsList = ArrayRef<SmallVector<FormatToken *, 8>>;8283/// Construct a macro expander from a set of macro definitions.84/// Macro definitions must be encoded as UTF-8.85///86/// Each entry in \p Macros must conform to the following simple87/// macro-definition language:88/// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>89/// <params> ::= <id-list> | ""90/// <id-list> ::= <id> | <id> "," <params>91/// <expansion> ::= "=" <tail> | <eof>92/// <tail> ::= <tok> <tail> | <eof>93///94/// Macros that cannot be parsed will be silently discarded.95///96MacroExpander(const std::vector<std::string> &Macros,97SourceManager &SourceMgr, const FormatStyle &Style,98llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,99IdentifierTable &IdentTable);100~MacroExpander();101102/// Returns whether any macro \p Name is defined, regardless of overloads.103bool defined(StringRef Name) const;104105/// Returns whetherh there is an object-like overload, i.e. where the macro106/// has no arguments and should not consume subsequent parentheses.107bool objectLike(StringRef Name) const;108109/// Returns whether macro \p Name provides an overload with the given arity.110bool hasArity(StringRef Name, unsigned Arity) const;111112/// Returns the expanded stream of format tokens for \p ID, where113/// each element in \p Args is a positional argument to the macro call.114/// If \p Args is not set, the object-like overload is used.115/// If \p Args is set, the overload with the arity equal to \c Args.size() is116/// used.117SmallVector<FormatToken *, 8>118expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;119120private:121struct Definition;122class DefinitionParser;123124void parseDefinition(const std::string &Macro);125126SourceManager &SourceMgr;127const FormatStyle &Style;128llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;129IdentifierTable &IdentTable;130SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;131llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;132llvm::StringMap<Definition> ObjectLike;133};134135/// Converts a sequence of UnwrappedLines containing expanded macros into a136/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be137/// broken into child lines, in a way that best conveys the structure of the138/// expanded code.139///140/// In the simplest case, a spelled UnwrappedLine contains one macro, and after141/// expanding it we have one expanded UnwrappedLine. In general, macro142/// expansions can span UnwrappedLines, and multiple macros can contribute143/// tokens to the same line. We keep consuming expanded lines until:144/// * all expansions that started have finished (we're not chopping any macros145/// in half)146/// * *and* we've reached the end of a *spelled* unwrapped line.147///148/// A single UnwrappedLine represents this chunk of code.149///150/// After this point, the state of the spelled/expanded stream is "in sync"151/// (both at the start of an UnwrappedLine, with no macros open), so the152/// Reconstructor can be thrown away and parsing can continue.153///154/// Given a mapping from the macro name identifier token in the macro call155/// to the tokens of the macro call, for example:156/// CLASSA -> CLASSA({public: void x();})157///158/// When getting the formatted lines of the expansion via the \c addLine method159/// (each '->' specifies a call to \c addLine ):160/// -> class A {161/// -> public:162/// -> void x();163/// -> };164///165/// Creates the tree of unwrapped lines containing the macro call tokens so that166/// the macro call tokens fit the semantic structure of the expanded formatted167/// lines:168/// -> CLASSA({169/// -> public:170/// -> void x();171/// -> })172class MacroCallReconstructor {173public:174/// Create an Reconstructor whose resulting \p UnwrappedLine will start at175/// \p Level, using the map from name identifier token to the corresponding176/// tokens of the spelled macro call.177MacroCallReconstructor(178unsigned Level,179const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>180&ActiveExpansions);181182/// For the given \p Line, match all occurences of tokens expanded from a183/// macro to unwrapped lines in the spelled macro call so that the resulting184/// tree of unwrapped lines best resembles the structure of unwrapped lines185/// passed in via \c addLine.186void addLine(const UnwrappedLine &Line);187188/// Check whether at the current state there is no open macro expansion189/// that needs to be processed to finish an macro call.190/// Only when \c finished() is true, \c takeResult() can be called to retrieve191/// the resulting \c UnwrappedLine.192/// If there are multiple subsequent macro calls within an unwrapped line in193/// the spelled token stream, the calling code may also continue to call194/// \c addLine() when \c finished() is true.195bool finished() const { return ActiveExpansions.empty(); }196197/// Retrieve the formatted \c UnwrappedLine containing the orginal198/// macro calls, formatted according to the expanded token stream received199/// via \c addLine().200/// Generally, this line tries to have the same structure as the expanded,201/// formatted unwrapped lines handed in via \c addLine(), with the exception202/// that for multiple top-level lines, each subsequent line will be the203/// child of the last token in its predecessor. This representation is chosen204/// because it is a precondition to the formatter that we get what looks like205/// a single statement in a single \c UnwrappedLine (i.e. matching parens).206///207/// If a token in a macro argument is a child of a token in the expansion,208/// the parent will be the corresponding token in the macro call.209/// For example:210/// #define C(a, b) class C { a b211/// C(int x;, int y;)212/// would expand to213/// class C { int x; int y;214/// where in a formatted line "int x;" and "int y;" would both be new separate215/// lines.216///217/// In the result, "int x;" will be a child of the opening parenthesis in "C("218/// and "int y;" will be a child of the "," token:219/// C (220/// \- int x;221/// ,222/// \- int y;223/// )224UnwrappedLine takeResult() &&;225226private:227void add(FormatToken *Token, FormatToken *ExpandedParent, bool First,228unsigned Level);229void prepareParent(FormatToken *ExpandedParent, bool First, unsigned Level);230FormatToken *getParentInResult(FormatToken *Parent);231void reconstruct(FormatToken *Token);232void startReconstruction(FormatToken *Token);233bool reconstructActiveCallUntil(FormatToken *Token);234void endReconstruction(FormatToken *Token);235bool processNextReconstructed();236void finalize();237238struct ReconstructedLine;239240void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);241UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);242void debug(const ReconstructedLine &Line, int Level);243ReconstructedLine &parentLine();244ReconstructedLine *currentLine();245void debugParentMap() const;246247#ifndef NDEBUG248enum ReconstructorState {249Start, // No macro expansion was found in the input yet.250InProgress, // During a macro reconstruction.251Finalized, // Past macro reconstruction, the result is finalized.252};253ReconstructorState State = Start;254#endif255256// Node in which we build up the resulting unwrapped line; this type is257// analogous to UnwrappedLineNode.258struct LineNode {259LineNode() = default;260LineNode(FormatToken *Tok) : Tok(Tok) {}261FormatToken *Tok = nullptr;262SmallVector<std::unique_ptr<ReconstructedLine>> Children;263};264265// Line in which we build up the resulting unwrapped line.266// FIXME: Investigate changing UnwrappedLine to a pointer type and using it267// instead of rolling our own type.268struct ReconstructedLine {269explicit ReconstructedLine(unsigned Level) : Level(Level) {}270unsigned Level;271SmallVector<std::unique_ptr<LineNode>> Tokens;272};273274// The line in which we collect the resulting reconstructed output.275// To reduce special cases in the algorithm, the first level of the line276// contains a single null token that has the reconstructed incoming277// lines as children.278// In the end, we stich the lines together so that each subsequent line279// is a child of the last token of the previous line. This is necessary280// in order to format the overall expression as a single logical line -281// if we created separate lines, we'd format them with their own top-level282// indent depending on the semantic structure, which is not desired.283ReconstructedLine Result;284285// Stack of currently "open" lines, where each line's predecessor's last286// token is the parent token for that line.287SmallVector<ReconstructedLine *> ActiveReconstructedLines;288289// Maps from the expanded token to the token that takes its place in the290// reconstructed token stream in terms of parent-child relationships.291// Note that it might take multiple steps to arrive at the correct292// parent in the output.293// Given: #define C(a, b) []() { a; b; }294// And a call: C(f(), g())295// The structure in the incoming formatted unwrapped line will be:296// []() {297// |- f();298// \- g();299// }300// with f and g being children of the opening brace.301// In the reconstructed call:302// C(f(), g())303// \- f()304// \- g()305// We want f to be a child of the opening parenthesis and g to be a child306// of the comma token in the macro call.307// Thus, we map308// { -> (309// and add310// ( -> ,311// once we're past the comma in the reconstruction.312llvm::DenseMap<FormatToken *, FormatToken *>313SpelledParentToReconstructedParent;314315// Keeps track of a single expansion while we're reconstructing tokens it316// generated.317struct Expansion {318// The identifier token of the macro call.319FormatToken *ID;320// Our current position in the reconstruction.321std::list<UnwrappedLineNode>::iterator SpelledI;322// The end of the reconstructed token sequence.323std::list<UnwrappedLineNode>::iterator SpelledE;324};325326// Stack of macro calls for which we're in the middle of an expansion.327SmallVector<Expansion> ActiveExpansions;328329struct MacroCallState {330MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,331FormatToken *MacroCallLParen);332333ReconstructedLine *Line;334335// The last token in the parent line or expansion, or nullptr if the macro336// expansion is on a top-level line.337//338// For example, in the macro call:339// auto f = []() { ID(1); };340// The MacroCallState for ID will have '{' as ParentLastToken.341//342// In the macro call:343// ID(ID(void f()));344// The MacroCallState of the outer ID will have nullptr as ParentLastToken,345// while the MacroCallState for the inner ID will have the '(' of the outer346// ID as ParentLastToken.347//348// In the macro call:349// ID2(a, ID(b));350// The MacroCallState of ID will have ',' as ParentLastToken.351FormatToken *ParentLastToken;352353// The l_paren of this MacroCallState's macro call.354FormatToken *MacroCallLParen;355};356357// Keeps track of the lines into which the opening brace/parenthesis &358// argument separating commas for each level in the macro call go in order to359// put the corresponding closing brace/parenthesis into the same line in the360// output and keep track of which parents in the expanded token stream map to361// which tokens in the reconstructed stream.362// When an opening brace/parenthesis has children, we want the structure of363// the output line to be:364// |- MACRO365// |- (366// | \- <argument>367// |- ,368// | \- <argument>369// \- )370SmallVector<MacroCallState> MacroCallStructure;371372// Maps from identifier of the macro call to an unwrapped line containing373// all tokens of the macro call.374const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>375&IdToReconstructed;376};377378} // namespace format379} // namespace clang380381#endif382383384