Path: blob/main/contrib/llvm-project/clang/lib/Format/MacroCallReconstructor.cpp
35234 views
//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//1//2// The LLVM Compiler Infrastructure3//4// This file is distributed under the University of Illinois Open Source5// License. See LICENSE.TXT for details.6//7//===----------------------------------------------------------------------===//8///9/// \file10/// This file contains the implementation of MacroCallReconstructor, which fits11/// an reconstructed macro call to a parsed set of UnwrappedLines.12///13//===----------------------------------------------------------------------===//1415#include "Macros.h"1617#include "UnwrappedLineParser.h"18#include "clang/Basic/TokenKinds.h"19#include "llvm/ADT/DenseSet.h"20#include "llvm/Support/Debug.h"21#include <cassert>2223#define DEBUG_TYPE "format-reconstruct"2425namespace clang {26namespace format {2728// Call \p Call for each token in the unwrapped line given, passing29// the token, its parent and whether it is the first token in the line.30template <typename T>31void forEachToken(const UnwrappedLine &Line, const T &Call,32FormatToken *Parent = nullptr) {33bool First = true;34for (const auto &N : Line.Tokens) {35Call(N.Tok, Parent, First, Line.Level);36First = false;37for (const auto &Child : N.Children)38forEachToken(Child, Call, N.Tok);39}40}4142MacroCallReconstructor::MacroCallReconstructor(43unsigned Level,44const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>45&ActiveExpansions)46: Result(Level), IdToReconstructed(ActiveExpansions) {47Result.Tokens.push_back(std::make_unique<LineNode>());48ActiveReconstructedLines.push_back(&Result);49}5051void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {52assert(State != Finalized);53LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");54forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First,55unsigned Level) { add(Token, Parent, First, Level); });56assert(InProgress || finished());57}5859UnwrappedLine MacroCallReconstructor::takeResult() && {60finalize();61assert(Result.Tokens.size() == 1 &&62Result.Tokens.front()->Children.size() == 1);63UnwrappedLine Final = createUnwrappedLine(64*Result.Tokens.front()->Children.front(), Result.Level);65assert(!Final.Tokens.empty());66return Final;67}6869// Reconstruct the position of the next \p Token, given its parent \p70// ExpandedParent in the incoming unwrapped line. \p First specifies whether it71// is the first token in a given unwrapped line.72void MacroCallReconstructor::add(FormatToken *Token,73FormatToken *ExpandedParent, bool First,74unsigned Level) {75LLVM_DEBUG(76llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "77<< (ExpandedParent ? ExpandedParent->TokenText : "<null>")78<< ", First: " << First << "\n");79// In order to be able to find the correct parent in the reconstructed token80// stream, we need to continue the last open reconstruction until we find the81// given token if it is part of the reconstructed token stream.82//83// Note that hidden tokens can be part of the reconstructed stream in nested84// macro calls.85// For example, given86// #define C(x, y) x y87// #define B(x) {x}88// And the call:89// C(a, B(b))90// The outer macro call will be C(a, {b}), and the hidden token '}' can be91// found in the reconstructed token stream of that expansion level.92// In the expanded token stream93// a {b}94// 'b' is a child of '{'. We need to continue the open expansion of the ','95// in the call of 'C' in order to correctly set the ',' as the parent of '{',96// so we later set the spelled token 'b' as a child of the ','.97if (!ActiveExpansions.empty() && Token->MacroCtx &&98(Token->MacroCtx->Role != MR_Hidden ||99ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {100if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))101First = true;102}103104prepareParent(ExpandedParent, First, Level);105106if (Token->MacroCtx) {107// If this token was generated by a macro call, add the reconstructed108// equivalent of the token.109reconstruct(Token);110} else {111// Otherwise, we add it to the current line.112appendToken(Token);113}114}115116// Adjusts the stack of active reconstructed lines so we're ready to push117// tokens. The tokens to be pushed are children of ExpandedParent in the118// expanded code.119//120// This may entail:121// - creating a new line, if the parent is on the active line122// - popping active lines, if the parent is further up the stack123//124// Postcondition:125// ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its126// reconstructed replacement token as a parent (when possible) - that is, the127// last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]128// is the parent of ActiveReconstructedLines.back() in the reconstructed129// unwrapped line.130void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,131bool NewLine, unsigned Level) {132LLVM_DEBUG({133llvm::dbgs() << "ParentMap:\n";134debugParentMap();135});136// We want to find the parent in the new unwrapped line, where the expanded137// parent might have been replaced during reconstruction.138FormatToken *Parent = getParentInResult(ExpandedParent);139LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "140<< (Parent ? Parent->TokenText : "<null>") << "\n");141142FormatToken *OpenMacroParent = nullptr;143if (!MacroCallStructure.empty()) {144// Inside a macro expansion, it is possible to lose track of the correct145// parent - either because it is already popped, for example because it was146// in a different macro argument (e.g. M({, })), or when we work on invalid147// code.148// Thus, we use the innermost macro call's parent as the parent at which149// we stop; this allows us to stay within the macro expansion and keeps150// any problems confined to the extent of the macro call.151OpenMacroParent =152getParentInResult(MacroCallStructure.back().MacroCallLParen);153LLVM_DEBUG(llvm::dbgs()154<< "MacroCallLParen: "155<< MacroCallStructure.back().MacroCallLParen->TokenText156<< ", OpenMacroParent: "157<< (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")158<< "\n");159}160if (NewLine ||161(!ActiveReconstructedLines.back()->Tokens.empty() &&162Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {163// If we are at the first token in a new line, we want to also164// create a new line in the resulting reconstructed unwrapped line.165while (ActiveReconstructedLines.back()->Tokens.empty() ||166(Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&167ActiveReconstructedLines.back()->Tokens.back()->Tok !=168OpenMacroParent)) {169ActiveReconstructedLines.pop_back();170assert(!ActiveReconstructedLines.empty());171}172assert(!ActiveReconstructedLines.empty());173ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(174std::make_unique<ReconstructedLine>(Level));175ActiveReconstructedLines.push_back(176&*ActiveReconstructedLines.back()->Tokens.back()->Children.back());177} else if (parentLine().Tokens.back()->Tok != Parent) {178// If we're not the first token in a new line, pop lines until we find179// the child of \c Parent in the stack.180while (Parent != parentLine().Tokens.back()->Tok &&181parentLine().Tokens.back()->Tok &&182parentLine().Tokens.back()->Tok != OpenMacroParent) {183ActiveReconstructedLines.pop_back();184assert(!ActiveReconstructedLines.empty());185}186}187assert(!ActiveReconstructedLines.empty());188}189190// For a given \p Parent in the incoming expanded token stream, find the191// corresponding parent in the output.192FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {193FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);194if (!Mapped)195return Parent;196for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))197Parent = Mapped;198// If we use a different token than the parent in the expanded token stream199// as parent, mark it as a special parent, so the formatting code knows it200// needs to have its children formatted.201Parent->MacroParent = true;202return Parent;203}204205// Reconstruct a \p Token that was expanded from a macro call.206void MacroCallReconstructor::reconstruct(FormatToken *Token) {207assert(Token->MacroCtx);208// A single token can be the only result of a macro call:209// Given: #define ID(x, y) ;210// And the call: ID(<some>, <tokens>)211// ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).212if (Token->MacroCtx->StartOfExpansion) {213startReconstruction(Token);214// If the order of tokens in the expanded token stream is not the215// same as the order of tokens in the reconstructed stream, we need216// to reconstruct tokens that arrive later in the stream.217if (Token->MacroCtx->Role != MR_Hidden)218reconstructActiveCallUntil(Token);219}220assert(!ActiveExpansions.empty());221if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {222assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());223if (Token->MacroCtx->Role != MR_Hidden) {224// The current token in the reconstructed token stream must be the token225// we're looking for - we either arrive here after startReconstruction,226// which initiates the stream to the first token, or after227// continueReconstructionUntil skipped until the expected token in the228// reconstructed stream at the start of add(...).229assert(ActiveExpansions.back().SpelledI->Tok == Token);230processNextReconstructed();231} else if (!currentLine()->Tokens.empty()) {232// Map all hidden tokens to the last visible token in the output.233// If the hidden token is a parent, we'll use the last visible234// token as the parent of the hidden token's children.235SpelledParentToReconstructedParent[Token] =236currentLine()->Tokens.back()->Tok;237} else {238for (auto I = ActiveReconstructedLines.rbegin(),239E = ActiveReconstructedLines.rend();240I != E; ++I) {241if (!(*I)->Tokens.empty()) {242SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;243break;244}245}246}247}248if (Token->MacroCtx->EndOfExpansion)249endReconstruction(Token);250}251252// Given a \p Token that starts an expansion, reconstruct the beginning of the253// macro call.254// For example, given: #define ID(x) x255// And the call: ID(int a)256// Reconstructs: ID(257void MacroCallReconstructor::startReconstruction(FormatToken *Token) {258assert(Token->MacroCtx);259assert(!Token->MacroCtx->ExpandedFrom.empty());260assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());261#ifndef NDEBUG262// Check that the token's reconstruction stack matches our current263// reconstruction stack.264for (size_t I = 0; I < ActiveExpansions.size(); ++I) {265assert(ActiveExpansions[I].ID ==266Token->MacroCtx267->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);268}269#endif270// Start reconstruction for all calls for which this token is the first token271// generated by the call.272// Note that the token's expanded from stack is inside-to-outside, and the273// expansions for which this token is not the first are the outermost ones.274ArrayRef<FormatToken *> StartedMacros =275ArrayRef(Token->MacroCtx->ExpandedFrom)276.drop_back(ActiveExpansions.size());277assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);278// We reconstruct macro calls outside-to-inside.279for (FormatToken *ID : llvm::reverse(StartedMacros)) {280// We found a macro call to be reconstructed; the next time our281// reconstruction stack is empty we know we finished an reconstruction.282#ifndef NDEBUG283State = InProgress;284#endif285// Put the reconstructed macro call's token into our reconstruction stack.286auto IU = IdToReconstructed.find(ID);287assert(IU != IdToReconstructed.end());288ActiveExpansions.push_back(289{ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});290// Process the macro call's identifier.291processNextReconstructed();292if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)293continue;294if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {295// Process the optional opening parenthesis.296processNextReconstructed();297}298}299}300301// Add all tokens in the reconstruction stream to the output until we find the302// given \p Token.303bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {304assert(!ActiveExpansions.empty());305bool PassedMacroComma = false;306// FIXME: If Token was already expanded earlier, due to307// a change in order, we will not find it, but need to308// skip it.309while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&310ActiveExpansions.back().SpelledI->Tok != Token) {311PassedMacroComma = processNextReconstructed() || PassedMacroComma;312}313return PassedMacroComma;314}315316// End all reconstructions for which \p Token is the final token.317void MacroCallReconstructor::endReconstruction(FormatToken *Token) {318assert(Token->MacroCtx &&319(ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));320for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {321LLVM_DEBUG([&] {322// Check all remaining tokens but the final closing parenthesis and323// optional trailing comment were already reconstructed at an inner324// expansion level.325for (auto T = ActiveExpansions.back().SpelledI;326T != ActiveExpansions.back().SpelledE; ++T) {327FormatToken *Token = T->Tok;328bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||329std::next(T)->Tok->isTrailingComment()) &&330!Token->MacroCtx && Token->is(tok::r_paren);331bool TrailingComment = Token->isTrailingComment();332bool PreviousLevel =333Token->MacroCtx &&334(ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());335if (!ClosingParen && !TrailingComment && !PreviousLevel)336llvm::dbgs() << "At token: " << Token->TokenText << "\n";337// In addition to the following cases, we can also run into this338// when a macro call had more arguments than expected; in that case,339// the comma and the remaining tokens in the macro call will340// potentially end up in the line when we finish the expansion.341// FIXME: Add the information which arguments are unused, and assert342// one of the cases below plus reconstructed macro argument tokens.343// assert(ClosingParen || TrailingComment || PreviousLevel);344}345}());346// Handle the remaining open tokens:347// - expand the closing parenthesis, if it exists, including an optional348// trailing comment349// - handle tokens that were already reconstructed at an inner expansion350// level351// - handle tokens when a macro call had more than the expected number of352// arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end353// up with the sequence ", b, c)" being open at the end of the354// reconstruction; we want to gracefully handle that case355//356// FIXME: See the above debug-check for what we will need to do to be357// able to assert this.358for (auto T = ActiveExpansions.back().SpelledI;359T != ActiveExpansions.back().SpelledE; ++T) {360processNextReconstructed();361}362ActiveExpansions.pop_back();363}364}365366void MacroCallReconstructor::debugParentMap() const {367llvm::DenseSet<FormatToken *> Values;368for (const auto &P : SpelledParentToReconstructedParent)369Values.insert(P.second);370371for (const auto &P : SpelledParentToReconstructedParent) {372if (Values.contains(P.first))373continue;374llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");375for (auto I = SpelledParentToReconstructedParent.find(P.first),376E = SpelledParentToReconstructedParent.end();377I != E; I = SpelledParentToReconstructedParent.find(I->second)) {378llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");379}380llvm::dbgs() << "\n";381}382}383384// If visible, add the next token of the reconstructed token sequence to the385// output. Returns whether reconstruction passed a comma that is part of a386// macro call.387bool MacroCallReconstructor::processNextReconstructed() {388FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;389++ActiveExpansions.back().SpelledI;390if (Token->MacroCtx) {391// Skip tokens that are not part of the macro call.392if (Token->MacroCtx->Role == MR_Hidden)393return false;394// Skip tokens we already expanded during an inner reconstruction.395// For example, given: #define ID(x) {x}396// And the call: ID(ID(f))397// We get two reconstructions:398// ID(f) -> {f}399// ID({f}) -> {{f}}400// We reconstruct f during the first reconstruction, and skip it during the401// second reconstruction.402if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())403return false;404}405// Tokens that do not have a macro context are tokens in that are part of the406// macro call that have not taken part in expansion.407if (!Token->MacroCtx) {408// Put the parentheses and commas of a macro call into the same line;409// if the arguments produce new unwrapped lines, they will become children410// of the corresponding opening parenthesis or comma tokens in the411// reconstructed call.412if (Token->is(tok::l_paren)) {413MacroCallStructure.push_back(MacroCallState(414currentLine(), parentLine().Tokens.back()->Tok, Token));415// All tokens that are children of the previous line's last token in the416// reconstructed token stream will now be children of the l_paren token.417// For example, for the line containing the macro calls:418// auto x = ID({ID(2)});419// We will build up a map <null> -> ( -> ( with the first and second420// l_paren of the macro call respectively. New lines that come in with a421// <null> parent will then become children of the l_paren token of the422// currently innermost macro call.423SpelledParentToReconstructedParent[MacroCallStructure.back()424.ParentLastToken] = Token;425appendToken(Token);426prepareParent(Token, /*NewLine=*/true,427MacroCallStructure.back().Line->Level);428Token->MacroParent = true;429return false;430}431if (!MacroCallStructure.empty()) {432if (Token->is(tok::comma)) {433// Make new lines inside the next argument children of the comma token.434SpelledParentToReconstructedParent435[MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;436Token->MacroParent = true;437appendToken(Token, MacroCallStructure.back().Line);438prepareParent(Token, /*NewLine=*/true,439MacroCallStructure.back().Line->Level);440return true;441}442if (Token->is(tok::r_paren)) {443appendToken(Token, MacroCallStructure.back().Line);444SpelledParentToReconstructedParent.erase(445MacroCallStructure.back().ParentLastToken);446MacroCallStructure.pop_back();447return false;448}449}450}451// Note that any tokens that are tagged with MR_None have been passed as452// arguments to the macro that have not been expanded, for example:453// Given: #define ID(X) x454// When calling: ID(a, b)455// 'b' will be part of the reconstructed token stream, but tagged MR_None.456// Given that erroring out in this case would be disruptive, we continue457// pushing the (unformatted) token.458// FIXME: This can lead to unfortunate formatting decisions - give the user459// a hint that their macro definition is broken.460appendToken(Token);461return false;462}463464void MacroCallReconstructor::finalize() {465#ifndef NDEBUG466assert(State != Finalized && finished());467State = Finalized;468#endif469470// We created corresponding unwrapped lines for each incoming line as children471// the the toplevel null token.472assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());473LLVM_DEBUG({474llvm::dbgs() << "Finalizing reconstructed lines:\n";475debug(Result, 0);476});477478// The first line becomes the top level line in the resulting unwrapped line.479LineNode &Top = *Result.Tokens.front();480auto *I = Top.Children.begin();481// Every subsequent line will become a child of the last token in the previous482// line, which is the token prior to the first token in the line.483LineNode *Last = (*I)->Tokens.back().get();484++I;485for (auto *E = Top.Children.end(); I != E; ++I) {486assert(Last->Children.empty());487Last->Children.push_back(std::move(*I));488489// Mark the previous line's last token as generated by a macro expansion490// so the formatting algorithm can take that into account.491Last->Tok->MacroParent = true;492493Last = Last->Children.back()->Tokens.back().get();494}495Top.Children.resize(1);496}497498void MacroCallReconstructor::appendToken(FormatToken *Token,499ReconstructedLine *L) {500L = L ? L : currentLine();501LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");502L->Tokens.push_back(std::make_unique<LineNode>(Token));503}504505UnwrappedLine506MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,507int Level) {508UnwrappedLine Result;509Result.Level = Level;510for (const auto &N : Line.Tokens) {511Result.Tokens.push_back(N->Tok);512UnwrappedLineNode &Current = Result.Tokens.back();513auto NumChildren =514std::count_if(N->Children.begin(), N->Children.end(),515[](const auto &Child) { return !Child->Tokens.empty(); });516if (NumChildren == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) {517// If we only have one child, and the child is due to a macro expansion518// (either attached to a left parenthesis or comma), merge the child into519// the current line to prevent forced breaks for macro arguments.520auto *Child = std::find_if(521N->Children.begin(), N->Children.end(),522[](const auto &Child) { return !Child->Tokens.empty(); });523auto Line = createUnwrappedLine(**Child, Level);524Result.Tokens.splice(Result.Tokens.end(), Line.Tokens);525} else if (NumChildren > 0) {526// When there are multiple children with different indent, make sure that527// we indent them:528// 1. One level below the current line's level.529// 2. At the correct level relative to each other.530unsigned MinChildLevel =531std::min_element(N->Children.begin(), N->Children.end(),532[](const auto &E1, const auto &E2) {533return E1->Level < E2->Level;534})535->get()536->Level;537for (const auto &Child : N->Children) {538if (Child->Tokens.empty())539continue;540Current.Children.push_back(createUnwrappedLine(541*Child, Level + 1 + (Child->Level - MinChildLevel)));542}543}544}545return Result;546}547548void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {549for (int i = 0; i < Level; ++i)550llvm::dbgs() << " ";551for (const auto &N : Line.Tokens) {552if (!N)553continue;554if (N->Tok)555llvm::dbgs() << N->Tok->TokenText << " ";556for (const auto &Child : N->Children) {557llvm::dbgs() << "\n";558debug(*Child, Level + 1);559for (int i = 0; i < Level; ++i)560llvm::dbgs() << " ";561}562}563llvm::dbgs() << "\n";564}565566MacroCallReconstructor::ReconstructedLine &567MacroCallReconstructor::parentLine() {568return **std::prev(std::prev(ActiveReconstructedLines.end()));569}570571MacroCallReconstructor::ReconstructedLine *572MacroCallReconstructor::currentLine() {573return ActiveReconstructedLines.back();574}575576MacroCallReconstructor::MacroCallState::MacroCallState(577MacroCallReconstructor::ReconstructedLine *Line,578FormatToken *ParentLastToken, FormatToken *MacroCallLParen)579: Line(Line), ParentLastToken(ParentLastToken),580MacroCallLParen(MacroCallLParen) {581LLVM_DEBUG(582llvm::dbgs() << "ParentLastToken: "583<< (ParentLastToken ? ParentLastToken->TokenText : "<null>")584<< "\n");585586assert(MacroCallLParen->is(tok::l_paren));587}588589} // namespace format590} // namespace clang591592593