Path: blob/main/contrib/llvm-project/clang/lib/Format/WhitespaceManager.h
35233 views
//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// WhitespaceManager class manages whitespace around tokens and their10/// replacements.11///12//===----------------------------------------------------------------------===//1314#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H15#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H1617#include "TokenAnnotator.h"18#include "clang/Basic/SourceManager.h"1920namespace clang {21namespace format {2223/// Manages the whitespaces around tokens and their replacements.24///25/// This includes special handling for certain constructs, e.g. the alignment of26/// trailing line comments.27///28/// To guarantee correctness of alignment operations, the \c WhitespaceManager29/// must be informed about every token in the source file; for each token, there30/// must be exactly one call to either \c replaceWhitespace or31/// \c addUntouchableToken.32///33/// There may be multiple calls to \c breakToken for a given token.34class WhitespaceManager {35public:36WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,37bool UseCRLF)38: SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}3940bool useCRLF() const { return UseCRLF; }4142/// Infers whether the input is using CRLF.43static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF);4445/// Replaces the whitespace in front of \p Tok. Only call once for46/// each \c AnnotatedToken.47///48/// \p StartOfTokenColumn is the column at which the token will start after49/// this replacement. It is needed for determining how \p Spaces is turned50/// into tabs and spaces for some format styles.51void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,52unsigned StartOfTokenColumn, bool IsAligned = false,53bool InPPDirective = false);5455/// Adds information about an unchangeable token's whitespace.56///57/// Needs to be called for every token for which \c replaceWhitespace58/// was not called.59void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);6061llvm::Error addReplacement(const tooling::Replacement &Replacement);6263/// Inserts or replaces whitespace in the middle of a token.64///65/// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix66/// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars67/// characters.68///69/// Note: \p Spaces can be negative to retain information about initial70/// relative column offset between a line of a block comment and the start of71/// the comment. This negative offset may be compensated by trailing comment72/// alignment here. In all other cases negative \p Spaces will be truncated to73/// 0.74///75/// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is76/// used to align backslashes correctly.77void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,78unsigned ReplaceChars,79StringRef PreviousPostfix,80StringRef CurrentPrefix, bool InPPDirective,81unsigned Newlines, int Spaces);8283/// Returns all the \c Replacements created during formatting.84const tooling::Replacements &generateReplacements();8586/// Represents a change before a token, a break inside a token,87/// or the layout of an unchanged token (or whitespace within).88struct Change {89/// Functor to sort changes in original source order.90class IsBeforeInFile {91public:92IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}93bool operator()(const Change &C1, const Change &C2) const;9495private:96const SourceManager &SourceMgr;97};9899/// Creates a \c Change.100///101/// The generated \c Change will replace the characters at102/// \p OriginalWhitespaceRange with a concatenation of103/// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces104/// and \p CurrentLinePrefix.105///106/// \p StartOfTokenColumn and \p InPPDirective will be used to lay out107/// trailing comments and escaped newlines.108Change(const FormatToken &Tok, bool CreateReplacement,109SourceRange OriginalWhitespaceRange, int Spaces,110unsigned StartOfTokenColumn, unsigned NewlinesBefore,111StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,112bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken);113114// The kind of the token whose whitespace this change replaces, or in which115// this change inserts whitespace.116// FIXME: Currently this is not set correctly for breaks inside comments, as117// the \c BreakableToken is still doing its own alignment.118const FormatToken *Tok;119120bool CreateReplacement;121// Changes might be in the middle of a token, so we cannot just keep the122// FormatToken around to query its information.123SourceRange OriginalWhitespaceRange;124unsigned StartOfTokenColumn;125unsigned NewlinesBefore;126std::string PreviousLinePostfix;127std::string CurrentLinePrefix;128bool IsAligned;129bool ContinuesPPDirective;130131// The number of spaces in front of the token or broken part of the token.132// This will be adapted when aligning tokens.133// Can be negative to retain information about the initial relative offset134// of the lines in a block comment. This is used when aligning trailing135// comments. Uncompensated negative offset is truncated to 0.136int Spaces;137138// If this change is inside of a token but not at the start of the token or139// directly after a newline.140bool IsInsideToken;141142// \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and143// \c EscapedNewlineColumn will be calculated in144// \c calculateLineBreakInformation.145bool IsTrailingComment;146unsigned TokenLength;147unsigned PreviousEndOfTokenColumn;148unsigned EscapedNewlineColumn;149150// These fields are used to retain correct relative line indentation in a151// block comment when aligning trailing comments.152//153// If this Change represents a continuation of a block comment,154// \c StartOfBlockComment is pointer to the first Change in the block155// comment. \c IndentationOffset is a relative column offset to this156// change, so that the correct column can be reconstructed at the end of157// the alignment process.158const Change *StartOfBlockComment;159int IndentationOffset;160161// Depth of conditionals. Computed from tracking fake parenthesis, except162// it does not increase the indent for "chained" conditionals.163int ConditionalsLevel;164165// A combination of indent, nesting and conditionals levels, which are used166// in tandem to compute lexical scope, for the purposes of deciding167// when to stop consecutive alignment runs.168std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {169return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,170ConditionalsLevel);171}172};173174private:175struct CellDescription {176unsigned Index = 0;177unsigned Cell = 0;178unsigned EndIndex = 0;179bool HasSplit = false;180CellDescription *NextColumnElement = nullptr;181182constexpr bool operator==(const CellDescription &Other) const {183return Index == Other.Index && Cell == Other.Cell &&184EndIndex == Other.EndIndex;185}186constexpr bool operator!=(const CellDescription &Other) const {187return !(*this == Other);188}189};190191struct CellDescriptions {192SmallVector<CellDescription> Cells;193SmallVector<unsigned> CellCounts;194unsigned InitialSpaces = 0;195196// Determine if every row in the array197// has the same number of columns.198bool isRectangular() const {199if (CellCounts.size() < 2)200return false;201202for (auto NumberOfColumns : CellCounts)203if (NumberOfColumns != CellCounts[0])204return false;205return true;206}207};208209/// Calculate \c IsTrailingComment, \c TokenLength for the last tokens210/// or token parts in a line and \c PreviousEndOfTokenColumn and211/// \c EscapedNewlineColumn for the first tokens or token parts in a line.212void calculateLineBreakInformation();213214/// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.215void alignConsecutiveMacros();216217/// Align consecutive assignments over all \c Changes.218void alignConsecutiveAssignments();219220/// Align consecutive bitfields over all \c Changes.221void alignConsecutiveBitFields();222223/// Align consecutive colon. For bitfields, TableGen DAGArgs and defintions.224void225alignConsecutiveColons(const FormatStyle::AlignConsecutiveStyle &AlignStyle,226TokenType Type);227228/// Align consecutive declarations over all \c Changes.229void alignConsecutiveDeclarations();230231/// Align consecutive declarations over all \c Changes.232void alignChainedConditionals();233234/// Align consecutive short case statements over all \c Changes.235void alignConsecutiveShortCaseStatements(bool IsExpr);236237/// Align consecutive TableGen DAGArg colon over all \c Changes.238void alignConsecutiveTableGenBreakingDAGArgColons();239240/// Align consecutive TableGen cond operator colon over all \c Changes.241void alignConsecutiveTableGenCondOperatorColons();242243/// Align consecutive TableGen definitions over all \c Changes.244void alignConsecutiveTableGenDefinitions();245246/// Align trailing comments over all \c Changes.247void alignTrailingComments();248249/// Align trailing comments from change \p Start to change \p End at250/// the specified \p Column.251void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);252253/// Align escaped newlines over all \c Changes.254void alignEscapedNewlines();255256/// Align escaped newlines from change \p Start to change \p End at257/// the specified \p Column.258void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);259260/// Align Array Initializers over all \c Changes.261void alignArrayInitializers();262263/// Align Array Initializers from change \p Start to change \p End at264/// the specified \p Column.265void alignArrayInitializers(unsigned Start, unsigned End);266267/// Align Array Initializers being careful to right justify the columns268/// as described by \p CellDescs.269void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs);270271/// Align Array Initializers being careful to left justify the columns272/// as described by \p CellDescs.273void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs);274275/// Calculate the cell width between two indexes.276unsigned calculateCellWidth(unsigned Start, unsigned End,277bool WithSpaces = false) const;278279/// Get a set of fully specified CellDescriptions between \p Start and280/// \p End of the change list.281CellDescriptions getCells(unsigned Start, unsigned End);282283/// Does this \p Cell contain a split element?284static bool isSplitCell(const CellDescription &Cell);285286/// Get the width of the preceding cells from \p Start to \p End.287template <typename I>288auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const {289auto NetWidth = InitialSpaces;290for (auto PrevIter = Start; PrevIter != End; ++PrevIter) {291// If we broke the line the initial spaces are already292// accounted for.293assert(PrevIter->Index < Changes.size());294if (Changes[PrevIter->Index].NewlinesBefore > 0)295NetWidth = 0;296NetWidth +=297calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1;298}299return NetWidth;300}301302/// Get the maximum width of a cell in a sequence of columns.303template <typename I>304unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const {305unsigned CellWidth =306calculateCellWidth(CellIter->Index, CellIter->EndIndex, true);307if (Changes[CellIter->Index].NewlinesBefore == 0)308CellWidth += NetWidth;309for (const auto *Next = CellIter->NextColumnElement; Next;310Next = Next->NextColumnElement) {311auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true);312if (Changes[Next->Index].NewlinesBefore == 0)313ThisWidth += NetWidth;314CellWidth = std::max(CellWidth, ThisWidth);315}316return CellWidth;317}318319/// Get The maximum width of all columns to a given cell.320template <typename I>321unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop,322unsigned InitialSpaces, unsigned CellCount,323unsigned MaxRowCount) const {324auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces);325auto RowCount = 1U;326auto Offset = std::distance(CellStart, CellStop);327for (const auto *Next = CellStop->NextColumnElement; Next;328Next = Next->NextColumnElement) {329if (RowCount >= MaxRowCount)330break;331auto Start = (CellStart + RowCount * CellCount);332auto End = Start + Offset;333MaxNetWidth =334std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces));335++RowCount;336}337return MaxNetWidth;338}339340/// Align a split cell with a newline to the first element in the cell.341void alignToStartOfCell(unsigned Start, unsigned End);342343/// Link the Cell pointers in the list of Cells.344static CellDescriptions linkCells(CellDescriptions &&CellDesc);345346/// Fill \c Replaces with the replacements for all effective changes.347void generateChanges();348349/// Stores \p Text as the replacement for the whitespace in \p Range.350void storeReplacement(SourceRange Range, StringRef Text);351void appendNewlineText(std::string &Text, unsigned Newlines);352void appendEscapedNewlineText(std::string &Text, unsigned Newlines,353unsigned PreviousEndOfTokenColumn,354unsigned EscapedNewlineColumn);355void appendIndentText(std::string &Text, unsigned IndentLevel,356unsigned Spaces, unsigned WhitespaceStartColumn,357bool IsAligned);358unsigned appendTabIndent(std::string &Text, unsigned Spaces,359unsigned Indentation);360361SmallVector<Change, 16> Changes;362const SourceManager &SourceMgr;363tooling::Replacements Replaces;364const FormatStyle &Style;365bool UseCRLF;366};367368} // namespace format369} // namespace clang370371#endif372373374