Path: blob/main/contrib/llvm-project/clang/lib/Format/ContinuationIndenter.h
35233 views
//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This file implements an indenter that manages the indentation of10/// continuations.11///12//===----------------------------------------------------------------------===//1314#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H15#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H1617#include "Encoding.h"18#include "FormatToken.h"1920namespace clang {21class SourceManager;2223namespace format {2425class AnnotatedLine;26class BreakableToken;27struct FormatToken;28struct LineState;29struct ParenState;30struct RawStringFormatStyleManager;31class WhitespaceManager;3233struct RawStringFormatStyleManager {34llvm::StringMap<FormatStyle> DelimiterStyle;35llvm::StringMap<FormatStyle> EnclosingFunctionStyle;3637RawStringFormatStyleManager(const FormatStyle &CodeStyle);3839std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;4041std::optional<FormatStyle>42getEnclosingFunctionStyle(StringRef EnclosingFunction) const;43};4445class ContinuationIndenter {46public:47/// Constructs a \c ContinuationIndenter to format \p Line starting in48/// column \p FirstIndent.49ContinuationIndenter(const FormatStyle &Style,50const AdditionalKeywords &Keywords,51const SourceManager &SourceMgr,52WhitespaceManager &Whitespaces,53encoding::Encoding Encoding,54bool BinPackInconclusiveFunctions);5556/// Get the initial state, i.e. the state after placing \p Line's57/// first token at \p FirstIndent. When reformatting a fragment of code, as in58/// the case of formatting inside raw string literals, \p FirstStartColumn is59/// the column at which the state of the parent formatter is.60LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,61const AnnotatedLine *Line, bool DryRun);6263// FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a64// better home.65/// Returns \c true, if a line break after \p State is allowed.66bool canBreak(const LineState &State);6768/// Returns \c true, if a line break after \p State is mandatory.69bool mustBreak(const LineState &State);7071/// Appends the next token to \p State and updates information72/// necessary for indentation.73///74/// Puts the token on the current line if \p Newline is \c false and adds a75/// line break and necessary indentation otherwise.76///77/// If \p DryRun is \c false, also creates and stores the required78/// \c Replacement.79unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,80unsigned ExtraSpaces = 0);8182/// Get the column limit for this line. This is the style's column83/// limit, potentially reduced for preprocessor definitions.84unsigned getColumnLimit(const LineState &State) const;8586private:87/// Mark the next token as consumed in \p State and modify its stacks88/// accordingly.89unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);9091/// Update 'State' according to the next token's fake left parentheses.92void moveStatePastFakeLParens(LineState &State, bool Newline);93/// Update 'State' according to the next token's fake r_parens.94void moveStatePastFakeRParens(LineState &State);9596/// Update 'State' according to the next token being one of "(<{[".97void moveStatePastScopeOpener(LineState &State, bool Newline);98/// Update 'State' according to the next token being one of ")>}]".99void moveStatePastScopeCloser(LineState &State);100/// Update 'State' with the next token opening a nested block.101void moveStateToNewBlock(LineState &State, bool NewLine);102103/// Reformats a raw string literal.104///105/// \returns An extra penalty induced by reformatting the token.106unsigned reformatRawStringLiteral(const FormatToken &Current,107LineState &State,108const FormatStyle &RawStringStyle,109bool DryRun, bool Newline);110111/// If the current token is at the end of the current line, handle112/// the transition to the next line.113unsigned handleEndOfLine(const FormatToken &Current, LineState &State,114bool DryRun, bool AllowBreak, bool Newline);115116/// If \p Current is a raw string that is configured to be reformatted,117/// return the style to be used.118std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,119const LineState &State);120121/// If the current token sticks out over the end of the line, break122/// it if possible.123///124/// \returns A pair (penalty, exceeded), where penalty is the extra penalty125/// when tokens are broken or lines exceed the column limit, and exceeded126/// indicates whether the algorithm purposefully left lines exceeding the127/// column limit.128///129/// The returned penalty will cover the cost of the additional line breaks130/// and column limit violation in all lines except for the last one. The131/// penalty for the column limit violation in the last line (and in single132/// line tokens) is handled in \c addNextStateToQueue.133///134/// \p Strict indicates whether reflowing is allowed to leave characters135/// protruding the column limit; if true, lines will be split strictly within136/// the column limit where possible; if false, words are allowed to protrude137/// over the column limit as long as the penalty is less than the penalty138/// of a break.139std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,140LineState &State,141bool AllowBreak, bool DryRun,142bool Strict);143144/// Returns the \c BreakableToken starting at \p Current, or nullptr145/// if the current token cannot be broken.146std::unique_ptr<BreakableToken>147createBreakableToken(const FormatToken &Current, LineState &State,148bool AllowBreak);149150/// Appends the next token to \p State and updates information151/// necessary for indentation.152///153/// Puts the token on the current line.154///155/// If \p DryRun is \c false, also creates and stores the required156/// \c Replacement.157void addTokenOnCurrentLine(LineState &State, bool DryRun,158unsigned ExtraSpaces);159160/// Appends the next token to \p State and updates information161/// necessary for indentation.162///163/// Adds a line break and necessary indentation.164///165/// If \p DryRun is \c false, also creates and stores the required166/// \c Replacement.167unsigned addTokenOnNewLine(LineState &State, bool DryRun);168169/// Calculate the new column for a line wrap before the next token.170unsigned getNewLineColumn(const LineState &State);171172/// Adds a multiline token to the \p State.173///174/// \returns Extra penalty for the first line of the literal: last line is175/// handled in \c addNextStateToQueue, and the penalty for other lines doesn't176/// matter, as we don't change them.177unsigned addMultilineToken(const FormatToken &Current, LineState &State);178179/// Returns \c true if the next token starts a multiline string180/// literal.181///182/// This includes implicitly concatenated strings, strings that will be broken183/// by clang-format and string literals with escaped newlines.184bool nextIsMultilineString(const LineState &State);185186FormatStyle Style;187const AdditionalKeywords &Keywords;188const SourceManager &SourceMgr;189WhitespaceManager &Whitespaces;190encoding::Encoding Encoding;191bool BinPackInconclusiveFunctions;192llvm::Regex CommentPragmasRegex;193const RawStringFormatStyleManager RawStringFormats;194};195196struct ParenState {197ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,198bool AvoidBinPacking, bool NoLineBreak)199: Tok(Tok), Indent(Indent), LastSpace(LastSpace),200NestedBlockIndent(Indent), IsAligned(false),201BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),202AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),203NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),204LastOperatorWrapped(true), ContainsLineBreak(false),205ContainsUnwrappedBuilder(false), AlignColons(true),206ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),207NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),208IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),209IsWrappedConditional(false), UnindentOperator(false) {}210211/// \brief The token opening this parenthesis level, or nullptr if this level212/// is opened by fake parenthesis.213///214/// Not considered for memoization as it will always have the same value at215/// the same token.216const FormatToken *Tok;217218/// The position to which a specific parenthesis level needs to be219/// indented.220unsigned Indent;221222/// The position of the last space on each level.223///224/// Used e.g. to break like:225/// functionCall(Parameter, otherCall(226/// OtherParameter));227unsigned LastSpace;228229/// If a block relative to this parenthesis level gets wrapped, indent230/// it this much.231unsigned NestedBlockIndent;232233/// The position the first "<<" operator encountered on each level.234///235/// Used to align "<<" operators. 0 if no such operator has been encountered236/// on a level.237unsigned FirstLessLess = 0;238239/// The column of a \c ? in a conditional expression;240unsigned QuestionColumn = 0;241242/// The position of the colon in an ObjC method declaration/call.243unsigned ColonPos = 0;244245/// The start of the most recent function in a builder-type call.246unsigned StartOfFunctionCall = 0;247248/// Contains the start of array subscript expressions, so that they249/// can be aligned.250unsigned StartOfArraySubscripts = 0;251252/// If a nested name specifier was broken over multiple lines, this253/// contains the start column of the second line. Otherwise 0.254unsigned NestedNameSpecifierContinuation = 0;255256/// If a call expression was broken over multiple lines, this257/// contains the start column of the second line. Otherwise 0.258unsigned CallContinuation = 0;259260/// The column of the first variable name in a variable declaration.261///262/// Used to align further variables if necessary.263unsigned VariablePos = 0;264265/// Whether this block's indentation is used for alignment.266bool IsAligned : 1;267268/// Whether a newline needs to be inserted before the block's closing269/// brace.270///271/// We only want to insert a newline before the closing brace if there also272/// was a newline after the beginning left brace.273bool BreakBeforeClosingBrace : 1;274275/// Whether a newline needs to be inserted before the block's closing276/// paren.277///278/// We only want to insert a newline before the closing paren if there also279/// was a newline after the beginning left paren.280bool BreakBeforeClosingParen : 1;281282/// Avoid bin packing, i.e. multiple parameters/elements on multiple283/// lines, in this context.284bool AvoidBinPacking : 1;285286/// Break after the next comma (or all the commas in this context if287/// \c AvoidBinPacking is \c true).288bool BreakBeforeParameter : 1;289290/// Line breaking in this context would break a formatting rule.291bool NoLineBreak : 1;292293/// Same as \c NoLineBreak, but is restricted until the end of the294/// operand (including the next ",").295bool NoLineBreakInOperand : 1;296297/// True if the last binary operator on this level was wrapped to the298/// next line.299bool LastOperatorWrapped : 1;300301/// \c true if this \c ParenState already contains a line-break.302///303/// The first line break in a certain \c ParenState causes extra penalty so304/// that clang-format prefers similar breaks, i.e. breaks in the same305/// parenthesis.306bool ContainsLineBreak : 1;307308/// \c true if this \c ParenState contains multiple segments of a309/// builder-type call on one line.310bool ContainsUnwrappedBuilder : 1;311312/// \c true if the colons of the curren ObjC method expression should313/// be aligned.314///315/// Not considered for memoization as it will always have the same value at316/// the same token.317bool AlignColons : 1;318319/// \c true if at least one selector name was found in the current320/// ObjC method expression.321///322/// Not considered for memoization as it will always have the same value at323/// the same token.324bool ObjCSelectorNameFound : 1;325326/// \c true if there are multiple nested blocks inside these parens.327///328/// Not considered for memoization as it will always have the same value at329/// the same token.330bool HasMultipleNestedBlocks : 1;331332/// The start of a nested block (e.g. lambda introducer in C++ or333/// "function" in JavaScript) is not wrapped to a new line.334bool NestedBlockInlined : 1;335336/// \c true if the current \c ParenState represents an Objective-C337/// array literal.338bool IsInsideObjCArrayLiteral : 1;339340bool IsCSharpGenericTypeConstraint : 1;341342/// \brief true if the current \c ParenState represents the false branch of343/// a chained conditional expression (e.g. else-if)344bool IsChainedConditional : 1;345346/// \brief true if there conditionnal was wrapped on the first operator (the347/// question mark)348bool IsWrappedConditional : 1;349350/// \brief Indicates the indent should be reduced by the length of the351/// operator.352bool UnindentOperator : 1;353354bool operator<(const ParenState &Other) const {355if (Indent != Other.Indent)356return Indent < Other.Indent;357if (LastSpace != Other.LastSpace)358return LastSpace < Other.LastSpace;359if (NestedBlockIndent != Other.NestedBlockIndent)360return NestedBlockIndent < Other.NestedBlockIndent;361if (FirstLessLess != Other.FirstLessLess)362return FirstLessLess < Other.FirstLessLess;363if (IsAligned != Other.IsAligned)364return IsAligned;365if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)366return BreakBeforeClosingBrace;367if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)368return BreakBeforeClosingParen;369if (QuestionColumn != Other.QuestionColumn)370return QuestionColumn < Other.QuestionColumn;371if (AvoidBinPacking != Other.AvoidBinPacking)372return AvoidBinPacking;373if (BreakBeforeParameter != Other.BreakBeforeParameter)374return BreakBeforeParameter;375if (NoLineBreak != Other.NoLineBreak)376return NoLineBreak;377if (LastOperatorWrapped != Other.LastOperatorWrapped)378return LastOperatorWrapped;379if (ColonPos != Other.ColonPos)380return ColonPos < Other.ColonPos;381if (StartOfFunctionCall != Other.StartOfFunctionCall)382return StartOfFunctionCall < Other.StartOfFunctionCall;383if (StartOfArraySubscripts != Other.StartOfArraySubscripts)384return StartOfArraySubscripts < Other.StartOfArraySubscripts;385if (CallContinuation != Other.CallContinuation)386return CallContinuation < Other.CallContinuation;387if (VariablePos != Other.VariablePos)388return VariablePos < Other.VariablePos;389if (ContainsLineBreak != Other.ContainsLineBreak)390return ContainsLineBreak;391if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)392return ContainsUnwrappedBuilder;393if (NestedBlockInlined != Other.NestedBlockInlined)394return NestedBlockInlined;395if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)396return IsCSharpGenericTypeConstraint;397if (IsChainedConditional != Other.IsChainedConditional)398return IsChainedConditional;399if (IsWrappedConditional != Other.IsWrappedConditional)400return IsWrappedConditional;401if (UnindentOperator != Other.UnindentOperator)402return UnindentOperator;403return false;404}405};406407/// The current state when indenting a unwrapped line.408///409/// As the indenting tries different combinations this is copied by value.410struct LineState {411/// The number of used columns in the current line.412unsigned Column;413414/// The token that needs to be next formatted.415FormatToken *NextToken;416417/// \c true if \p NextToken should not continue this line.418bool NoContinuation;419420/// The \c NestingLevel at the start of this line.421unsigned StartOfLineLevel;422423/// The lowest \c NestingLevel on the current line.424unsigned LowestLevelOnLine;425426/// The start column of the string literal, if we're in a string427/// literal sequence, 0 otherwise.428unsigned StartOfStringLiteral;429430/// Disallow line breaks for this line.431bool NoLineBreak;432433/// A stack keeping track of properties applying to parenthesis434/// levels.435SmallVector<ParenState> Stack;436437/// Ignore the stack of \c ParenStates for state comparison.438///439/// In long and deeply nested unwrapped lines, the current algorithm can440/// be insufficient for finding the best formatting with a reasonable amount441/// of time and memory. Setting this flag will effectively lead to the442/// algorithm not analyzing some combinations. However, these combinations443/// rarely contain the optimal solution: In short, accepting a higher444/// penalty early would need to lead to different values in the \c445/// ParenState stack (in an otherwise identical state) and these different446/// values would need to lead to a significant amount of avoided penalty447/// later.448///449/// FIXME: Come up with a better algorithm instead.450bool IgnoreStackForComparison;451452/// The indent of the first token.453unsigned FirstIndent;454455/// The line that is being formatted.456///457/// Does not need to be considered for memoization because it doesn't change.458const AnnotatedLine *Line;459460/// Comparison operator to be able to used \c LineState in \c map.461bool operator<(const LineState &Other) const {462if (NextToken != Other.NextToken)463return NextToken < Other.NextToken;464if (Column != Other.Column)465return Column < Other.Column;466if (NoContinuation != Other.NoContinuation)467return NoContinuation;468if (StartOfLineLevel != Other.StartOfLineLevel)469return StartOfLineLevel < Other.StartOfLineLevel;470if (LowestLevelOnLine != Other.LowestLevelOnLine)471return LowestLevelOnLine < Other.LowestLevelOnLine;472if (StartOfStringLiteral != Other.StartOfStringLiteral)473return StartOfStringLiteral < Other.StartOfStringLiteral;474if (IgnoreStackForComparison || Other.IgnoreStackForComparison)475return false;476return Stack < Other.Stack;477}478};479480} // end namespace format481} // end namespace clang482483#endif484485486