Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Format/ContinuationIndenter.h
35233 views
1
//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file implements an indenter that manages the indentation of
11
/// continuations.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16
#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17
18
#include "Encoding.h"
19
#include "FormatToken.h"
20
21
namespace clang {
22
class SourceManager;
23
24
namespace format {
25
26
class AnnotatedLine;
27
class BreakableToken;
28
struct FormatToken;
29
struct LineState;
30
struct ParenState;
31
struct RawStringFormatStyleManager;
32
class WhitespaceManager;
33
34
struct RawStringFormatStyleManager {
35
llvm::StringMap<FormatStyle> DelimiterStyle;
36
llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37
38
RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39
40
std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41
42
std::optional<FormatStyle>
43
getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44
};
45
46
class ContinuationIndenter {
47
public:
48
/// Constructs a \c ContinuationIndenter to format \p Line starting in
49
/// column \p FirstIndent.
50
ContinuationIndenter(const FormatStyle &Style,
51
const AdditionalKeywords &Keywords,
52
const SourceManager &SourceMgr,
53
WhitespaceManager &Whitespaces,
54
encoding::Encoding Encoding,
55
bool BinPackInconclusiveFunctions);
56
57
/// Get the initial state, i.e. the state after placing \p Line's
58
/// first token at \p FirstIndent. When reformatting a fragment of code, as in
59
/// the case of formatting inside raw string literals, \p FirstStartColumn is
60
/// the column at which the state of the parent formatter is.
61
LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
62
const AnnotatedLine *Line, bool DryRun);
63
64
// FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
65
// better home.
66
/// Returns \c true, if a line break after \p State is allowed.
67
bool canBreak(const LineState &State);
68
69
/// Returns \c true, if a line break after \p State is mandatory.
70
bool mustBreak(const LineState &State);
71
72
/// Appends the next token to \p State and updates information
73
/// necessary for indentation.
74
///
75
/// Puts the token on the current line if \p Newline is \c false and adds a
76
/// line break and necessary indentation otherwise.
77
///
78
/// If \p DryRun is \c false, also creates and stores the required
79
/// \c Replacement.
80
unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
81
unsigned ExtraSpaces = 0);
82
83
/// Get the column limit for this line. This is the style's column
84
/// limit, potentially reduced for preprocessor definitions.
85
unsigned getColumnLimit(const LineState &State) const;
86
87
private:
88
/// Mark the next token as consumed in \p State and modify its stacks
89
/// accordingly.
90
unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
91
92
/// Update 'State' according to the next token's fake left parentheses.
93
void moveStatePastFakeLParens(LineState &State, bool Newline);
94
/// Update 'State' according to the next token's fake r_parens.
95
void moveStatePastFakeRParens(LineState &State);
96
97
/// Update 'State' according to the next token being one of "(<{[".
98
void moveStatePastScopeOpener(LineState &State, bool Newline);
99
/// Update 'State' according to the next token being one of ")>}]".
100
void moveStatePastScopeCloser(LineState &State);
101
/// Update 'State' with the next token opening a nested block.
102
void moveStateToNewBlock(LineState &State, bool NewLine);
103
104
/// Reformats a raw string literal.
105
///
106
/// \returns An extra penalty induced by reformatting the token.
107
unsigned reformatRawStringLiteral(const FormatToken &Current,
108
LineState &State,
109
const FormatStyle &RawStringStyle,
110
bool DryRun, bool Newline);
111
112
/// If the current token is at the end of the current line, handle
113
/// the transition to the next line.
114
unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
115
bool DryRun, bool AllowBreak, bool Newline);
116
117
/// If \p Current is a raw string that is configured to be reformatted,
118
/// return the style to be used.
119
std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
120
const LineState &State);
121
122
/// If the current token sticks out over the end of the line, break
123
/// it if possible.
124
///
125
/// \returns A pair (penalty, exceeded), where penalty is the extra penalty
126
/// when tokens are broken or lines exceed the column limit, and exceeded
127
/// indicates whether the algorithm purposefully left lines exceeding the
128
/// column limit.
129
///
130
/// The returned penalty will cover the cost of the additional line breaks
131
/// and column limit violation in all lines except for the last one. The
132
/// penalty for the column limit violation in the last line (and in single
133
/// line tokens) is handled in \c addNextStateToQueue.
134
///
135
/// \p Strict indicates whether reflowing is allowed to leave characters
136
/// protruding the column limit; if true, lines will be split strictly within
137
/// the column limit where possible; if false, words are allowed to protrude
138
/// over the column limit as long as the penalty is less than the penalty
139
/// of a break.
140
std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
141
LineState &State,
142
bool AllowBreak, bool DryRun,
143
bool Strict);
144
145
/// Returns the \c BreakableToken starting at \p Current, or nullptr
146
/// if the current token cannot be broken.
147
std::unique_ptr<BreakableToken>
148
createBreakableToken(const FormatToken &Current, LineState &State,
149
bool AllowBreak);
150
151
/// Appends the next token to \p State and updates information
152
/// necessary for indentation.
153
///
154
/// Puts the token on the current line.
155
///
156
/// If \p DryRun is \c false, also creates and stores the required
157
/// \c Replacement.
158
void addTokenOnCurrentLine(LineState &State, bool DryRun,
159
unsigned ExtraSpaces);
160
161
/// Appends the next token to \p State and updates information
162
/// necessary for indentation.
163
///
164
/// Adds a line break and necessary indentation.
165
///
166
/// If \p DryRun is \c false, also creates and stores the required
167
/// \c Replacement.
168
unsigned addTokenOnNewLine(LineState &State, bool DryRun);
169
170
/// Calculate the new column for a line wrap before the next token.
171
unsigned getNewLineColumn(const LineState &State);
172
173
/// Adds a multiline token to the \p State.
174
///
175
/// \returns Extra penalty for the first line of the literal: last line is
176
/// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
177
/// matter, as we don't change them.
178
unsigned addMultilineToken(const FormatToken &Current, LineState &State);
179
180
/// Returns \c true if the next token starts a multiline string
181
/// literal.
182
///
183
/// This includes implicitly concatenated strings, strings that will be broken
184
/// by clang-format and string literals with escaped newlines.
185
bool nextIsMultilineString(const LineState &State);
186
187
FormatStyle Style;
188
const AdditionalKeywords &Keywords;
189
const SourceManager &SourceMgr;
190
WhitespaceManager &Whitespaces;
191
encoding::Encoding Encoding;
192
bool BinPackInconclusiveFunctions;
193
llvm::Regex CommentPragmasRegex;
194
const RawStringFormatStyleManager RawStringFormats;
195
};
196
197
struct ParenState {
198
ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199
bool AvoidBinPacking, bool NoLineBreak)
200
: Tok(Tok), Indent(Indent), LastSpace(LastSpace),
201
NestedBlockIndent(Indent), IsAligned(false),
202
BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
203
AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
204
NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
205
LastOperatorWrapped(true), ContainsLineBreak(false),
206
ContainsUnwrappedBuilder(false), AlignColons(true),
207
ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
208
NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
209
IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
210
IsWrappedConditional(false), UnindentOperator(false) {}
211
212
/// \brief The token opening this parenthesis level, or nullptr if this level
213
/// is opened by fake parenthesis.
214
///
215
/// Not considered for memoization as it will always have the same value at
216
/// the same token.
217
const FormatToken *Tok;
218
219
/// The position to which a specific parenthesis level needs to be
220
/// indented.
221
unsigned Indent;
222
223
/// The position of the last space on each level.
224
///
225
/// Used e.g. to break like:
226
/// functionCall(Parameter, otherCall(
227
/// OtherParameter));
228
unsigned LastSpace;
229
230
/// If a block relative to this parenthesis level gets wrapped, indent
231
/// it this much.
232
unsigned NestedBlockIndent;
233
234
/// The position the first "<<" operator encountered on each level.
235
///
236
/// Used to align "<<" operators. 0 if no such operator has been encountered
237
/// on a level.
238
unsigned FirstLessLess = 0;
239
240
/// The column of a \c ? in a conditional expression;
241
unsigned QuestionColumn = 0;
242
243
/// The position of the colon in an ObjC method declaration/call.
244
unsigned ColonPos = 0;
245
246
/// The start of the most recent function in a builder-type call.
247
unsigned StartOfFunctionCall = 0;
248
249
/// Contains the start of array subscript expressions, so that they
250
/// can be aligned.
251
unsigned StartOfArraySubscripts = 0;
252
253
/// If a nested name specifier was broken over multiple lines, this
254
/// contains the start column of the second line. Otherwise 0.
255
unsigned NestedNameSpecifierContinuation = 0;
256
257
/// If a call expression was broken over multiple lines, this
258
/// contains the start column of the second line. Otherwise 0.
259
unsigned CallContinuation = 0;
260
261
/// The column of the first variable name in a variable declaration.
262
///
263
/// Used to align further variables if necessary.
264
unsigned VariablePos = 0;
265
266
/// Whether this block's indentation is used for alignment.
267
bool IsAligned : 1;
268
269
/// Whether a newline needs to be inserted before the block's closing
270
/// brace.
271
///
272
/// We only want to insert a newline before the closing brace if there also
273
/// was a newline after the beginning left brace.
274
bool BreakBeforeClosingBrace : 1;
275
276
/// Whether a newline needs to be inserted before the block's closing
277
/// paren.
278
///
279
/// We only want to insert a newline before the closing paren if there also
280
/// was a newline after the beginning left paren.
281
bool BreakBeforeClosingParen : 1;
282
283
/// Avoid bin packing, i.e. multiple parameters/elements on multiple
284
/// lines, in this context.
285
bool AvoidBinPacking : 1;
286
287
/// Break after the next comma (or all the commas in this context if
288
/// \c AvoidBinPacking is \c true).
289
bool BreakBeforeParameter : 1;
290
291
/// Line breaking in this context would break a formatting rule.
292
bool NoLineBreak : 1;
293
294
/// Same as \c NoLineBreak, but is restricted until the end of the
295
/// operand (including the next ",").
296
bool NoLineBreakInOperand : 1;
297
298
/// True if the last binary operator on this level was wrapped to the
299
/// next line.
300
bool LastOperatorWrapped : 1;
301
302
/// \c true if this \c ParenState already contains a line-break.
303
///
304
/// The first line break in a certain \c ParenState causes extra penalty so
305
/// that clang-format prefers similar breaks, i.e. breaks in the same
306
/// parenthesis.
307
bool ContainsLineBreak : 1;
308
309
/// \c true if this \c ParenState contains multiple segments of a
310
/// builder-type call on one line.
311
bool ContainsUnwrappedBuilder : 1;
312
313
/// \c true if the colons of the curren ObjC method expression should
314
/// be aligned.
315
///
316
/// Not considered for memoization as it will always have the same value at
317
/// the same token.
318
bool AlignColons : 1;
319
320
/// \c true if at least one selector name was found in the current
321
/// ObjC method expression.
322
///
323
/// Not considered for memoization as it will always have the same value at
324
/// the same token.
325
bool ObjCSelectorNameFound : 1;
326
327
/// \c true if there are multiple nested blocks inside these parens.
328
///
329
/// Not considered for memoization as it will always have the same value at
330
/// the same token.
331
bool HasMultipleNestedBlocks : 1;
332
333
/// The start of a nested block (e.g. lambda introducer in C++ or
334
/// "function" in JavaScript) is not wrapped to a new line.
335
bool NestedBlockInlined : 1;
336
337
/// \c true if the current \c ParenState represents an Objective-C
338
/// array literal.
339
bool IsInsideObjCArrayLiteral : 1;
340
341
bool IsCSharpGenericTypeConstraint : 1;
342
343
/// \brief true if the current \c ParenState represents the false branch of
344
/// a chained conditional expression (e.g. else-if)
345
bool IsChainedConditional : 1;
346
347
/// \brief true if there conditionnal was wrapped on the first operator (the
348
/// question mark)
349
bool IsWrappedConditional : 1;
350
351
/// \brief Indicates the indent should be reduced by the length of the
352
/// operator.
353
bool UnindentOperator : 1;
354
355
bool operator<(const ParenState &Other) const {
356
if (Indent != Other.Indent)
357
return Indent < Other.Indent;
358
if (LastSpace != Other.LastSpace)
359
return LastSpace < Other.LastSpace;
360
if (NestedBlockIndent != Other.NestedBlockIndent)
361
return NestedBlockIndent < Other.NestedBlockIndent;
362
if (FirstLessLess != Other.FirstLessLess)
363
return FirstLessLess < Other.FirstLessLess;
364
if (IsAligned != Other.IsAligned)
365
return IsAligned;
366
if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
367
return BreakBeforeClosingBrace;
368
if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
369
return BreakBeforeClosingParen;
370
if (QuestionColumn != Other.QuestionColumn)
371
return QuestionColumn < Other.QuestionColumn;
372
if (AvoidBinPacking != Other.AvoidBinPacking)
373
return AvoidBinPacking;
374
if (BreakBeforeParameter != Other.BreakBeforeParameter)
375
return BreakBeforeParameter;
376
if (NoLineBreak != Other.NoLineBreak)
377
return NoLineBreak;
378
if (LastOperatorWrapped != Other.LastOperatorWrapped)
379
return LastOperatorWrapped;
380
if (ColonPos != Other.ColonPos)
381
return ColonPos < Other.ColonPos;
382
if (StartOfFunctionCall != Other.StartOfFunctionCall)
383
return StartOfFunctionCall < Other.StartOfFunctionCall;
384
if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
385
return StartOfArraySubscripts < Other.StartOfArraySubscripts;
386
if (CallContinuation != Other.CallContinuation)
387
return CallContinuation < Other.CallContinuation;
388
if (VariablePos != Other.VariablePos)
389
return VariablePos < Other.VariablePos;
390
if (ContainsLineBreak != Other.ContainsLineBreak)
391
return ContainsLineBreak;
392
if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
393
return ContainsUnwrappedBuilder;
394
if (NestedBlockInlined != Other.NestedBlockInlined)
395
return NestedBlockInlined;
396
if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
397
return IsCSharpGenericTypeConstraint;
398
if (IsChainedConditional != Other.IsChainedConditional)
399
return IsChainedConditional;
400
if (IsWrappedConditional != Other.IsWrappedConditional)
401
return IsWrappedConditional;
402
if (UnindentOperator != Other.UnindentOperator)
403
return UnindentOperator;
404
return false;
405
}
406
};
407
408
/// The current state when indenting an unwrapped line.
409
///
410
/// As the indenting tries different combinations this is copied by value.
411
struct LineState {
412
/// The number of used columns in the current line.
413
unsigned Column;
414
415
/// The token that needs to be next formatted.
416
FormatToken *NextToken;
417
418
/// \c true if \p NextToken should not continue this line.
419
bool NoContinuation;
420
421
/// The \c NestingLevel at the start of this line.
422
unsigned StartOfLineLevel;
423
424
/// The lowest \c NestingLevel on the current line.
425
unsigned LowestLevelOnLine;
426
427
/// The start column of the string literal, if we're in a string
428
/// literal sequence, 0 otherwise.
429
unsigned StartOfStringLiteral;
430
431
/// Disallow line breaks for this line.
432
bool NoLineBreak;
433
434
/// A stack keeping track of properties applying to parenthesis
435
/// levels.
436
SmallVector<ParenState> Stack;
437
438
/// Ignore the stack of \c ParenStates for state comparison.
439
///
440
/// In long and deeply nested unwrapped lines, the current algorithm can
441
/// be insufficient for finding the best formatting with a reasonable amount
442
/// of time and memory. Setting this flag will effectively lead to the
443
/// algorithm not analyzing some combinations. However, these combinations
444
/// rarely contain the optimal solution: In short, accepting a higher
445
/// penalty early would need to lead to different values in the \c
446
/// ParenState stack (in an otherwise identical state) and these different
447
/// values would need to lead to a significant amount of avoided penalty
448
/// later.
449
///
450
/// FIXME: Come up with a better algorithm instead.
451
bool IgnoreStackForComparison;
452
453
/// The indent of the first token.
454
unsigned FirstIndent;
455
456
/// The line that is being formatted.
457
///
458
/// Does not need to be considered for memoization because it doesn't change.
459
const AnnotatedLine *Line;
460
461
/// Comparison operator to be able to used \c LineState in \c map.
462
bool operator<(const LineState &Other) const {
463
if (NextToken != Other.NextToken)
464
return NextToken < Other.NextToken;
465
if (Column != Other.Column)
466
return Column < Other.Column;
467
if (NoContinuation != Other.NoContinuation)
468
return NoContinuation;
469
if (StartOfLineLevel != Other.StartOfLineLevel)
470
return StartOfLineLevel < Other.StartOfLineLevel;
471
if (LowestLevelOnLine != Other.LowestLevelOnLine)
472
return LowestLevelOnLine < Other.LowestLevelOnLine;
473
if (StartOfStringLiteral != Other.StartOfStringLiteral)
474
return StartOfStringLiteral < Other.StartOfStringLiteral;
475
if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
476
return false;
477
return Stack < Other.Stack;
478
}
479
};
480
481
} // end namespace format
482
} // end namespace clang
483
484
#endif
485
486