Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Format/BreakableToken.cpp
35233 views
1
//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// Contains implementation of BreakableToken class and classes derived
11
/// from it.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#include "BreakableToken.h"
16
#include "ContinuationIndenter.h"
17
#include "clang/Basic/CharInfo.h"
18
#include "clang/Format/Format.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/Support/Debug.h"
21
#include <algorithm>
22
23
#define DEBUG_TYPE "format-token-breaker"
24
25
namespace clang {
26
namespace format {
27
28
// The set of characters this file treats as blanks when trimming and searching
// for split points: space, tab, vertical tab, form feed and carriage return.
// '\n' is not part of the set.
static constexpr StringRef Blanks = " \t\v\f\r";
29
/// Returns true if \p C is one of the blank characters: space, tab, vertical
/// tab, form feed or carriage return. A newline is not considered blank.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}
41
42
/// Returns the indent prefix of the line comment \p Comment: the first known
/// comment marker it starts with (markers are tried longest first) together
/// with the run of spaces that directly follows the marker.
///
/// TextProto uses its own marker list; every other language uses the C-style
/// list. Returns an empty StringRef if \p Comment starts with no known marker.
static StringRef getLineCommentIndentPrefix(StringRef Comment,
                                            const FormatStyle &Style) {
  static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
                                                      "//!",  "//:",  "//"};
  static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
                                                         "//", "#"};
  const bool IsTextProto = Style.Language == FormatStyle::LK_TextProto;
  const ArrayRef<StringRef> KnownPrefixes =
      IsTextProto ? ArrayRef<StringRef>(KnownTextProtoPrefixes)
                  : ArrayRef<StringRef>(KnownCStylePrefixes);

  // The first match wins below, so longer markers have to precede the shorter
  // markers they start with.
  assert(
      llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
        return Lhs.size() > Rhs.size();
      }));

  for (StringRef Candidate : KnownPrefixes) {
    if (!Comment.starts_with(Candidate))
      continue;
    // Extend the marker over the spaces that follow it. When nothing but
    // spaces follows, the search yields npos and the whole comment is returned.
    const auto PrefixLength = Comment.find_first_not_of(' ', Candidate.size());
    return Comment.substr(0, PrefixLength);
  }
  return {};
}
66
67
/// Computes where to break the comment text \p Text so that the part before
/// the break fits between \p ContentStartColumn and \p ColumnLimit.
///
/// The result is a Split whose first member is the length of the text kept on
/// the current line (trailing blanks trimmed) and whose second member is the
/// number of blank bytes swallowed by the break. Split(npos, 0) is returned
/// when no acceptable break position exists.
///
/// \p DecorationEndsWithStar tells the function that the decoration inserted
/// after the break ends in '*', in which case leading blanks before a '/' are
/// kept so that no "*/" is created by accident.
static BreakableToken::Split
getCommentSplit(StringRef Text, unsigned ContentStartColumn,
                unsigned ColumnLimit, unsigned TabWidth,
                encoding::Encoding Encoding, const FormatStyle &Style,
                bool DecorationEndsWithStar = false) {
  LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
                          << "\", Column limit: " << ColumnLimit
                          << ", Content start: " << ContentStartColumn << "\n");
  const BreakableToken::Split NoSplit(StringRef::npos, 0);
  if (ColumnLimit <= ContentStartColumn + 1)
    return NoSplit;

  // Translate the column budget into a byte offset, advancing one code point
  // at a time so that multi-byte characters and tabs are measured correctly.
  const unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
  unsigned MaxSplitBytes = 0;
  unsigned NumChars = 0;
  while (NumChars < MaxSplit && MaxSplitBytes < Text.size()) {
    const unsigned BytesInChar =
        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
    NumChars += encoding::columnWidthWithTabs(
        Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
        TabWidth, Encoding);
    MaxSplitBytes += BytesInChar;
  }

  // In JavaScript, some @tags can be followed by {, and tools parsing these
  // comments get confused by a line break in between. If the first blank at
  // or after the limit precedes a '{', extend the search range past it.
  if (Style.isJavaScript()) {
    const StringRef::size_type NextBlank =
        Text.find_first_of(Blanks, MaxSplitBytes);
    if (NextBlank != StringRef::npos && NextBlank + 1 < Text.size() &&
        Text[NextBlank + 1] == '{') {
      MaxSplitBytes = NextBlank + 1;
    }
  }

  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);

  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
  // Walk backwards over candidate blanks until one is acceptable to break on.
  while (SpaceOffset != StringRef::npos) {
    // A line comment ending in '\' continues onto the next line, which is
    // confusing and triggers -Wcomment; never break right after a backslash.
    if (Style.isCpp()) {
      const StringRef::size_type LastNonBlank =
          Text.find_last_not_of(Blanks, SpaceOffset);
      if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
        SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
        continue;
      }
    }

    // A new line starting with "<number>." would read as a numbered list item
    // and would no longer be re-flowed by later passes.
    if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
      SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
      continue;
    }

    // Never break in front of a @tag or a '{' in JavaScript.
    if (Style.isJavaScript() && SpaceOffset + 1 < Text.size()) {
      const char Next = Text[SpaceOffset + 1];
      if (Next == '{' || Next == '@') {
        SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
        continue;
      }
    }

    break;
  }

  // No usable blank before the limit, or only leading whitespace precedes the
  // candidate: fall back to the first blank after the limit.
  if (SpaceOffset == StringRef::npos ||
      Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
    const StringRef::size_type FirstNonWhitespace =
        Text.find_first_not_of(Blanks);
    // A comment consisting only of whitespace cannot be split.
    if (FirstNonWhitespace == StringRef::npos)
      return NoSplit;
    // Start at whichever is further right so that leading whitespace reaching
    // past the limit is never chosen as the break.
    SpaceOffset = Text.find_first_of(
        Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
  }

  if (SpaceOffset == StringRef::npos || SpaceOffset == 0)
    return NoSplit;

  // adaptStartOfLine will break after lines starting with /** if the comment
  // is broken anywhere. For example /** longtextcomesherethatbreaks */ (with
  // ColumnLimit 20) gets a break after /**, so that break must not be emitted
  // a second time here.
  if (SpaceOffset == 1 && Text[0] == '*')
    return NoSplit;

  const StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
  StringRef AfterCut = Text.substr(SpaceOffset);
  // Keep the leading blank if dropping it would put a '/' directly behind a
  // decoration that ends in '*'.
  const bool WouldFormCommentEnd =
      DecorationEndsWithStar && AfterCut.size() > 1 && AfterCut[1] == '/';
  if (!WouldFormCommentEnd)
    AfterCut = AfterCut.ltrim(Blanks);
  return BreakableToken::Split(BeforeCut.size(),
                               AfterCut.begin() - BeforeCut.end());
}
169
170
/// Finds a position at which the string literal content \p Text can be broken
/// so that the first part fits into the columns between \p UsedColumns and
/// \p ColumnLimit.
///
/// Candidate break positions are ranked: after the last blank, after the last
/// '/', after the last single-byte non-alphanumeric character, and finally
/// after the last character that still fits. The returned Split never removes
/// characters (its second member is 0); Split(npos, 0) means "cannot split".
static BreakableToken::Split
getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
               unsigned TabWidth, encoding::Encoding Encoding) {
  // FIXME: Reduce unit test case.
  if (Text.empty() || ColumnLimit <= UsedColumns)
    return BreakableToken::Split(StringRef::npos, 0);

  const unsigned MaxSplit = ColumnLimit - UsedColumns;
  // Offset 0 doubles as "not found" for all of the candidates below.
  StringRef::size_type SpaceOffset = 0;
  StringRef::size_type SlashOffset = 0;
  StringRef::size_type WordStartOffset = 0;
  StringRef::size_type SplitPoint = 0;

  unsigned Chars = 0;
  while (true) {
    const char Current = Text[0];
    unsigned Advance;
    if (Current == '\\') {
      // Escape sequences are consumed as a unit so they are never split.
      Advance = encoding::getEscapeSequenceLength(Text);
      Chars += Advance;
    } else {
      Advance = encoding::getCodePointNumBytes(Current, Encoding);
      Chars += encoding::columnWidthWithTabs(
          Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
    }

    // Stop once the budget is exceeded or this was the last character.
    if (Chars > MaxSplit || Text.size() <= Advance)
      break;

    if (IsBlank(Current))
      SpaceOffset = SplitPoint;
    if (Current == '/')
      SlashOffset = SplitPoint;
    if (Advance == 1 && !isAlphanumeric(Current))
      WordStartOffset = SplitPoint;

    SplitPoint += Advance;
    Text = Text.substr(Advance);
  }

  // Pick the best-ranked candidate; the break goes right after that character.
  for (const StringRef::size_type Candidate :
       {SpaceOffset, SlashOffset, WordStartOffset}) {
    if (Candidate != 0)
      return BreakableToken::Split(Candidate + 1, 0);
  }
  if (SplitPoint != 0)
    return BreakableToken::Split(SplitPoint, 0);
  return BreakableToken::Split(StringRef::npos, 0);
}
218
219
bool switchesFormatting(const FormatToken &Token) {
220
assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
221
"formatting regions are switched by comment tokens");
222
StringRef Content = Token.TokenText.substr(2).ltrim();
223
return Content.starts_with("clang-format on") ||
224
Content.starts_with("clang-format off");
225
}
226
227
unsigned
228
BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
229
Split Split) const {
230
// Example: consider the content
231
// lala lala
232
// - RemainingTokenColumns is the original number of columns, 10;
233
// - Split is (4, 2), denoting the two spaces between the two words;
234
//
235
// We compute the number of columns when the split is compressed into a single
236
// space, like:
237
// lala lala
238
//
239
// FIXME: Correctly measure the length of whitespace in Split.second so it
240
// works with tabs.
241
return RemainingTokenColumns + 1 - Split.second;
242
}
243
244
/// A breakable string literal is always treated as a single line.
unsigned BreakableStringLiteral::getLineCount() const {
  return 1;
}
245
246
unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
247
unsigned Offset,
248
StringRef::size_type Length,
249
unsigned StartColumn) const {
250
llvm_unreachable("Getting the length of a part of the string literal "
251
"indicates that the code tries to reflow it.");
252
}
253
254
unsigned
255
BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
256
unsigned StartColumn) const {
257
return UnbreakableTailLength + Postfix.size() +
258
encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
259
Style.TabWidth, Encoding);
260
}
261
262
unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
263
bool Break) const {
264
return StartColumn + Prefix.size();
265
}
266
267
/// Creates a breakable view of the string literal token \p Tok. \p Prefix and
/// \p Postfix are the delimiters the token text starts and ends with; the text
/// between them is stored in Line as the breakable content.
BreakableStringLiteral::BreakableStringLiteral(
    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
    StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableToken(Tok, InPPDirective, Encoding, Style),
      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
      UnbreakableTailLength(UnbreakableTailLength) {
  const StringRef Text = Tok.TokenText;
  assert(Text.starts_with(Prefix) && Text.ends_with(Postfix));
  // Strip the delimiters on both ends to obtain the content.
  const auto ContentLength = Text.size() - Prefix.size() - Postfix.size();
  Line = Text.substr(Prefix.size(), ContentLength);
}
278
279
/// Computes a split for the literal content starting at \p TailOffset. The
/// column limit is reduced by the postfix width, since the postfix has to be
/// appended to the line before the break. \p LineIndex and
/// \p CommentPragmasRegex are unused.
BreakableToken::Split BreakableStringLiteral::getSplit(
    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
    unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
  const unsigned LimitWithoutPostfix = ColumnLimit - Postfix.size();
  const StringRef Remaining = Line.substr(TailOffset);
  return getStringSplit(Remaining, ContentStartColumn, LimitWithoutPostfix,
                        Style.TabWidth, Encoding);
}
285
286
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
287
unsigned TailOffset, Split Split,
288
unsigned ContentIndent,
289
WhitespaceManager &Whitespaces) const {
290
Whitespaces.replaceWhitespaceInToken(
291
Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
292
Prefix, InPPDirective, 1, StartColumn);
293
}
294
295
/// Creates a breakable string literal whose pieces are joined by an operator
/// ('+', or ',' inside braces for Verilog) instead of plain adjacency.
///
/// Selects the quote/brace replacement strings for \p QuoteStyle and the
/// current style, and computes the indent of continuation lines.
/// \p UnindentPlus requests that a leading "+ " is pulled left when operands
/// are aligned after the operator.
BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
    const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
    unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableStringLiteral(
          Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
          : QuoteStyle == AtDoubleQuotes                          ? "@\""
                                                                  : "\"",
          /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
          UnbreakableTailLength, InPPDirective, Encoding, Style),
      BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
      QuoteStyle(QuoteStyle) {
  // The replacement texts below are string literals rather than strings
  // concatenated at run time because they have to outlive this object. The
  // brace replacements include a quote so that WhitespaceManager can tell them
  // apart from whitespace replacements between the string and its neighbours.

  // Placing the operator on the new line is not implemented for JavaScript.
  const bool SignOnNewLine =
      !Style.isJavaScript() &&
      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;

  if (Style.isVerilog()) {
    // Verilog strings always use double quotes, are joined by commas and are
    // wrapped in braces. The comma always precedes the newline.
    assert(QuoteStyle == DoubleQuotes);
    LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
    RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
    Postfix = "\",";
    Prefix = "\"";
  } else if (QuoteStyle == SingleQuotes) {
    // The plus sign may be on either line; C# and JavaScript also have several
    // quoting styles.
    const bool SpaceInParens = Style.SpacesInParensOptions.Other;
    LeftBraceQuote = SpaceInParens ? "( '" : "('";
    RightBraceQuote = SpaceInParens ? "' )" : "')";
    Postfix = SignOnNewLine ? "'" : "' +";
    Prefix = SignOnNewLine ? "+ '" : "'";
  } else {
    const bool SpaceInParens = Style.SpacesInParensOptions.Other;
    if (QuoteStyle == AtDoubleQuotes) {
      LeftBraceQuote = SpaceInParens ? "( @" : "(@";
      Prefix = SignOnNewLine ? "+ @\"" : "@\"";
    } else {
      LeftBraceQuote = SpaceInParens ? "( \"" : "(\"";
      Prefix = SignOnNewLine ? "+ \"" : "\"";
    }
    RightBraceQuote = SpaceInParens ? "\" )" : "\")";
    Postfix = SignOnNewLine ? "\"" : "\" +";
  }

  // Continuation lines are indented by the width of the brace (and the space
  // after it, if any); the trailing quote character is not counted.
  ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
  // The plus sign may need to be unindented depending on the style.
  // FIXME: Add support for DontAlign.
  const bool UnindentOperator =
      !Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
      Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator;
  if (UnindentOperator)
    ContinuationIndent -= 2;
}
356
357
unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
358
unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
359
return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
360
encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
361
Style.TabWidth, Encoding);
362
}
363
364
unsigned
365
BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
366
bool Break) const {
367
return std::max(
368
0,
369
static_cast<int>(StartColumn) +
370
(Break ? ContinuationIndent + static_cast<int>(Prefix.size())
371
: (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
372
: 0) +
373
(QuoteStyle == AtDoubleQuotes ? 2 : 1)));
374
}
375
376
void BreakableStringLiteralUsingOperators::insertBreak(
377
unsigned LineIndex, unsigned TailOffset, Split Split,
378
unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
379
Whitespaces.replaceWhitespaceInToken(
380
Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
381
Split.first,
382
/*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
383
/*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
384
/*Spaces=*/
385
std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
386
}
387
388
void BreakableStringLiteralUsingOperators::updateAfterBroken(
389
WhitespaceManager &Whitespaces) const {
390
// Add the braces required for breaking the token if they are needed.
391
if (!BracesNeeded)
392
return;
393
394
// To add a brace or parenthesis, we replace the quote (or the at sign) with a
395
// brace and another quote. This is because the rest of the program requires
396
// one replacement for each source range. If we replace the empty strings
397
// around the string, it may conflict with whitespace replacements between the
398
// string and adjacent tokens.
399
Whitespaces.replaceWhitespaceInToken(
400
Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
401
/*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
402
/*Spaces=*/0);
403
Whitespaces.replaceWhitespaceInToken(
404
Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
405
/*PreviousPostfix=*/RightBraceQuote,
406
/*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
407
}
408
409
/// Initializes the state shared by all breakable comments: forwards the token,
/// preprocessor flag, encoding and style to BreakableToken and records the
/// column at which the comment starts.
BreakableComment::BreakableComment(const FormatToken &Token,
                                   unsigned StartColumn, bool InPPDirective,
                                   encoding::Encoding Encoding,
                                   const FormatStyle &Style)
    : BreakableToken(Token, InPPDirective, Encoding, Style),
      StartColumn(StartColumn) {
}
415
416
/// Returns the number of lines this comment consists of.
unsigned BreakableComment::getLineCount() const {
  return Lines.size();
}
417
418
/// Computes a split for the content of line \p LineIndex starting at
/// \p TailOffset. Lines whose whole content matches \p CommentPragmasRegex are
/// never broken and yield Split(npos, 0).
BreakableToken::Split
BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
                           unsigned ColumnLimit, unsigned ContentStartColumn,
                           const llvm::Regex &CommentPragmasRegex) const {
  const StringRef LineContent = Content[LineIndex];
  // Comment pragmas must stay on one line.
  if (CommentPragmasRegex.match(LineContent))
    return Split(StringRef::npos, 0);
  return getCommentSplit(LineContent.substr(TailOffset), ContentStartColumn,
                         ColumnLimit, Style.TabWidth, Encoding, Style);
}
429
430
void BreakableComment::compressWhitespace(
431
unsigned LineIndex, unsigned TailOffset, Split Split,
432
WhitespaceManager &Whitespaces) const {
433
StringRef Text = Content[LineIndex].substr(TailOffset);
434
// Text is relative to the content line, but Whitespaces operates relative to
435
// the start of the corresponding token, so compute the start of the Split
436
// that needs to be compressed into a single space relative to the start of
437
// its token.
438
unsigned BreakOffsetInToken =
439
Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
440
unsigned CharsToRemove = Split.second;
441
Whitespaces.replaceWhitespaceInToken(
442
tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
443
/*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
444
}
445
446
/// Returns the token that holds line \p LineIndex: the entry recorded in
/// Tokens if it is non-null, otherwise the comment's own token.
const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
  if (const FormatToken *LineToken = Tokens[LineIndex])
    return *LineToken;
  return Tok;
}
449
450
/// Heuristically decides whether the comment line \p Content may be merged
/// with neighbouring lines.
///
/// After trimming blanks, the content is reflowable only if it has at least
/// two characters, does not end in a backslash, does not start with a prefix
/// that carries special meaning (tags, TODO-style markers, list bullets,
/// numbered list items), and at least one of its first two characters is not
/// punctuation.
static bool mayReflowContent(StringRef Content) {
  Content = Content.trim(Blanks);

  // Numbered lists may start with a number followed by '.'. Only numbers of up
  // to two digits are considered, so that a larger number ending the previous
  // sentence is not mistaken for a list item.
  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");

  if (Content.size() < 2 || Content.ends_with("\\"))
    return false;

  // Lines starting with '@' or '\' commonly have special meaning; lines
  // starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
  static constexpr StringRef SpecialPrefixes[] = {
      "@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "};
  for (StringRef Special : SpecialPrefixes) {
    if (Content.starts_with(Special))
      return false;
  }
  if (kNumberedListRegexp.match(Content))
    return false;

  // Either the first or the second character must be non-punctuation. This is
  // UTF-8 safe: if isPunctuation(Content[0]) is true, the first code point is
  // one byte long.
  return !isPunctuation(Content[0]) || !isPunctuation(Content[1]);
}
479
480
/// Builds the line-by-line model of the block comment \p Token.
///
/// The text between "/*" and "*/" is split into Lines; Content and
/// ContentColumn describe each line's text and start column after whitespace
/// adjustment. The constructor also derives the decoration shared by the
/// lines (by default "* "), the column the decoration is aligned to, the
/// indent used for newly introduced breaks, and, for JavaScript and Java,
/// whether the comment delimiters should be put on their own lines.
///
/// \p OriginalStartColumn is the column the comment had in the input; the
/// difference to \p StartColumn is applied to all following lines.
/// \p FirstInLine tells whether the comment is the first token on its line.
/// \p UseCRLF selects "\r\n" instead of "\n" as the line separator.
BreakableBlockComment::BreakableBlockComment(
    const FormatToken &Token, unsigned StartColumn,
    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
      DelimitersOnNewline(false),
      UnbreakableTailLength(Token.UnbreakableTailLength) {
  assert(Tok.is(TT_BlockComment) &&
         "block comment section must start with a block comment");

  // Drop the "/*" and "*/" delimiters and split what remains into lines.
  StringRef TokenText(Tok.TokenText);
  assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
  TokenText.substr(2, TokenText.size() - 4)
      .split(Lines, UseCRLF ? "\r\n" : "\n");
  const size_t LineCount = Lines.size();

  const int IndentDelta = StartColumn - OriginalStartColumn;
  Content.resize(LineCount);
  Content[0] = Lines[0];
  ContentColumn.resize(LineCount);
  // The first line's content begins right after the initial "/*".
  ContentColumn[0] = StartColumn + 2;
  Tokens.resize(LineCount);
  for (size_t Index = 1; Index < LineCount; ++Index)
    adjustWhitespace(Index, IndentDelta);

  // Decorations are normally aligned with the star of the opening "/*", one
  // column after the comment start. The exception is this pattern, where the
  // decoration sits in the start column itself:
  //
  // /*
  // ** blah blah blah
  // */
  const bool SecondLineHasDoubleStar =
      LineCount >= 2 && Content[1].starts_with("**") &&
      static_cast<unsigned>(ContentColumn[1]) == StartColumn;
  DecorationColumn = SecondLineHasDoubleStar ? StartColumn : StartColumn + 1;

  // Single-line comments that are not first on their line can start in an
  // arbitrary column, and the remaining horizontal space can be too small to
  // align following lines with the first one.
  // FIXME: We could, probably, align them to current indentation level, but
  // now we just wrap them without stars.
  Decoration = (LineCount == 1 && !FirstInLine) ? "" : "* ";

  // Shorten the decoration until every relevant line starts with it.
  for (size_t Index = 1, End = Content.size();
       Index < End && !Decoration.empty(); ++Index) {
    const StringRef &Text = Content[Index];
    const bool IsLastLine = Index + 1 == End;
    if (IsLastLine) {
      // If the last line is empty, the closing "*/" will have a star.
      if (Text.empty())
        break;
    } else if (!Text.empty() && Decoration.starts_with(Text)) {
      // The line consists of (a prefix of) the decoration only.
      continue;
    }
    while (!Text.starts_with(Decoration))
      Decoration = Decoration.drop_back(1);
  }

  LastLineNeedsDecoration = true;
  IndentAtLineBreak = ContentColumn[0] + 1;
  for (size_t Index = 1; Index < LineCount; ++Index) {
    if (Content[Index].empty()) {
      const bool IsLastLine = Index + 1 == LineCount;
      if (IsLastLine) {
        // An empty last line means the star of the trailing "*/" already acts
        // as the decoration. Whitespace is preserved so that "*/" ends up
        // correctly indented.
        LastLineNeedsDecoration = false;
        // Align the star of the final "*/" with the stars above it.
        if (LineCount >= 2 && !Decoration.empty())
          ContentColumn[Index] = DecorationColumn;
      } else if (Decoration.empty()) {
        // Other empty lines start in column 0 so that no trailing whitespace
        // gets inserted.
        ContentColumn[Index] = 0;
      }
      continue;
    }

    // The first line already excludes the star, and so does the last line if
    // LastLineNeedsDecoration is false. All other lines are adjusted to
    // exclude the star and (optionally) the first whitespace.
    const unsigned DecorationSize = Decoration.starts_with(Content[Index])
                                        ? Content[Index].size()
                                        : Decoration.size();
    if (DecorationSize)
      ContentColumn[Index] = DecorationColumn + DecorationSize;
    Content[Index] = Content[Index].substr(DecorationSize);
    if (!Decoration.starts_with(Content[Index])) {
      IndentAtLineBreak =
          std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[Index]));
    }
  }
  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());

  // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
  if (Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) {
    const StringRef FirstLine = Lines[0];
    const bool StartsWithStarSpace = FirstLine.starts_with("* ");
    if ((FirstLine == "*" || StartsWithStarSpace) && LineCount > 1) {
      // This is a multiline jsdoc comment.
      DelimitersOnNewline = true;
    } else if (StartsWithStarSpace && LineCount == 1) {
      // Detect a long single-line comment such as
      //   /** long long long */
      // The trailing '2' is the width of "*/".
      const unsigned EndColumn =
          ContentColumn[0] +
          encoding::columnWidthWithTabs(FirstLine, ContentColumn[0],
                                        Style.TabWidth, Encoding) +
          2;
      DelimitersOnNewline = EndColumn > Style.ColumnLimit;
    }
  }

  LLVM_DEBUG({
    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
    llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
    for (size_t i = 0; i < Lines.size(); ++i) {
      llvm::dbgs() << i << " |" << Content[i] << "| "
                   << "CC=" << ContentColumn[i] << "| "
                   << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
    }
  });
}
606
607
/// Computes a split for the content of line \p LineIndex starting at
/// \p TailOffset. Lines whose whole content matches \p CommentPragmasRegex are
/// never broken. getCommentSplit is told whether the decoration ends in '*'
/// so that it can avoid producing a "*/" right after the break.
BreakableToken::Split BreakableBlockComment::getSplit(
    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
    unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
  const StringRef LineContent = Content[LineIndex];
  // Comment pragmas must stay on one line.
  if (CommentPragmasRegex.match(LineContent))
    return Split(StringRef::npos, 0);
  const bool DecorationEndsWithStar = Decoration.ends_with("*");
  return getCommentSplit(LineContent.substr(TailOffset), ContentStartColumn,
                         ColumnLimit, Style.TabWidth, Encoding, Style,
                         DecorationEndsWithStar);
}
617
618
void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
619
int IndentDelta) {
620
// When in a preprocessor directive, the trailing backslash in a block comment
621
// is not needed, but can serve a purpose of uniformity with necessary escaped
622
// newlines outside the comment. In this case we remove it here before
623
// trimming the trailing whitespace. The backslash will be re-added later when
624
// inserting a line break.
625
size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
626
if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
627
--EndOfPreviousLine;
628
629
// Calculate the end of the non-whitespace text in the previous line.
630
EndOfPreviousLine =
631
Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
632
if (EndOfPreviousLine == StringRef::npos)
633
EndOfPreviousLine = 0;
634
else
635
++EndOfPreviousLine;
636
// Calculate the start of the non-whitespace text in the current line.
637
size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
638
if (StartOfLine == StringRef::npos)
639
StartOfLine = Lines[LineIndex].size();
640
641
StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
642
// Adjust Lines to only contain relevant text.
643
size_t PreviousContentOffset =
644
Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
645
Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
646
PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
647
Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
648
649
// Adjust the start column uniformly across all lines.
650
ContentColumn[LineIndex] =
651
encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
652
IndentDelta;
653
}
654
655
unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
656
unsigned Offset,
657
StringRef::size_type Length,
658
unsigned StartColumn) const {
659
return encoding::columnWidthWithTabs(
660
Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
661
Encoding);
662
}
663
664
unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
665
unsigned Offset,
666
unsigned StartColumn) const {
667
unsigned LineLength =
668
UnbreakableTailLength +
669
getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
670
if (LineIndex + 1 == Lines.size()) {
671
LineLength += 2;
672
// We never need a decoration when breaking just the trailing "*/" postfix.
673
bool HasRemainingText = Offset < Content[LineIndex].size();
674
if (!HasRemainingText) {
675
bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
676
if (HasDecoration)
677
LineLength -= Decoration.size();
678
}
679
}
680
return LineLength;
681
}
682
683
unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
684
bool Break) const {
685
if (Break)
686
return IndentAtLineBreak;
687
return std::max(0, ContentColumn[LineIndex]);
688
}
689
690
/// Javadoc/JSDoc annotations after which wrapped continuation lines receive an
/// extra content indent (see getContentIndent).
const llvm::StringSet<>
    BreakableBlockComment::ContentIndentingJavadocAnnotations = {
        "@param",
        "@return",
        "@returns",
        "@throws",
        "@type",
        "@template",
        "@see",
        "@deprecated",
        "@define",
        "@exports",
        "@mods",
        "@private",
};
695
696
unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
697
if (Style.Language != FormatStyle::LK_Java && !Style.isJavaScript())
698
return 0;
699
// The content at LineIndex 0 of a comment like:
700
// /** line 0 */
701
// is "* line 0", so we need to skip over the decoration in that case.
702
StringRef ContentWithNoDecoration = Content[LineIndex];
703
if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
704
ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
705
StringRef FirstWord = ContentWithNoDecoration.substr(
706
0, ContentWithNoDecoration.find_first_of(Blanks));
707
if (ContentIndentingJavadocAnnotations.contains(FirstWord))
708
return Style.ContinuationIndentWidth;
709
return 0;
710
}
711
712
void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
713
Split Split, unsigned ContentIndent,
714
WhitespaceManager &Whitespaces) const {
715
StringRef Text = Content[LineIndex].substr(TailOffset);
716
StringRef Prefix = Decoration;
717
// We need this to account for the case when we have a decoration "* " for all
718
// the lines except for the last one, where the star in "*/" acts as a
719
// decoration.
720
unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
721
if (LineIndex + 1 == Lines.size() &&
722
Text.size() == Split.first + Split.second) {
723
// For the last line we need to break before "*/", but not to add "* ".
724
Prefix = "";
725
if (LocalIndentAtLineBreak >= 2)
726
LocalIndentAtLineBreak -= 2;
727
}
728
// The split offset is from the beginning of the line. Convert it to an offset
729
// from the beginning of the token text.
730
unsigned BreakOffsetInToken =
731
Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
732
unsigned CharsToRemove = Split.second;
733
assert(LocalIndentAtLineBreak >= Prefix.size());
734
std::string PrefixWithTrailingIndent = std::string(Prefix);
735
PrefixWithTrailingIndent.append(ContentIndent, ' ');
736
Whitespaces.replaceWhitespaceInToken(
737
tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
738
PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
739
/*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
740
PrefixWithTrailingIndent.size());
741
}
742
743
/// Returns the split describing the leading whitespace of line \p LineIndex
/// that has to go when the line is merged into the previous one, or
/// Split(npos, 0) if the line must not be reflowed.
BreakableToken::Split BreakableBlockComment::getReflowSplit(
    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
  const Split NoReflow(StringRef::npos, 0);
  if (!mayReflow(LineIndex, CommentPragmasRegex))
    return NoReflow;

  const size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
  const bool HasNonBlank = Trimmed != StringRef::npos;

  // When the previous line has a content indent, only reflow this line if its
  // leading whitespace is exactly that wide.
  if (LineIndex > 0) {
    const unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
    if (PreviousContentIndent && HasNonBlank &&
        Trimmed != PreviousContentIndent) {
      return NoReflow;
    }
  }

  return Split(0, HasNonBlank ? Trimmed : 0);
}
761
762
bool BreakableBlockComment::introducesBreakBeforeToken() const {
763
// A break is introduced when we want delimiters on newline.
764
return DelimitersOnNewline &&
765
Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
766
}
767
768
void BreakableBlockComment::reflow(unsigned LineIndex,
769
WhitespaceManager &Whitespaces) const {
770
StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
771
// Here we need to reflow.
772
assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
773
"Reflowing whitespace within a token");
774
// This is the offset of the end of the last line relative to the start of
775
// the token text in the token.
776
unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
777
Content[LineIndex - 1].size() -
778
tokenAt(LineIndex).TokenText.data();
779
unsigned WhitespaceLength = TrimmedContent.data() -
780
tokenAt(LineIndex).TokenText.data() -
781
WhitespaceOffsetInToken;
782
Whitespaces.replaceWhitespaceInToken(
783
tokenAt(LineIndex), WhitespaceOffsetInToken,
784
/*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
785
/*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
786
/*Spaces=*/0);
787
}
788
789
void BreakableBlockComment::adaptStartOfLine(
790
unsigned LineIndex, WhitespaceManager &Whitespaces) const {
791
if (LineIndex == 0) {
792
if (DelimitersOnNewline) {
793
// Since we're breaking at index 1 below, the break position and the
794
// break length are the same.
795
// Note: this works because getCommentSplit is careful never to split at
796
// the beginning of a line.
797
size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
798
if (BreakLength != StringRef::npos) {
799
insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
800
Whitespaces);
801
}
802
}
803
return;
804
}
805
// Here no reflow with the previous line will happen.
806
// Fix the decoration of the line at LineIndex.
807
StringRef Prefix = Decoration;
808
if (Content[LineIndex].empty()) {
809
if (LineIndex + 1 == Lines.size()) {
810
if (!LastLineNeedsDecoration) {
811
// If the last line was empty, we don't need a prefix, as the */ will
812
// line up with the decoration (if it exists).
813
Prefix = "";
814
}
815
} else if (!Decoration.empty()) {
816
// For other empty lines, if we do have a decoration, adapt it to not
817
// contain a trailing whitespace.
818
Prefix = Prefix.substr(0, 1);
819
}
820
} else if (ContentColumn[LineIndex] == 1) {
821
// This line starts immediately after the decorating *.
822
Prefix = Prefix.substr(0, 1);
823
}
824
// This is the offset of the end of the last line relative to the start of the
825
// token text in the token.
826
unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
827
Content[LineIndex - 1].size() -
828
tokenAt(LineIndex).TokenText.data();
829
unsigned WhitespaceLength = Content[LineIndex].data() -
830
tokenAt(LineIndex).TokenText.data() -
831
WhitespaceOffsetInToken;
832
Whitespaces.replaceWhitespaceInToken(
833
tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
834
InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
835
}
836
837
// Returns the split covering the trailing blanks of the last line (starting
// at TailOffset) so that they can be replaced by a newline when delimiters go
// on their own line. Returns Split(StringRef::npos, 0) when delimiters stay
// in place or when the remaining text is empty, in which case the ending '*/'
// is already on its own line.
BreakableToken::Split
BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
  const Split NoSplit(StringRef::npos, 0);
  if (!DelimitersOnNewline)
    return NoSplit;

  const StringRef Tail = Content.back().substr(TailOffset);
  const size_t TextLength = Tail.rtrim(Blanks).size();
  if (TextLength == 0)
    return NoSplit;

  // Split right after the text and swallow the blanks that follow it.
  return Split(TextLength, Tail.size() - TextLength);
}
850
851
bool BreakableBlockComment::mayReflow(
852
unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
853
// Content[LineIndex] may exclude the indent after the '*' decoration. In that
854
// case, we compute the start of the comment pragma manually.
855
StringRef IndentContent = Content[LineIndex];
856
if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
857
IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
858
return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
859
mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
860
!switchesFormatting(tokenAt(LineIndex));
861
}
862
863
// Builds a section starting at Tok and following the Next chain for as long
// as the tokens are line comments that continue the section. For every
// physical line the constructor records the original indent prefix, the
// (possibly re-spaced) prefix, the content after the prefix with trailing
// blanks removed, and the column at which that content starts.
BreakableLineCommentSection::BreakableLineCommentSection(
    const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
    encoding::Encoding Encoding, const FormatStyle &Style)
    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
  assert(Tok.is(TT_LineComment) &&
         "line comment section must start with a line comment");

  const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
  // Token recorded for the lines currently being processed; it is still null
  // while the lines of the first token are handled.
  FormatToken *LineTok = nullptr;
  // Number of spaces added to (or removed from) the prefix of the first line
  // of a run; the same change is applied to the lines that follow it.
  int FirstLineSpaceChange = 0;

  for (const FormatToken *CurrentTok = &Tok;
       CurrentTok && CurrentTok->is(TT_LineComment);
       CurrentTok = CurrentTok->Next) {
    LastLineTok = LineTok;
    const StringRef TokenText = CurrentTok->TokenText;
    assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
           "unsupported line comment prefix, '//' and '#' are supported");

    // Append the lines of this token and grow the per-line tables to match.
    const size_t FirstLineIndex = Lines.size();
    TokenText.split(Lines, "\n");
    const size_t LineCount = Lines.size();
    Content.resize(LineCount);
    ContentColumn.resize(LineCount);
    PrefixSpaceChange.resize(LineCount);
    Tokens.resize(LineCount);
    Prefix.resize(LineCount);
    OriginalPrefix.resize(LineCount);

    for (size_t i = FirstLineIndex; i < LineCount; ++i) {
      Lines[i] = Lines[i].ltrim(Blanks);
      const StringRef IndentPrefix =
          getLineCommentIndentPrefix(Lines[i], Style);
      OriginalPrefix[i] = IndentPrefix;
      const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
      const size_t ContentStart = IndentPrefix.size();

      // Decides whether the first character after the prefix is one in front
      // of which no space should be inserted. Unlike helpers such as
      // isPunctuation from CharInfo, this also takes multibyte characters
      // into account.
      const auto NoSpaceBeforeFirstCommentChar = [&]() -> bool {
        assert(Lines[i].size() > IndentPrefix.size());
        const char FirstChar = Lines[i][ContentStart];
        const unsigned FirstCharBytes =
            encoding::getCodePointNumBytes(FirstChar, Encoding);
        const StringRef FirstCodePoint =
            Lines[i].substr(ContentStart, FirstCharBytes);
        if (encoding::columnWidth(FirstCodePoint, Encoding) != 1)
          return false;
        // In C-like comments a space is added before '#'; this preserves the
        // relative indentation when commenting out code with #includes.
        //
        // In languages that use '#' as the comment leader, such as proto, no
        // space is added in order to support patterns like:
        // #########
        // # section
        // #########
        if (FirstChar == '#' && !TokenText.starts_with("#"))
          return false;
        return FirstChar == '\\' || isPunctuation(FirstChar) ||
               isHorizontalWhitespace(FirstChar);
      };

      // The space change is computed on the first line of the section and
      // again whenever the non-space part of the prefix changes, e.g. from
      // "///" to "//". Lines in between only inherit the change; the maximum
      // is not consulted for them.
      const bool StartsNewPrefixRun =
          i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
                        OriginalPrefix[i - 1].rtrim(Blanks);
      if (StartsNewPrefixRun) {
        const bool HasContent = Lines[i].size() > ContentStart;
        if (SpacesInPrefix < Minimum && HasContent &&
            !NoSpaceBeforeFirstCommentChar()) {
          FirstLineSpaceChange = Minimum - SpacesInPrefix;
        } else if (static_cast<unsigned>(SpacesInPrefix) >
                   Style.SpacesInLineCommentPrefix.Maximum) {
          FirstLineSpaceChange =
              Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
        } else {
          FirstLineSpaceChange = 0;
        }
      }

      if (Lines[i].size() == ContentStart) {
        // The prefix is the whole line: there is no content, so all spaces of
        // the prefix are dropped.
        Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
      } else {
        // Inherit the change of the run, but never end up below the minimum.
        int SpaceChange = FirstLineSpaceChange;
        if (SpacesInPrefix + SpaceChange < Minimum)
          SpaceChange = Minimum - SpacesInPrefix;
        PrefixSpaceChange[i] = SpaceChange;

        assert(Lines[i].size() > IndentPrefix.size());
        const char FirstNonSpace = Lines[i][ContentStart];
        const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
        const bool LineRequiresLeadingSpace =
            !NoSpaceBeforeFirstCommentChar() ||
            (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
        const bool AllowsSpaceChange =
            !IsFormatComment &&
            (SpacesInPrefix != 0 || LineRequiresLeadingSpace);

        // Either append spaces to the original prefix, cut spaces off its
        // end, or keep it as it is.
        StringRef KeptPrefix = IndentPrefix;
        int SpacesToAppend = 0;
        if (AllowsSpaceChange) {
          if (SpaceChange > 0) {
            SpacesToAppend = SpaceChange;
          } else if (SpaceChange < 0) {
            KeptPrefix = IndentPrefix.drop_back(
                std::min<std::size_t>(-SpaceChange, SpacesInPrefix));
          }
        }
        Prefix[i] = KeptPrefix.str();
        Prefix[i].append(SpacesToAppend, ' ');
      }

      Tokens[i] = LineTok;
      ContentColumn[i] =
          StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,
                                                      Style.TabWidth, Encoding);

      // Store the content without trailing blanks. Content without any
      // non-blank character is kept unchanged.
      const StringRef RawContent = Lines[i].substr(ContentStart);
      const size_t LastTextChar = RawContent.find_last_not_of(Blanks);
      const size_t KeptLength =
          LastTextChar == StringRef::npos ? RawContent.size()
                                          : LastTextChar + 1;
      Content[i] = RawContent.substr(0, KeptLength);
    }

    LineTok = CurrentTok->Next;
    // A line comment section needs to be broken by a line comment that is
    // preceded by at least two newlines. The break is made here instead of at
    // an earlier stage during parsing, since that would split the contents of
    // the enum into two unwrapped lines in this example, which is
    // undesirable:
    // enum A {
    //   a, // comment about a
    //
    //   // comment about b
    //   b
    // };
    //
    // FIXME: Consider putting separate line comment sections as children to
    // the unwrapped line instead.
    if (LineTok && !LineTok->ContinuesLineCommentSection)
      break;
  }
}
1010
1011
unsigned
1012
BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
1013
StringRef::size_type Length,
1014
unsigned StartColumn) const {
1015
return encoding::columnWidthWithTabs(
1016
Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1017
Encoding);
1018
}
1019
1020
unsigned
1021
BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
1022
bool /*Break*/) const {
1023
return ContentColumn[LineIndex];
1024
}
1025
1026
void BreakableLineCommentSection::insertBreak(
1027
unsigned LineIndex, unsigned TailOffset, Split Split,
1028
unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
1029
StringRef Text = Content[LineIndex].substr(TailOffset);
1030
// Compute the offset of the split relative to the beginning of the token
1031
// text.
1032
unsigned BreakOffsetInToken =
1033
Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
1034
unsigned CharsToRemove = Split.second;
1035
Whitespaces.replaceWhitespaceInToken(
1036
tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1037
Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1038
/*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
1039
}
1040
1041
// Computes the reflow split for the line at LineIndex, or returns
// Split(StringRef::npos, 0) when the line must not be reflown.
BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
  if (!mayReflow(LineIndex, CommentPragmasRegex))
    return Split(StringRef::npos, 0);

  // In a line comment section each line is a separate token. After a split,
  // all whitespace before the current line comment token is replaced (it does
  // not need to be part of the split), plus the start of the line up to where
  // the content starts. Only the leading blanks of the content are covered
  // here; content without any non-blank character yields a length of 0.
  const size_t FirstNonBlank = Content[LineIndex].find_first_not_of(Blanks);
  if (FirstNonBlank == StringRef::npos)
    return Split(0, 0);
  return Split(0, FirstNonBlank);
}
1054
1055
void BreakableLineCommentSection::reflow(unsigned LineIndex,
1056
WhitespaceManager &Whitespaces) const {
1057
if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1058
// Reflow happens between tokens. Replace the whitespace between the
1059
// tokens by the empty string.
1060
Whitespaces.replaceWhitespace(
1061
*Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
1062
/*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
1063
/*InPPDirective=*/false);
1064
} else if (LineIndex > 0) {
1065
// In case we're reflowing after the '\' in:
1066
//
1067
// // line comment \
1068
// // line 2
1069
//
1070
// the reflow happens inside the single comment token (it is a single line
1071
// comment with an unescaped newline).
1072
// Replace the whitespace between the '\' and '//' with the empty string.
1073
//
1074
// Offset points to after the '\' relative to start of the token.
1075
unsigned Offset = Lines[LineIndex - 1].data() +
1076
Lines[LineIndex - 1].size() -
1077
tokenAt(LineIndex - 1).TokenText.data();
1078
// WhitespaceLength is the number of chars between the '\' and the '//' on
1079
// the next line.
1080
unsigned WhitespaceLength =
1081
Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
1082
Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1083
/*ReplaceChars=*/WhitespaceLength,
1084
/*PreviousPostfix=*/"",
1085
/*CurrentPrefix=*/"",
1086
/*InPPDirective=*/false,
1087
/*Newlines=*/0,
1088
/*Spaces=*/0);
1089
}
1090
// Replace the indent and prefix of the token with the reflow prefix.
1091
unsigned Offset =
1092
Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
1093
unsigned WhitespaceLength =
1094
Content[LineIndex].data() - Lines[LineIndex].data();
1095
Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1096
/*ReplaceChars=*/WhitespaceLength,
1097
/*PreviousPostfix=*/"",
1098
/*CurrentPrefix=*/ReflowPrefix,
1099
/*InPPDirective=*/false,
1100
/*Newlines=*/0,
1101
/*Spaces=*/0);
1102
}
1103
1104
void BreakableLineCommentSection::adaptStartOfLine(
1105
unsigned LineIndex, WhitespaceManager &Whitespaces) const {
1106
// If this is the first line of a token, we need to inform Whitespace Manager
1107
// about it: either adapt the whitespace range preceding it, or mark it as an
1108
// untouchable token.
1109
// This happens for instance here:
1110
// // line 1 \
1111
// // line 2
1112
if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1113
// This is the first line for the current token, but no reflow with the
1114
// previous token is necessary. However, we still may need to adjust the
1115
// start column. Note that ContentColumn[LineIndex] is the expected
1116
// content column after a possible update to the prefix, hence the prefix
1117
// length change is included.
1118
unsigned LineColumn =
1119
ContentColumn[LineIndex] -
1120
(Content[LineIndex].data() - Lines[LineIndex].data()) +
1121
(OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1122
1123
// We always want to create a replacement instead of adding an untouchable
1124
// token, even if LineColumn is the same as the original column of the
1125
// token. This is because WhitespaceManager doesn't align trailing
1126
// comments if they are untouchable.
1127
Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1128
/*Newlines=*/1,
1129
/*Spaces=*/LineColumn,
1130
/*StartOfTokenColumn=*/LineColumn,
1131
/*IsAligned=*/true,
1132
/*InPPDirective=*/false);
1133
}
1134
if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1135
// Adjust the prefix if necessary.
1136
const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1137
const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1138
Whitespaces.replaceWhitespaceInToken(
1139
tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1140
/*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1141
/*Newlines=*/0, /*Spaces=*/SpacesToAdd);
1142
}
1143
}
1144
1145
// Moves State.NextToken to the token following LastLineTok. The state is left
// untouched when LastLineTok is null.
void BreakableLineCommentSection::updateNextToken(LineState &State) const {
  if (!LastLineTok)
    return;
  State.NextToken = LastLineTok->Next;
}
1149
1150
bool BreakableLineCommentSection::mayReflow(
1151
unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1152
// Line comments have the indent as part of the prefix, so we need to
1153
// recompute the start of the line.
1154
StringRef IndentContent = Content[LineIndex];
1155
if (Lines[LineIndex].starts_with("//"))
1156
IndentContent = Lines[LineIndex].substr(2);
1157
// FIXME: Decide whether we want to reflow non-regular indents:
1158
// Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
1159
// OriginalPrefix[LineIndex-1]. That means we don't reflow
1160
// // text that protrudes
1161
// // into text with different indent
1162
// We do reflow in that case in block comments.
1163
return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
1164
mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
1165
!switchesFormatting(tokenAt(LineIndex)) &&
1166
OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1167
}
1168
1169
} // namespace format
1170
} // namespace clang
1171
1172