Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp
35234 views
1
//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file contains the implementation of the UnwrappedLineParser,
11
/// which turns a stream of tokens into UnwrappedLines.
12
///
13
//===----------------------------------------------------------------------===//
14
15
#include "UnwrappedLineParser.h"
16
#include "FormatToken.h"
17
#include "FormatTokenLexer.h"
18
#include "FormatTokenSource.h"
19
#include "Macros.h"
20
#include "TokenAnnotator.h"
21
#include "clang/Basic/TokenKinds.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/raw_os_ostream.h"
26
#include "llvm/Support/raw_ostream.h"
27
28
#include <algorithm>
29
#include <utility>
30
31
#define DEBUG_TYPE "format-parser"
32
33
namespace clang {
34
namespace format {
35
36
namespace {
37
38
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39
StringRef Prefix = "", bool PrintText = false) {
40
OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41
<< ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42
bool NewLine = false;
43
for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44
E = Line.Tokens.end();
45
I != E; ++I) {
46
if (NewLine) {
47
OS << Prefix;
48
NewLine = false;
49
}
50
OS << I->Tok->Tok.getName() << "["
51
<< "T=" << (unsigned)I->Tok->getType()
52
<< ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53
<< "\"] ";
54
for (SmallVectorImpl<UnwrappedLine>::const_iterator
55
CI = I->Children.begin(),
56
CE = I->Children.end();
57
CI != CE; ++CI) {
58
OS << "\n";
59
printLine(OS, *CI, (Prefix + " ").str());
60
NewLine = true;
61
}
62
}
63
if (!NewLine)
64
OS << "\n";
65
}
66
67
/// Dumps \p Line to the LLVM debug stream. Marked unused because its only
/// call sites in this file are wrapped in LLVM_DEBUG.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  printLine(llvm::dbgs(), Line);
}
70
71
class ScopedDeclarationState {
72
public:
73
ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74
bool MustBeDeclaration)
75
: Line(Line), Stack(Stack) {
76
Line.MustBeDeclaration = MustBeDeclaration;
77
Stack.push_back(MustBeDeclaration);
78
}
79
~ScopedDeclarationState() {
80
Stack.pop_back();
81
if (!Stack.empty())
82
Line.MustBeDeclaration = Stack.back();
83
else
84
Line.MustBeDeclaration = true;
85
}
86
87
private:
88
UnwrappedLine &Line;
89
llvm::BitVector &Stack;
90
};
91
92
} // end anonymous namespace
93
94
std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
95
llvm::raw_os_ostream OS(Stream);
96
printLine(OS, Line);
97
return Stream;
98
}
99
100
class ScopedLineState {
101
public:
102
ScopedLineState(UnwrappedLineParser &Parser,
103
bool SwitchToPreprocessorLines = false)
104
: Parser(Parser), OriginalLines(Parser.CurrentLines) {
105
if (SwitchToPreprocessorLines)
106
Parser.CurrentLines = &Parser.PreprocessorDirectives;
107
else if (!Parser.Line->Tokens.empty())
108
Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
109
PreBlockLine = std::move(Parser.Line);
110
Parser.Line = std::make_unique<UnwrappedLine>();
111
Parser.Line->Level = PreBlockLine->Level;
112
Parser.Line->PPLevel = PreBlockLine->PPLevel;
113
Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
114
Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
115
Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
116
}
117
118
~ScopedLineState() {
119
if (!Parser.Line->Tokens.empty())
120
Parser.addUnwrappedLine();
121
assert(Parser.Line->Tokens.empty());
122
Parser.Line = std::move(PreBlockLine);
123
if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
124
Parser.MustBreakBeforeNextToken = true;
125
Parser.CurrentLines = OriginalLines;
126
}
127
128
private:
129
UnwrappedLineParser &Parser;
130
131
std::unique_ptr<UnwrappedLine> PreBlockLine;
132
SmallVectorImpl<UnwrappedLine> *OriginalLines;
133
};
134
135
class CompoundStatementIndenter {
136
public:
137
CompoundStatementIndenter(UnwrappedLineParser *Parser,
138
const FormatStyle &Style, unsigned &LineLevel)
139
: CompoundStatementIndenter(Parser, LineLevel,
140
Style.BraceWrapping.AfterControlStatement,
141
Style.BraceWrapping.IndentBraces) {}
142
CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
143
bool WrapBrace, bool IndentBrace)
144
: LineLevel(LineLevel), OldLineLevel(LineLevel) {
145
if (WrapBrace)
146
Parser->addUnwrappedLine();
147
if (IndentBrace)
148
++LineLevel;
149
}
150
~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
151
152
private:
153
unsigned &LineLevel;
154
unsigned OldLineLevel;
155
};
156
157
/// Constructs a parser over \p Tokens that reports finished lines to
/// \p Callback.
///
/// Include-guard detection starts out rejected when the style does not indent
/// preprocessor directives (PPDIS_None), and in the initial state otherwise.
/// \p SourceMgr, \p Allocator and \p IdentTable are only forwarded to the
/// macro expander member.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(std::make_unique<UnwrappedLine>()), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
      LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
  // The cached C++ flag and the language options must agree.
  assert(IsCpp == LangOpts.CXXOperatorNames);
}
175
176
void UnwrappedLineParser::reset() {
177
PPBranchLevel = -1;
178
IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
179
? IG_Rejected
180
: IG_Inited;
181
IncludeGuardToken = nullptr;
182
Line.reset(new UnwrappedLine);
183
CommentsBeforeNextToken.clear();
184
FormatTok = nullptr;
185
MustBreakBeforeNextToken = false;
186
IsDecltypeAutoFunction = false;
187
PreprocessorDirectives.clear();
188
CurrentLines = &Lines;
189
DeclarationScopeStack.clear();
190
NestedTooDeep.clear();
191
NestedLambdas.clear();
192
PPStack.clear();
193
Line->FirstStartColumn = FirstStartColumn;
194
195
if (!Unexpanded.empty())
196
for (FormatToken *Token : AllTokens)
197
Token->MacroCtx.reset();
198
CurrentExpandedLines.clear();
199
ExpandedLines.clear();
200
Unexpanded.clear();
201
InExpansion = false;
202
Reconstruct.reset();
203
}
204
205
void UnwrappedLineParser::parse() {
206
IndexedTokenSource TokenSource(AllTokens);
207
Line->FirstStartColumn = FirstStartColumn;
208
do {
209
LLVM_DEBUG(llvm::dbgs() << "----\n");
210
reset();
211
Tokens = &TokenSource;
212
TokenSource.reset();
213
214
readToken();
215
parseFile();
216
217
// If we found an include guard then all preprocessor directives (other than
218
// the guard) are over-indented by one.
219
if (IncludeGuard == IG_Found) {
220
for (auto &Line : Lines)
221
if (Line.InPPDirective && Line.Level > 0)
222
--Line.Level;
223
}
224
225
// Create line with eof token.
226
assert(eof());
227
pushToken(FormatTok);
228
addUnwrappedLine();
229
230
// In a first run, format everything with the lines containing macro calls
231
// replaced by the expansion.
232
if (!ExpandedLines.empty()) {
233
LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
234
for (const auto &Line : Lines) {
235
if (!Line.Tokens.empty()) {
236
auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
237
if (it != ExpandedLines.end()) {
238
for (const auto &Expanded : it->second) {
239
LLVM_DEBUG(printDebugInfo(Expanded));
240
Callback.consumeUnwrappedLine(Expanded);
241
}
242
continue;
243
}
244
}
245
LLVM_DEBUG(printDebugInfo(Line));
246
Callback.consumeUnwrappedLine(Line);
247
}
248
Callback.finishRun();
249
}
250
251
LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
252
for (const UnwrappedLine &Line : Lines) {
253
LLVM_DEBUG(printDebugInfo(Line));
254
Callback.consumeUnwrappedLine(Line);
255
}
256
Callback.finishRun();
257
Lines.clear();
258
while (!PPLevelBranchIndex.empty() &&
259
PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
260
PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
261
PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
262
}
263
if (!PPLevelBranchIndex.empty()) {
264
++PPLevelBranchIndex.back();
265
assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
266
assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
267
}
268
} while (!PPLevelBranchIndex.empty());
269
}
270
271
void UnwrappedLineParser::parseFile() {
272
// The top-level context in a file always has declarations, except for pre-
273
// processor directives and JavaScript files.
274
bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
275
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
276
MustBeDeclaration);
277
if (Style.Language == FormatStyle::LK_TextProto)
278
parseBracedList();
279
else
280
parseLevel();
281
// Make sure to format the remaining tokens.
282
//
283
// LK_TextProto is special since its top-level is parsed as the body of a
284
// braced list, which does not necessarily have natural line separators such
285
// as a semicolon. Comments after the last entry that have been determined to
286
// not belong to that line, as in:
287
// key: value
288
// // endfile comment
289
// do not have a chance to be put on a line of their own until this point.
290
// Here we add this newline before end-of-file comments.
291
if (Style.Language == FormatStyle::LK_TextProto &&
292
!CommentsBeforeNextToken.empty()) {
293
addUnwrappedLine();
294
}
295
flushComments(true);
296
addUnwrappedLine();
297
}
298
299
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
300
do {
301
switch (FormatTok->Tok.getKind()) {
302
case tok::l_brace:
303
return;
304
default:
305
if (FormatTok->is(Keywords.kw_where)) {
306
addUnwrappedLine();
307
nextToken();
308
parseCSharpGenericTypeConstraint();
309
break;
310
}
311
nextToken();
312
break;
313
}
314
} while (!eof());
315
}
316
317
void UnwrappedLineParser::parseCSharpAttribute() {
318
int UnpairedSquareBrackets = 1;
319
do {
320
switch (FormatTok->Tok.getKind()) {
321
case tok::r_square:
322
nextToken();
323
--UnpairedSquareBrackets;
324
if (UnpairedSquareBrackets == 0) {
325
addUnwrappedLine();
326
return;
327
}
328
break;
329
case tok::l_square:
330
++UnpairedSquareBrackets;
331
nextToken();
332
break;
333
default:
334
nextToken();
335
break;
336
}
337
} while (!eof());
338
}
339
340
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
341
if (!Lines.empty() && Lines.back().InPPDirective)
342
return true;
343
344
const FormatToken *Previous = Tokens->getPreviousToken();
345
return Previous && Previous->is(tok::comment) &&
346
(Previous->IsMultiline || Previous->NewlinesBefore > 0);
347
}
348
349
/// \brief Parses a level, that is ???.
350
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
351
/// \param IfKind The \p if statement kind in the level.
352
/// \param IfLeftBrace The left brace of the \p if block in the level.
353
/// \returns true if a simple block of if/else/for/while, or false otherwise.
354
/// (A simple block has a single statement.)
355
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
356
IfStmtKind *IfKind,
357
FormatToken **IfLeftBrace) {
358
const bool InRequiresExpression =
359
OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
360
const bool IsPrecededByCommentOrPPDirective =
361
!Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
362
FormatToken *IfLBrace = nullptr;
363
bool HasDoWhile = false;
364
bool HasLabel = false;
365
unsigned StatementCount = 0;
366
bool SwitchLabelEncountered = false;
367
368
do {
369
if (FormatTok->isAttribute()) {
370
nextToken();
371
if (FormatTok->is(tok::l_paren))
372
parseParens();
373
continue;
374
}
375
tok::TokenKind Kind = FormatTok->Tok.getKind();
376
if (FormatTok->is(TT_MacroBlockBegin))
377
Kind = tok::l_brace;
378
else if (FormatTok->is(TT_MacroBlockEnd))
379
Kind = tok::r_brace;
380
381
auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
382
&HasLabel, &StatementCount] {
383
parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
384
HasDoWhile ? nullptr : &HasDoWhile,
385
HasLabel ? nullptr : &HasLabel);
386
++StatementCount;
387
assert(StatementCount > 0 && "StatementCount overflow!");
388
};
389
390
switch (Kind) {
391
case tok::comment:
392
nextToken();
393
addUnwrappedLine();
394
break;
395
case tok::l_brace:
396
if (InRequiresExpression) {
397
FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
398
} else if (FormatTok->Previous &&
399
FormatTok->Previous->ClosesRequiresClause) {
400
// We need the 'default' case here to correctly parse a function
401
// l_brace.
402
ParseDefault();
403
continue;
404
}
405
if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
406
if (tryToParseBracedList())
407
continue;
408
FormatTok->setFinalizedType(TT_BlockLBrace);
409
}
410
parseBlock();
411
++StatementCount;
412
assert(StatementCount > 0 && "StatementCount overflow!");
413
addUnwrappedLine();
414
break;
415
case tok::r_brace:
416
if (OpeningBrace) {
417
if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
418
!OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
419
return false;
420
}
421
if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
422
HasDoWhile || IsPrecededByCommentOrPPDirective ||
423
precededByCommentOrPPDirective()) {
424
return false;
425
}
426
const FormatToken *Next = Tokens->peekNextToken();
427
if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
428
return false;
429
if (IfLeftBrace)
430
*IfLeftBrace = IfLBrace;
431
return true;
432
}
433
nextToken();
434
addUnwrappedLine();
435
break;
436
case tok::kw_default: {
437
unsigned StoredPosition = Tokens->getPosition();
438
auto *Next = Tokens->getNextNonComment();
439
FormatTok = Tokens->setPosition(StoredPosition);
440
if (!Next->isOneOf(tok::colon, tok::arrow)) {
441
// default not followed by `:` or `->` is not a case label; treat it
442
// like an identifier.
443
parseStructuralElement();
444
break;
445
}
446
// Else, if it is 'default:', fall through to the case handling.
447
[[fallthrough]];
448
}
449
case tok::kw_case:
450
if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
451
(Style.isJavaScript() && Line->MustBeDeclaration)) {
452
// Proto: there are no switch/case statements
453
// Verilog: Case labels don't have this word. We handle case
454
// labels including default in TokenAnnotator.
455
// JavaScript: A 'case: string' style field declaration.
456
ParseDefault();
457
break;
458
}
459
if (!SwitchLabelEncountered &&
460
(Style.IndentCaseLabels ||
461
(OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
462
(Line->InPPDirective && Line->Level == 1))) {
463
++Line->Level;
464
}
465
SwitchLabelEncountered = true;
466
parseStructuralElement();
467
break;
468
case tok::l_square:
469
if (Style.isCSharp()) {
470
nextToken();
471
parseCSharpAttribute();
472
break;
473
}
474
if (handleCppAttributes())
475
break;
476
[[fallthrough]];
477
default:
478
ParseDefault();
479
break;
480
}
481
} while (!eof());
482
483
return false;
484
}
485
486
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
487
// We'll parse forward through the tokens until we hit
488
// a closing brace or eof - note that getNextToken() will
489
// parse macros, so this will magically work inside macro
490
// definitions, too.
491
unsigned StoredPosition = Tokens->getPosition();
492
FormatToken *Tok = FormatTok;
493
const FormatToken *PrevTok = Tok->Previous;
494
// Keep a stack of positions of lbrace tokens. We will
495
// update information about whether an lbrace starts a
496
// braced init list or a different block during the loop.
497
struct StackEntry {
498
FormatToken *Tok;
499
const FormatToken *PrevTok;
500
};
501
SmallVector<StackEntry, 8> LBraceStack;
502
assert(Tok->is(tok::l_brace));
503
504
do {
505
auto *NextTok = Tokens->getNextNonComment();
506
507
if (!Line->InMacroBody && !Style.isTableGen()) {
508
// Skip PPDirective lines and comments.
509
while (NextTok->is(tok::hash)) {
510
NextTok = Tokens->getNextToken();
511
if (NextTok->is(tok::pp_not_keyword))
512
break;
513
do {
514
NextTok = Tokens->getNextToken();
515
} while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof));
516
517
while (NextTok->is(tok::comment))
518
NextTok = Tokens->getNextToken();
519
}
520
}
521
522
switch (Tok->Tok.getKind()) {
523
case tok::l_brace:
524
if (Style.isJavaScript() && PrevTok) {
525
if (PrevTok->isOneOf(tok::colon, tok::less)) {
526
// A ':' indicates this code is in a type, or a braced list
527
// following a label in an object literal ({a: {b: 1}}).
528
// A '<' could be an object used in a comparison, but that is nonsense
529
// code (can never return true), so more likely it is a generic type
530
// argument (`X<{a: string; b: number}>`).
531
// The code below could be confused by semicolons between the
532
// individual members in a type member list, which would normally
533
// trigger BK_Block. In both cases, this must be parsed as an inline
534
// braced init.
535
Tok->setBlockKind(BK_BracedInit);
536
} else if (PrevTok->is(tok::r_paren)) {
537
// `) { }` can only occur in function or method declarations in JS.
538
Tok->setBlockKind(BK_Block);
539
}
540
} else {
541
Tok->setBlockKind(BK_Unknown);
542
}
543
LBraceStack.push_back({Tok, PrevTok});
544
break;
545
case tok::r_brace:
546
if (LBraceStack.empty())
547
break;
548
if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
549
bool ProbablyBracedList = false;
550
if (Style.Language == FormatStyle::LK_Proto) {
551
ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
552
} else if (LBrace->isNot(TT_EnumLBrace)) {
553
// Using OriginalColumn to distinguish between ObjC methods and
554
// binary operators is a bit hacky.
555
bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
556
NextTok->OriginalColumn == 0;
557
558
// Try to detect a braced list. Note that regardless how we mark inner
559
// braces here, we will overwrite the BlockKind later if we parse a
560
// braced list (where all blocks inside are by default braced lists),
561
// or when we explicitly detect blocks (for example while parsing
562
// lambdas).
563
564
// If we already marked the opening brace as braced list, the closing
565
// must also be part of it.
566
ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
567
568
ProbablyBracedList = ProbablyBracedList ||
569
(Style.isJavaScript() &&
570
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
571
Keywords.kw_as));
572
ProbablyBracedList =
573
ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
574
NextTok->is(tok::l_paren)));
575
576
// If there is a comma, semicolon or right paren after the closing
577
// brace, we assume this is a braced initializer list.
578
// FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
579
// braced list in JS.
580
ProbablyBracedList =
581
ProbablyBracedList ||
582
NextTok->isOneOf(tok::comma, tok::period, tok::colon,
583
tok::r_paren, tok::r_square, tok::ellipsis);
584
585
// Distinguish between braced list in a constructor initializer list
586
// followed by constructor body, or just adjacent blocks.
587
ProbablyBracedList =
588
ProbablyBracedList ||
589
(NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
590
LBraceStack.back().PrevTok->isOneOf(tok::identifier,
591
tok::greater));
592
593
ProbablyBracedList =
594
ProbablyBracedList ||
595
(NextTok->is(tok::identifier) &&
596
!PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
597
598
ProbablyBracedList = ProbablyBracedList ||
599
(NextTok->is(tok::semi) &&
600
(!ExpectClassBody || LBraceStack.size() != 1));
601
602
ProbablyBracedList =
603
ProbablyBracedList ||
604
(NextTok->isBinaryOperator() && !NextIsObjCMethod);
605
606
if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
607
// We can have an array subscript after a braced init
608
// list, but C++11 attributes are expected after blocks.
609
NextTok = Tokens->getNextToken();
610
ProbablyBracedList = NextTok->isNot(tok::l_square);
611
}
612
613
// Cpp macro definition body that is a nonempty braced list or block:
614
if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
615
!FormatTok->Previous && NextTok->is(tok::eof) &&
616
// A statement can end with only `;` (simple statement), a block
617
// closing brace (compound statement), or `:` (label statement).
618
// If PrevTok is a block opening brace, Tok ends an empty block.
619
!PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
620
ProbablyBracedList = true;
621
}
622
}
623
const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
624
Tok->setBlockKind(BlockKind);
625
LBrace->setBlockKind(BlockKind);
626
}
627
LBraceStack.pop_back();
628
break;
629
case tok::identifier:
630
if (Tok->isNot(TT_StatementMacro))
631
break;
632
[[fallthrough]];
633
case tok::at:
634
case tok::semi:
635
case tok::kw_if:
636
case tok::kw_while:
637
case tok::kw_for:
638
case tok::kw_switch:
639
case tok::kw_try:
640
case tok::kw___try:
641
if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
642
LBraceStack.back().Tok->setBlockKind(BK_Block);
643
break;
644
default:
645
break;
646
}
647
648
PrevTok = Tok;
649
Tok = NextTok;
650
} while (Tok->isNot(tok::eof) && !LBraceStack.empty());
651
652
// Assume other blocks for all unclosed opening braces.
653
for (const auto &Entry : LBraceStack)
654
if (Entry.Tok->is(BK_Unknown))
655
Entry.Tok->setBlockKind(BK_Block);
656
657
FormatTok = Tokens->setPosition(StoredPosition);
658
}
659
660
// Sets the token type of the directly previous right brace.
661
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
662
if (auto Prev = FormatTok->getPreviousNonComment();
663
Prev && Prev->is(tok::r_brace)) {
664
Prev->setFinalizedType(Type);
665
}
666
}
667
668
/// Mixes the std::hash of \p v into \p seed in place:
/// seed ^= hash(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2).
/// The result depends on the order in which values are combined.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
673
674
size_t UnwrappedLineParser::computePPHash() const {
675
size_t h = 0;
676
for (const auto &i : PPStack) {
677
hash_combine(h, size_t(i.Kind));
678
hash_combine(h, i.Line);
679
}
680
return h;
681
}
682
683
// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
684
// is not null, subtracts its length (plus the preceding space) when computing
685
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
686
// running the token annotator on it so that we can restore them afterward.
687
bool UnwrappedLineParser::mightFitOnOneLine(
688
UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
689
const auto ColumnLimit = Style.ColumnLimit;
690
if (ColumnLimit == 0)
691
return true;
692
693
auto &Tokens = ParsedLine.Tokens;
694
assert(!Tokens.empty());
695
696
const auto *LastToken = Tokens.back().Tok;
697
assert(LastToken);
698
699
SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
700
701
int Index = 0;
702
for (const auto &Token : Tokens) {
703
assert(Token.Tok);
704
auto &SavedToken = SavedTokens[Index++];
705
SavedToken.Tok = new FormatToken;
706
SavedToken.Tok->copyFrom(*Token.Tok);
707
SavedToken.Children = std::move(Token.Children);
708
}
709
710
AnnotatedLine Line(ParsedLine);
711
assert(Line.Last == LastToken);
712
713
TokenAnnotator Annotator(Style, Keywords);
714
Annotator.annotate(Line);
715
Annotator.calculateFormattingInformation(Line);
716
717
auto Length = LastToken->TotalLength;
718
if (OpeningBrace) {
719
assert(OpeningBrace != Tokens.front().Tok);
720
if (auto Prev = OpeningBrace->Previous;
721
Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
722
Length -= ColumnLimit;
723
}
724
Length -= OpeningBrace->TokenText.size() + 1;
725
}
726
727
if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
728
assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
729
Length -= FirstToken->TokenText.size() + 1;
730
}
731
732
Index = 0;
733
for (auto &Token : Tokens) {
734
const auto &SavedToken = SavedTokens[Index++];
735
Token.Tok->copyFrom(*SavedToken.Tok);
736
Token.Children = std::move(SavedToken.Children);
737
delete SavedToken.Tok;
738
}
739
740
// If these change PPLevel needs to be used for get correct indentation.
741
assert(!Line.InMacroBody);
742
assert(!Line.InPPDirective);
743
return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
744
}
745
746
/// Parses a block that starts at the current token: a '{', a macro block begin
/// token, or (Verilog only) a begin keyword or hierarchy header. Consumes the
/// block including its closing token.
///
/// \param MustBeDeclaration Declaration state applied to the block's lines.
/// \param AddLevels Number of indentation levels added inside the block.
/// \param MunchSemi If true, a ';' following the block is consumed as well.
/// \param KeepBraces If true, braces are only considered removable when the
///        nested \c if reported by parseLevel has non-optional braces.
/// \param IfKind Forwarded to parseLevel.
/// \param UnindentWhitesmithsBraces If true, the line level is decremented
///        once after the opening line has been added.
/// \returns The left brace of the \c if block reported by parseLevel, or
///          \c nullptr (also when nesting is deeper than 300 levels).
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  // Hash of the preprocessor stack at block entry; compared against the hash
  // at block exit before the opening and closing lines are linked.
  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level did not end on the expected closing token: restore the level
  // and leave the token for the caller.
  if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
                 : FormatTok->isNot(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace)) {
    FormatTok->setBlockKind(BK_Block);
    if (Tok->is(TT_NamespaceLBrace))
      FormatTok->setFinalizedType(TT_NamespaceRBrace);
  }

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block are removable. Captures by copy
  // and is mutable, so the changes it makes to Tok and Index stay local to the
  // lambda.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are recorded by linking the pair through MatchingParen.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link opening and closing lines when the preprocessor stack is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}
922
923
static bool isGoogScope(const UnwrappedLine &Line) {
924
// FIXME: Closure-library specific stuff should not be hard-coded but be
925
// configurable.
926
if (Line.Tokens.size() < 4)
927
return false;
928
auto I = Line.Tokens.begin();
929
if (I->Tok->TokenText != "goog")
930
return false;
931
++I;
932
if (I->Tok->isNot(tok::period))
933
return false;
934
++I;
935
if (I->Tok->TokenText != "scope")
936
return false;
937
++I;
938
return I->Tok->is(tok::l_paren);
939
}
940
941
static bool isIIFE(const UnwrappedLine &Line,
942
const AdditionalKeywords &Keywords) {
943
// Look for the start of an immediately invoked anonymous function.
944
// https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
945
// This is commonly done in JavaScript to create a new, anonymous scope.
946
// Example: (function() { ... })()
947
if (Line.Tokens.size() < 3)
948
return false;
949
auto I = Line.Tokens.begin();
950
if (I->Tok->isNot(tok::l_paren))
951
return false;
952
++I;
953
if (I->Tok->isNot(Keywords.kw_function))
954
return false;
955
++I;
956
return I->Tok->is(tok::l_paren);
957
}
958
959
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
960
const FormatToken &InitialToken) {
961
tok::TokenKind Kind = InitialToken.Tok.getKind();
962
if (InitialToken.is(TT_NamespaceMacro))
963
Kind = tok::kw_namespace;
964
965
switch (Kind) {
966
case tok::kw_namespace:
967
return Style.BraceWrapping.AfterNamespace;
968
case tok::kw_class:
969
return Style.BraceWrapping.AfterClass;
970
case tok::kw_union:
971
return Style.BraceWrapping.AfterUnion;
972
case tok::kw_struct:
973
return Style.BraceWrapping.AfterStruct;
974
case tok::kw_enum:
975
return Style.BraceWrapping.AfterEnum;
976
default:
977
return false;
978
}
979
}
980
981
void UnwrappedLineParser::parseChildBlock() {
982
assert(FormatTok->is(tok::l_brace));
983
FormatTok->setBlockKind(BK_Block);
984
const FormatToken *OpeningBrace = FormatTok;
985
nextToken();
986
{
987
bool SkipIndent = (Style.isJavaScript() &&
988
(isGoogScope(*Line) || isIIFE(*Line, Keywords)));
989
ScopedLineState LineState(*this);
990
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
991
/*MustBeDeclaration=*/false);
992
Line->Level += SkipIndent ? 0 : 1;
993
parseLevel(OpeningBrace);
994
flushComments(isOnNewLine(*FormatTok));
995
Line->Level -= SkipIndent ? 0 : 1;
996
}
997
nextToken();
998
}
999
1000
void UnwrappedLineParser::parsePPDirective() {
1001
assert(FormatTok->is(tok::hash) && "'#' expected");
1002
ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1003
1004
nextToken();
1005
1006
if (!FormatTok->Tok.getIdentifierInfo()) {
1007
parsePPUnknown();
1008
return;
1009
}
1010
1011
switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1012
case tok::pp_define:
1013
parsePPDefine();
1014
return;
1015
case tok::pp_if:
1016
parsePPIf(/*IfDef=*/false);
1017
break;
1018
case tok::pp_ifdef:
1019
case tok::pp_ifndef:
1020
parsePPIf(/*IfDef=*/true);
1021
break;
1022
case tok::pp_else:
1023
case tok::pp_elifdef:
1024
case tok::pp_elifndef:
1025
case tok::pp_elif:
1026
parsePPElse();
1027
break;
1028
case tok::pp_endif:
1029
parsePPEndIf();
1030
break;
1031
case tok::pp_pragma:
1032
parsePPPragma();
1033
break;
1034
default:
1035
parsePPUnknown();
1036
break;
1037
}
1038
}
1039
1040
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1041
size_t Line = CurrentLines->size();
1042
if (CurrentLines == &PreprocessorDirectives)
1043
Line += Lines.size();
1044
1045
if (Unreachable ||
1046
(!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1047
PPStack.push_back({PP_Unreachable, Line});
1048
} else {
1049
PPStack.push_back({PP_Conditional, Line});
1050
}
1051
}
1052
1053
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1054
++PPBranchLevel;
1055
assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1056
if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1057
PPLevelBranchIndex.push_back(0);
1058
PPLevelBranchCount.push_back(0);
1059
}
1060
PPChainBranchIndex.push(Unreachable ? -1 : 0);
1061
bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1062
conditionalCompilationCondition(Unreachable || Skip);
1063
}
1064
1065
void UnwrappedLineParser::conditionalCompilationAlternative() {
1066
if (!PPStack.empty())
1067
PPStack.pop_back();
1068
assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1069
if (!PPChainBranchIndex.empty())
1070
++PPChainBranchIndex.top();
1071
conditionalCompilationCondition(
1072
PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1073
PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1074
}
1075
1076
void UnwrappedLineParser::conditionalCompilationEnd() {
1077
assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1078
if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1079
if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1080
PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1081
}
1082
// Guard against #endif's without #if.
1083
if (PPBranchLevel > -1)
1084
--PPBranchLevel;
1085
if (!PPChainBranchIndex.empty())
1086
PPChainBranchIndex.pop();
1087
if (!PPStack.empty())
1088
PPStack.pop_back();
1089
}
1090
1091
void UnwrappedLineParser::parsePPIf(bool IfDef) {
1092
bool IfNDef = FormatTok->is(tok::pp_ifndef);
1093
nextToken();
1094
bool Unreachable = false;
1095
if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1096
Unreachable = true;
1097
if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1098
Unreachable = true;
1099
conditionalCompilationStart(Unreachable);
1100
FormatToken *IfCondition = FormatTok;
1101
// If there's a #ifndef on the first line, and the only lines before it are
1102
// comments, it could be an include guard.
1103
bool MaybeIncludeGuard = IfNDef;
1104
if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1105
for (auto &Line : Lines) {
1106
if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1107
MaybeIncludeGuard = false;
1108
IncludeGuard = IG_Rejected;
1109
break;
1110
}
1111
}
1112
}
1113
--PPBranchLevel;
1114
parsePPUnknown();
1115
++PPBranchLevel;
1116
if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1117
IncludeGuard = IG_IfNdefed;
1118
IncludeGuardToken = IfCondition;
1119
}
1120
}
1121
1122
void UnwrappedLineParser::parsePPElse() {
1123
// If a potential include guard has an #else, it's not an include guard.
1124
if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1125
IncludeGuard = IG_Rejected;
1126
// Don't crash when there is an #else without an #if.
1127
assert(PPBranchLevel >= -1);
1128
if (PPBranchLevel == -1)
1129
conditionalCompilationStart(/*Unreachable=*/true);
1130
conditionalCompilationAlternative();
1131
--PPBranchLevel;
1132
parsePPUnknown();
1133
++PPBranchLevel;
1134
}
1135
1136
void UnwrappedLineParser::parsePPEndIf() {
1137
conditionalCompilationEnd();
1138
parsePPUnknown();
1139
// If the #endif of a potential include guard is the last thing in the file,
1140
// then we found an include guard.
1141
if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1142
Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1143
IncludeGuard = IG_Found;
1144
}
1145
}
1146
1147
void UnwrappedLineParser::parsePPDefine() {
1148
nextToken();
1149
1150
if (!FormatTok->Tok.getIdentifierInfo()) {
1151
IncludeGuard = IG_Rejected;
1152
IncludeGuardToken = nullptr;
1153
parsePPUnknown();
1154
return;
1155
}
1156
1157
if (IncludeGuard == IG_IfNdefed &&
1158
IncludeGuardToken->TokenText == FormatTok->TokenText) {
1159
IncludeGuard = IG_Defined;
1160
IncludeGuardToken = nullptr;
1161
for (auto &Line : Lines) {
1162
if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1163
IncludeGuard = IG_Rejected;
1164
break;
1165
}
1166
}
1167
}
1168
1169
// In the context of a define, even keywords should be treated as normal
1170
// identifiers. Setting the kind to identifier is not enough, because we need
1171
// to treat additional keywords like __except as well, which are already
1172
// identifiers. Setting the identifier info to null interferes with include
1173
// guard processing above, and changes preprocessing nesting.
1174
FormatTok->Tok.setKind(tok::identifier);
1175
FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1176
nextToken();
1177
if (FormatTok->Tok.getKind() == tok::l_paren &&
1178
!FormatTok->hasWhitespaceBefore()) {
1179
parseParens();
1180
}
1181
if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1182
Line->Level += PPBranchLevel + 1;
1183
addUnwrappedLine();
1184
++Line->Level;
1185
1186
Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1187
assert((int)Line->PPLevel >= 0);
1188
Line->InMacroBody = true;
1189
1190
if (Style.SkipMacroDefinitionBody) {
1191
while (!eof()) {
1192
FormatTok->Finalized = true;
1193
FormatTok = Tokens->getNextToken();
1194
}
1195
addUnwrappedLine();
1196
return;
1197
}
1198
1199
// Errors during a preprocessor directive can only affect the layout of the
1200
// preprocessor directive, and thus we ignore them. An alternative approach
1201
// would be to use the same approach we use on the file level (no
1202
// re-indentation if there was a structural error) within the macro
1203
// definition.
1204
parseFile();
1205
}
1206
1207
void UnwrappedLineParser::parsePPPragma() {
1208
Line->InPragmaDirective = true;
1209
parsePPUnknown();
1210
}
1211
1212
void UnwrappedLineParser::parsePPUnknown() {
1213
do {
1214
nextToken();
1215
} while (!eof());
1216
if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1217
Line->Level += PPBranchLevel + 1;
1218
addUnwrappedLine();
1219
}
1220
1221
// Here we exclude certain tokens that are not usually the first token in an
1222
// unwrapped line. This is used in attempt to distinguish macro calls without
1223
// trailing semicolons from other constructs split to several lines.
1224
static bool tokenCanStartNewLine(const FormatToken &Tok) {
1225
// Semicolon can be a null-statement, l_square can be a start of a macro or
1226
// a C++11 attribute, but this doesn't seem to be common.
1227
return !Tok.isOneOf(tok::semi, tok::l_brace,
1228
// Tokens that can only be used as binary operators and a
1229
// part of overloaded operator names.
1230
tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1231
tok::less, tok::greater, tok::slash, tok::percent,
1232
tok::lessless, tok::greatergreater, tok::equal,
1233
tok::plusequal, tok::minusequal, tok::starequal,
1234
tok::slashequal, tok::percentequal, tok::ampequal,
1235
tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1236
tok::lesslessequal,
1237
// Colon is used in labels, base class lists, initializer
1238
// lists, range-based for loops, ternary operator, but
1239
// should never be the first token in an unwrapped line.
1240
tok::colon,
1241
// 'noexcept' is a trailing annotation.
1242
tok::kw_noexcept);
1243
}
1244
1245
static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1246
const FormatToken *FormatTok) {
1247
// FIXME: This returns true for C/C++ keywords like 'struct'.
1248
return FormatTok->is(tok::identifier) &&
1249
(!FormatTok->Tok.getIdentifierInfo() ||
1250
!FormatTok->isOneOf(
1251
Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1252
Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1253
Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1254
Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1255
Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1256
Keywords.kw_instanceof, Keywords.kw_interface,
1257
Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1258
}
1259
1260
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1261
const FormatToken *FormatTok) {
1262
return FormatTok->Tok.isLiteral() ||
1263
FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1264
mustBeJSIdent(Keywords, FormatTok);
1265
}
1266
1267
// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1268
// when encountered after a value (see mustBeJSIdentOrValue).
1269
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1270
const FormatToken *FormatTok) {
1271
return FormatTok->isOneOf(
1272
tok::kw_return, Keywords.kw_yield,
1273
// conditionals
1274
tok::kw_if, tok::kw_else,
1275
// loops
1276
tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1277
// switch/case
1278
tok::kw_switch, tok::kw_case,
1279
// exceptions
1280
tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1281
// declaration
1282
tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1283
Keywords.kw_async, Keywords.kw_function,
1284
// import/export
1285
Keywords.kw_import, tok::kw_export);
1286
}
1287
1288
// Checks whether a token is a type in K&R C (aka C78).
1289
static bool isC78Type(const FormatToken &Tok) {
1290
return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1291
tok::kw_unsigned, tok::kw_float, tok::kw_double,
1292
tok::identifier);
1293
}
1294
1295
// This function checks whether a token starts the first parameter declaration
1296
// in a K&R C (aka C78) function definition, e.g.:
1297
// int f(a, b)
1298
// short a, b;
1299
// {
1300
// return a + b;
1301
// }
1302
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1303
const FormatToken *FuncName) {
1304
assert(Tok);
1305
assert(Next);
1306
assert(FuncName);
1307
1308
if (FuncName->isNot(tok::identifier))
1309
return false;
1310
1311
const FormatToken *Prev = FuncName->Previous;
1312
if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1313
return false;
1314
1315
if (!isC78Type(*Tok) &&
1316
!Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1317
return false;
1318
}
1319
1320
if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1321
return false;
1322
1323
Tok = Tok->Previous;
1324
if (!Tok || Tok->isNot(tok::r_paren))
1325
return false;
1326
1327
Tok = Tok->Previous;
1328
if (!Tok || Tok->isNot(tok::identifier))
1329
return false;
1330
1331
return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1332
}
1333
1334
bool UnwrappedLineParser::parseModuleImport() {
1335
assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1336
1337
if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1338
!Token->Tok.getIdentifierInfo() &&
1339
!Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1340
return false;
1341
}
1342
1343
nextToken();
1344
while (!eof()) {
1345
if (FormatTok->is(tok::colon)) {
1346
FormatTok->setFinalizedType(TT_ModulePartitionColon);
1347
}
1348
// Handle import <foo/bar.h> as we would an include statement.
1349
else if (FormatTok->is(tok::less)) {
1350
nextToken();
1351
while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1352
// Mark tokens up to the trailing line comments as implicit string
1353
// literals.
1354
if (FormatTok->isNot(tok::comment) &&
1355
!FormatTok->TokenText.starts_with("//")) {
1356
FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1357
}
1358
nextToken();
1359
}
1360
}
1361
if (FormatTok->is(tok::semi)) {
1362
nextToken();
1363
break;
1364
}
1365
nextToken();
1366
}
1367
1368
addUnwrappedLine();
1369
return true;
1370
}
1371
1372
// readTokenWithJavaScriptASI reads the next token and terminates the current
1373
// line if JavaScript Automatic Semicolon Insertion must
1374
// happen between the current token and the next token.
1375
//
1376
// This method is conservative - it cannot cover all edge cases of JavaScript,
1377
// but only aims to correctly handle certain well known cases. It *must not*
1378
// return true in speculative cases.
1379
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1380
FormatToken *Previous = FormatTok;
1381
readToken();
1382
FormatToken *Next = FormatTok;
1383
1384
bool IsOnSameLine =
1385
CommentsBeforeNextToken.empty()
1386
? Next->NewlinesBefore == 0
1387
: CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1388
if (IsOnSameLine)
1389
return;
1390
1391
bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1392
bool PreviousStartsTemplateExpr =
1393
Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1394
if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1395
// If the line contains an '@' sign, the previous token might be an
1396
// annotation, which can precede another identifier/value.
1397
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1398
return LineNode.Tok->is(tok::at);
1399
});
1400
if (HasAt)
1401
return;
1402
}
1403
if (Next->is(tok::exclaim) && PreviousMustBeValue)
1404
return addUnwrappedLine();
1405
bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1406
bool NextEndsTemplateExpr =
1407
Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1408
if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1409
(PreviousMustBeValue ||
1410
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1411
tok::minusminus))) {
1412
return addUnwrappedLine();
1413
}
1414
if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1415
isJSDeclOrStmt(Keywords, Next)) {
1416
return addUnwrappedLine();
1417
}
1418
}
1419
1420
void UnwrappedLineParser::parseStructuralElement(
1421
const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1422
FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1423
if (Style.Language == FormatStyle::LK_TableGen &&
1424
FormatTok->is(tok::pp_include)) {
1425
nextToken();
1426
if (FormatTok->is(tok::string_literal))
1427
nextToken();
1428
addUnwrappedLine();
1429
return;
1430
}
1431
1432
if (IsCpp) {
1433
while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1434
}
1435
} else if (Style.isVerilog()) {
1436
if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1437
parseForOrWhileLoop(/*HasParens=*/false);
1438
return;
1439
}
1440
if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1441
parseForOrWhileLoop();
1442
return;
1443
}
1444
if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1445
Keywords.kw_assume, Keywords.kw_cover)) {
1446
parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1447
return;
1448
}
1449
1450
// Skip things that can exist before keywords like 'if' and 'case'.
1451
while (true) {
1452
if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1453
Keywords.kw_unique0)) {
1454
nextToken();
1455
} else if (FormatTok->is(tok::l_paren) &&
1456
Tokens->peekNextToken()->is(tok::star)) {
1457
parseParens();
1458
} else {
1459
break;
1460
}
1461
}
1462
}
1463
1464
// Tokens that only make sense at the beginning of a line.
1465
if (FormatTok->isAccessSpecifierKeyword()) {
1466
if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1467
Style.isCSharp()) {
1468
nextToken();
1469
} else {
1470
parseAccessSpecifier();
1471
}
1472
return;
1473
}
1474
switch (FormatTok->Tok.getKind()) {
1475
case tok::kw_asm:
1476
nextToken();
1477
if (FormatTok->is(tok::l_brace)) {
1478
FormatTok->setFinalizedType(TT_InlineASMBrace);
1479
nextToken();
1480
while (FormatTok && !eof()) {
1481
if (FormatTok->is(tok::r_brace)) {
1482
FormatTok->setFinalizedType(TT_InlineASMBrace);
1483
nextToken();
1484
addUnwrappedLine();
1485
break;
1486
}
1487
FormatTok->Finalized = true;
1488
nextToken();
1489
}
1490
}
1491
break;
1492
case tok::kw_namespace:
1493
parseNamespace();
1494
return;
1495
case tok::kw_if: {
1496
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497
// field/method declaration.
1498
break;
1499
}
1500
FormatToken *Tok = parseIfThenElse(IfKind);
1501
if (IfLeftBrace)
1502
*IfLeftBrace = Tok;
1503
return;
1504
}
1505
case tok::kw_for:
1506
case tok::kw_while:
1507
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1508
// field/method declaration.
1509
break;
1510
}
1511
parseForOrWhileLoop();
1512
return;
1513
case tok::kw_do:
1514
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1515
// field/method declaration.
1516
break;
1517
}
1518
parseDoWhile();
1519
if (HasDoWhile)
1520
*HasDoWhile = true;
1521
return;
1522
case tok::kw_switch:
1523
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524
// 'switch: string' field declaration.
1525
break;
1526
}
1527
parseSwitch(/*IsExpr=*/false);
1528
return;
1529
case tok::kw_default: {
1530
// In Verilog default along with other labels are handled in the next loop.
1531
if (Style.isVerilog())
1532
break;
1533
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534
// 'default: string' field declaration.
1535
break;
1536
}
1537
auto *Default = FormatTok;
1538
nextToken();
1539
if (FormatTok->is(tok::colon)) {
1540
FormatTok->setFinalizedType(TT_CaseLabelColon);
1541
parseLabel();
1542
return;
1543
}
1544
if (FormatTok->is(tok::arrow)) {
1545
FormatTok->setFinalizedType(TT_CaseLabelArrow);
1546
Default->setFinalizedType(TT_SwitchExpressionLabel);
1547
parseLabel();
1548
return;
1549
}
1550
// e.g. "default void f() {}" in a Java interface.
1551
break;
1552
}
1553
case tok::kw_case:
1554
// Proto: there are no switch/case statements.
1555
if (Style.Language == FormatStyle::LK_Proto) {
1556
nextToken();
1557
return;
1558
}
1559
if (Style.isVerilog()) {
1560
parseBlock();
1561
addUnwrappedLine();
1562
return;
1563
}
1564
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1565
// 'case: string' field declaration.
1566
nextToken();
1567
break;
1568
}
1569
parseCaseLabel();
1570
return;
1571
case tok::kw_try:
1572
case tok::kw___try:
1573
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1574
// field/method declaration.
1575
break;
1576
}
1577
parseTryCatch();
1578
return;
1579
case tok::kw_extern:
1580
nextToken();
1581
if (Style.isVerilog()) {
1582
// In Verilog, an extern module declaration looks like a start of module.
1583
// But there is no body and endmodule. So we handle it separately.
1584
if (Keywords.isVerilogHierarchy(*FormatTok)) {
1585
parseVerilogHierarchyHeader();
1586
return;
1587
}
1588
} else if (FormatTok->is(tok::string_literal)) {
1589
nextToken();
1590
if (FormatTok->is(tok::l_brace)) {
1591
if (Style.BraceWrapping.AfterExternBlock)
1592
addUnwrappedLine();
1593
// Either we indent or for backwards compatibility we follow the
1594
// AfterExternBlock style.
1595
unsigned AddLevels =
1596
(Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1597
(Style.BraceWrapping.AfterExternBlock &&
1598
Style.IndentExternBlock ==
1599
FormatStyle::IEBS_AfterExternBlock)
1600
? 1u
1601
: 0u;
1602
parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1603
addUnwrappedLine();
1604
return;
1605
}
1606
}
1607
break;
1608
case tok::kw_export:
1609
if (Style.isJavaScript()) {
1610
parseJavaScriptEs6ImportExport();
1611
return;
1612
}
1613
if (IsCpp) {
1614
nextToken();
1615
if (FormatTok->is(tok::kw_namespace)) {
1616
parseNamespace();
1617
return;
1618
}
1619
if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1620
return;
1621
}
1622
break;
1623
case tok::kw_inline:
1624
nextToken();
1625
if (FormatTok->is(tok::kw_namespace)) {
1626
parseNamespace();
1627
return;
1628
}
1629
break;
1630
case tok::identifier:
1631
if (FormatTok->is(TT_ForEachMacro)) {
1632
parseForOrWhileLoop();
1633
return;
1634
}
1635
if (FormatTok->is(TT_MacroBlockBegin)) {
1636
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1637
/*MunchSemi=*/false);
1638
return;
1639
}
1640
if (FormatTok->is(Keywords.kw_import)) {
1641
if (Style.isJavaScript()) {
1642
parseJavaScriptEs6ImportExport();
1643
return;
1644
}
1645
if (Style.Language == FormatStyle::LK_Proto) {
1646
nextToken();
1647
if (FormatTok->is(tok::kw_public))
1648
nextToken();
1649
if (FormatTok->isNot(tok::string_literal))
1650
return;
1651
nextToken();
1652
if (FormatTok->is(tok::semi))
1653
nextToken();
1654
addUnwrappedLine();
1655
return;
1656
}
1657
if (IsCpp && parseModuleImport())
1658
return;
1659
}
1660
if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1661
Keywords.kw_slots, Keywords.kw_qslots)) {
1662
nextToken();
1663
if (FormatTok->is(tok::colon)) {
1664
nextToken();
1665
addUnwrappedLine();
1666
return;
1667
}
1668
}
1669
if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1670
parseStatementMacro();
1671
return;
1672
}
1673
if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1674
parseNamespace();
1675
return;
1676
}
1677
// In Verilog labels can be any expression, so we don't do them here.
1678
// JS doesn't have macros, and within classes colons indicate fields, not
1679
// labels.
1680
// TableGen doesn't have labels.
1681
if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1682
Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1683
nextToken();
1684
if (!Line->InMacroBody || CurrentLines->size() > 1)
1685
Line->Tokens.begin()->Tok->MustBreakBefore = true;
1686
FormatTok->setFinalizedType(TT_GotoLabelColon);
1687
parseLabel(!Style.IndentGotoLabels);
1688
if (HasLabel)
1689
*HasLabel = true;
1690
return;
1691
}
1692
// In all other cases, parse the declaration.
1693
break;
1694
default:
1695
break;
1696
}
1697
1698
for (const bool InRequiresExpression =
1699
OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1700
!eof();) {
1701
if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1702
if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1703
Next && Next->isBinaryOperator()) {
1704
FormatTok->Tok.setKind(tok::identifier);
1705
}
1706
}
1707
const FormatToken *Previous = FormatTok->Previous;
1708
switch (FormatTok->Tok.getKind()) {
1709
case tok::at:
1710
nextToken();
1711
if (FormatTok->is(tok::l_brace)) {
1712
nextToken();
1713
parseBracedList();
1714
break;
1715
} else if (Style.Language == FormatStyle::LK_Java &&
1716
FormatTok->is(Keywords.kw_interface)) {
1717
nextToken();
1718
break;
1719
}
1720
switch (FormatTok->Tok.getObjCKeywordID()) {
1721
case tok::objc_public:
1722
case tok::objc_protected:
1723
case tok::objc_package:
1724
case tok::objc_private:
1725
return parseAccessSpecifier();
1726
case tok::objc_interface:
1727
case tok::objc_implementation:
1728
return parseObjCInterfaceOrImplementation();
1729
case tok::objc_protocol:
1730
if (parseObjCProtocol())
1731
return;
1732
break;
1733
case tok::objc_end:
1734
return; // Handled by the caller.
1735
case tok::objc_optional:
1736
case tok::objc_required:
1737
nextToken();
1738
addUnwrappedLine();
1739
return;
1740
case tok::objc_autoreleasepool:
1741
nextToken();
1742
if (FormatTok->is(tok::l_brace)) {
1743
if (Style.BraceWrapping.AfterControlStatement ==
1744
FormatStyle::BWACS_Always) {
1745
addUnwrappedLine();
1746
}
1747
parseBlock();
1748
}
1749
addUnwrappedLine();
1750
return;
1751
case tok::objc_synchronized:
1752
nextToken();
1753
if (FormatTok->is(tok::l_paren)) {
1754
// Skip synchronization object
1755
parseParens();
1756
}
1757
if (FormatTok->is(tok::l_brace)) {
1758
if (Style.BraceWrapping.AfterControlStatement ==
1759
FormatStyle::BWACS_Always) {
1760
addUnwrappedLine();
1761
}
1762
parseBlock();
1763
}
1764
addUnwrappedLine();
1765
return;
1766
case tok::objc_try:
1767
// This branch isn't strictly necessary (the kw_try case below would
1768
// do this too after the tok::at is parsed above). But be explicit.
1769
parseTryCatch();
1770
return;
1771
default:
1772
break;
1773
}
1774
break;
1775
case tok::kw_requires: {
1776
if (IsCpp) {
1777
bool ParsedClause = parseRequires();
1778
if (ParsedClause)
1779
return;
1780
} else {
1781
nextToken();
1782
}
1783
break;
1784
}
1785
case tok::kw_enum:
1786
// Ignore if this is part of "template <enum ..." or "... -> enum" or
1787
// "template <..., enum ...>".
1788
if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1789
nextToken();
1790
break;
1791
}
1792
1793
// parseEnum falls through and does not yet add an unwrapped line as an
1794
// enum definition can start a structural element.
1795
if (!parseEnum())
1796
break;
1797
// This only applies to C++ and Verilog.
1798
if (!IsCpp && !Style.isVerilog()) {
1799
addUnwrappedLine();
1800
return;
1801
}
1802
break;
1803
case tok::kw_typedef:
1804
nextToken();
1805
if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1806
Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1807
Keywords.kw_CF_CLOSED_ENUM,
1808
Keywords.kw_NS_CLOSED_ENUM)) {
1809
parseEnum();
1810
}
1811
break;
1812
case tok::kw_class:
1813
if (Style.isVerilog()) {
1814
parseBlock();
1815
addUnwrappedLine();
1816
return;
1817
}
1818
if (Style.isTableGen()) {
1819
// Do nothing special. In this case the l_brace becomes FunctionLBrace.
1820
// This is same as def and so on.
1821
nextToken();
1822
break;
1823
}
1824
[[fallthrough]];
1825
case tok::kw_struct:
1826
case tok::kw_union:
1827
if (parseStructLike())
1828
return;
1829
break;
1830
case tok::kw_decltype:
1831
nextToken();
1832
if (FormatTok->is(tok::l_paren)) {
1833
parseParens();
1834
assert(FormatTok->Previous);
1835
if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1836
tok::l_paren)) {
1837
Line->SeenDecltypeAuto = true;
1838
}
1839
}
1840
break;
1841
case tok::period:
1842
nextToken();
1843
// In Java, classes have an implicit static member "class".
1844
if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1845
FormatTok->is(tok::kw_class)) {
1846
nextToken();
1847
}
1848
if (Style.isJavaScript() && FormatTok &&
1849
FormatTok->Tok.getIdentifierInfo()) {
1850
// JavaScript only has pseudo keywords, all keywords are allowed to
1851
// appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1852
nextToken();
1853
}
1854
break;
1855
case tok::semi:
1856
nextToken();
1857
addUnwrappedLine();
1858
return;
1859
case tok::r_brace:
1860
addUnwrappedLine();
1861
return;
1862
case tok::l_paren: {
1863
parseParens();
1864
// Break the unwrapped line if a K&R C function definition has a parameter
1865
// declaration.
1866
if (OpeningBrace || !IsCpp || !Previous || eof())
1867
break;
1868
if (isC78ParameterDecl(FormatTok,
1869
Tokens->peekNextToken(/*SkipComment=*/true),
1870
Previous)) {
1871
addUnwrappedLine();
1872
return;
1873
}
1874
break;
1875
}
1876
case tok::kw_operator:
1877
nextToken();
1878
if (FormatTok->isBinaryOperator())
1879
nextToken();
1880
break;
1881
case tok::caret:
1882
nextToken();
1883
// Block return type.
1884
if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1885
nextToken();
1886
// Return types: pointers are ok too.
1887
while (FormatTok->is(tok::star))
1888
nextToken();
1889
}
1890
// Block argument list.
1891
if (FormatTok->is(tok::l_paren))
1892
parseParens();
1893
// Block body.
1894
if (FormatTok->is(tok::l_brace))
1895
parseChildBlock();
1896
break;
1897
case tok::l_brace:
1898
if (InRequiresExpression)
1899
FormatTok->setFinalizedType(TT_BracedListLBrace);
1900
if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1901
IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1902
// A block outside of parentheses must be the last part of a
1903
// structural element.
1904
// FIXME: Figure out cases where this is not true, and add projections
1905
// for them (the one we know is missing are lambdas).
1906
if (Style.Language == FormatStyle::LK_Java &&
1907
Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1908
// If necessary, we could set the type to something different than
1909
// TT_FunctionLBrace.
1910
if (Style.BraceWrapping.AfterControlStatement ==
1911
FormatStyle::BWACS_Always) {
1912
addUnwrappedLine();
1913
}
1914
} else if (Style.BraceWrapping.AfterFunction) {
1915
addUnwrappedLine();
1916
}
1917
if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1918
FormatTok->setFinalizedType(TT_FunctionLBrace);
1919
parseBlock();
1920
IsDecltypeAutoFunction = false;
1921
addUnwrappedLine();
1922
return;
1923
}
1924
// Otherwise this was a braced init list, and the structural
1925
// element continues.
1926
break;
1927
case tok::kw_try:
1928
if (Style.isJavaScript() && Line->MustBeDeclaration) {
1929
// field/method declaration.
1930
nextToken();
1931
break;
1932
}
1933
// We arrive here when parsing function-try blocks.
1934
if (Style.BraceWrapping.AfterFunction)
1935
addUnwrappedLine();
1936
parseTryCatch();
1937
return;
1938
case tok::identifier: {
1939
if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1940
Line->MustBeDeclaration) {
1941
addUnwrappedLine();
1942
parseCSharpGenericTypeConstraint();
1943
break;
1944
}
1945
if (FormatTok->is(TT_MacroBlockEnd)) {
1946
addUnwrappedLine();
1947
return;
1948
}
1949
1950
// Function declarations (as opposed to function expressions) are parsed
1951
// on their own unwrapped line by continuing this loop. Function
1952
// expressions (functions that are not on their own line) must not create
1953
// a new unwrapped line, so they are special cased below.
1954
size_t TokenCount = Line->Tokens.size();
1955
if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1956
(TokenCount > 1 ||
1957
(TokenCount == 1 &&
1958
Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1959
tryToParseJSFunction();
1960
break;
1961
}
1962
if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1963
FormatTok->is(Keywords.kw_interface)) {
1964
if (Style.isJavaScript()) {
1965
// In JavaScript/TypeScript, "interface" can be used as a standalone
1966
// identifier, e.g. in `var interface = 1;`. If "interface" is
1967
// followed by another identifier, it is very like to be an actual
1968
// interface declaration.
1969
unsigned StoredPosition = Tokens->getPosition();
1970
FormatToken *Next = Tokens->getNextToken();
1971
FormatTok = Tokens->setPosition(StoredPosition);
1972
if (!mustBeJSIdent(Keywords, Next)) {
1973
nextToken();
1974
break;
1975
}
1976
}
1977
parseRecord();
1978
addUnwrappedLine();
1979
return;
1980
}
1981
1982
if (Style.isVerilog()) {
1983
if (FormatTok->is(Keywords.kw_table)) {
1984
parseVerilogTable();
1985
return;
1986
}
1987
if (Keywords.isVerilogBegin(*FormatTok) ||
1988
Keywords.isVerilogHierarchy(*FormatTok)) {
1989
parseBlock();
1990
addUnwrappedLine();
1991
return;
1992
}
1993
}
1994
1995
if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
1996
if (parseStructLike())
1997
return;
1998
break;
1999
}
2000
2001
if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2002
parseStatementMacro();
2003
return;
2004
}
2005
2006
// See if the following token should start a new unwrapped line.
2007
StringRef Text = FormatTok->TokenText;
2008
2009
FormatToken *PreviousToken = FormatTok;
2010
nextToken();
2011
2012
// JS doesn't have macros, and within classes colons indicate fields, not
2013
// labels.
2014
if (Style.isJavaScript())
2015
break;
2016
2017
auto OneTokenSoFar = [&]() {
2018
auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2019
while (I != E && I->Tok->is(tok::comment))
2020
++I;
2021
if (Style.isVerilog())
2022
while (I != E && I->Tok->is(tok::hash))
2023
++I;
2024
return I != E && (++I == E);
2025
};
2026
if (OneTokenSoFar()) {
2027
// Recognize function-like macro usages without trailing semicolon as
2028
// well as free-standing macros like Q_OBJECT.
2029
bool FunctionLike = FormatTok->is(tok::l_paren);
2030
if (FunctionLike)
2031
parseParens();
2032
2033
bool FollowedByNewline =
2034
CommentsBeforeNextToken.empty()
2035
? FormatTok->NewlinesBefore > 0
2036
: CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2037
2038
if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2039
tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2040
if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2041
PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2042
addUnwrappedLine();
2043
return;
2044
}
2045
}
2046
break;
2047
}
2048
case tok::equal:
2049
if ((Style.isJavaScript() || Style.isCSharp()) &&
2050
FormatTok->is(TT_FatArrow)) {
2051
tryToParseChildBlock();
2052
break;
2053
}
2054
2055
nextToken();
2056
if (FormatTok->is(tok::l_brace)) {
2057
// Block kind should probably be set to BK_BracedInit for any language.
2058
// C# needs this change to ensure that array initialisers and object
2059
// initialisers are indented the same way.
2060
if (Style.isCSharp())
2061
FormatTok->setBlockKind(BK_BracedInit);
2062
// TableGen's defset statement has syntax of the form,
2063
// `defset <type> <name> = { <statement>... }`
2064
if (Style.isTableGen() &&
2065
Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2066
FormatTok->setFinalizedType(TT_FunctionLBrace);
2067
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2068
/*MunchSemi=*/false);
2069
addUnwrappedLine();
2070
break;
2071
}
2072
nextToken();
2073
parseBracedList();
2074
} else if (Style.Language == FormatStyle::LK_Proto &&
2075
FormatTok->is(tok::less)) {
2076
nextToken();
2077
parseBracedList(/*IsAngleBracket=*/true);
2078
}
2079
break;
2080
case tok::l_square:
2081
parseSquare();
2082
break;
2083
case tok::kw_new:
2084
parseNew();
2085
break;
2086
case tok::kw_switch:
2087
if (Style.Language == FormatStyle::LK_Java)
2088
parseSwitch(/*IsExpr=*/true);
2089
else
2090
nextToken();
2091
break;
2092
case tok::kw_case:
2093
// Proto: there are no switch/case statements.
2094
if (Style.Language == FormatStyle::LK_Proto) {
2095
nextToken();
2096
return;
2097
}
2098
// In Verilog switch is called case.
2099
if (Style.isVerilog()) {
2100
parseBlock();
2101
addUnwrappedLine();
2102
return;
2103
}
2104
if (Style.isJavaScript() && Line->MustBeDeclaration) {
2105
// 'case: string' field declaration.
2106
nextToken();
2107
break;
2108
}
2109
parseCaseLabel();
2110
break;
2111
case tok::kw_default:
2112
nextToken();
2113
if (Style.isVerilog()) {
2114
if (FormatTok->is(tok::colon)) {
2115
// The label will be handled in the next iteration.
2116
break;
2117
}
2118
if (FormatTok->is(Keywords.kw_clocking)) {
2119
// A default clocking block.
2120
parseBlock();
2121
addUnwrappedLine();
2122
return;
2123
}
2124
parseVerilogCaseLabel();
2125
return;
2126
}
2127
break;
2128
case tok::colon:
2129
nextToken();
2130
if (Style.isVerilog()) {
2131
parseVerilogCaseLabel();
2132
return;
2133
}
2134
break;
2135
case tok::greater:
2136
nextToken();
2137
if (FormatTok->is(tok::l_brace))
2138
FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2139
break;
2140
default:
2141
nextToken();
2142
break;
2143
}
2144
}
2145
}
2146
2147
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2148
assert(FormatTok->is(tok::l_brace));
2149
if (!Style.isCSharp())
2150
return false;
2151
// See if it's a property accessor.
2152
if (FormatTok->Previous->isNot(tok::identifier))
2153
return false;
2154
2155
// See if we are inside a property accessor.
2156
//
2157
// Record the current tokenPosition so that we can advance and
2158
// reset the current token. `Next` is not set yet so we need
2159
// another way to advance along the token stream.
2160
unsigned int StoredPosition = Tokens->getPosition();
2161
FormatToken *Tok = Tokens->getNextToken();
2162
2163
// A trivial property accessor is of the form:
2164
// { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2165
// Track these as they do not require line breaks to be introduced.
2166
bool HasSpecialAccessor = false;
2167
bool IsTrivialPropertyAccessor = true;
2168
while (!eof()) {
2169
if (Tok->isAccessSpecifierKeyword() ||
2170
Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
2171
Keywords.kw_init, Keywords.kw_set)) {
2172
if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2173
HasSpecialAccessor = true;
2174
Tok = Tokens->getNextToken();
2175
continue;
2176
}
2177
if (Tok->isNot(tok::r_brace))
2178
IsTrivialPropertyAccessor = false;
2179
break;
2180
}
2181
2182
if (!HasSpecialAccessor) {
2183
Tokens->setPosition(StoredPosition);
2184
return false;
2185
}
2186
2187
// Try to parse the property accessor:
2188
// https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2189
Tokens->setPosition(StoredPosition);
2190
if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2191
addUnwrappedLine();
2192
nextToken();
2193
do {
2194
switch (FormatTok->Tok.getKind()) {
2195
case tok::r_brace:
2196
nextToken();
2197
if (FormatTok->is(tok::equal)) {
2198
while (!eof() && FormatTok->isNot(tok::semi))
2199
nextToken();
2200
nextToken();
2201
}
2202
addUnwrappedLine();
2203
return true;
2204
case tok::l_brace:
2205
++Line->Level;
2206
parseBlock(/*MustBeDeclaration=*/true);
2207
addUnwrappedLine();
2208
--Line->Level;
2209
break;
2210
case tok::equal:
2211
if (FormatTok->is(TT_FatArrow)) {
2212
++Line->Level;
2213
do {
2214
nextToken();
2215
} while (!eof() && FormatTok->isNot(tok::semi));
2216
nextToken();
2217
addUnwrappedLine();
2218
--Line->Level;
2219
break;
2220
}
2221
nextToken();
2222
break;
2223
default:
2224
if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2225
Keywords.kw_set) &&
2226
!IsTrivialPropertyAccessor) {
2227
// Non-trivial get/set needs to be on its own line.
2228
addUnwrappedLine();
2229
}
2230
nextToken();
2231
}
2232
} while (!eof());
2233
2234
// Unreachable for well-formed code (paired '{' and '}').
2235
return true;
2236
}
2237
2238
bool UnwrappedLineParser::tryToParseLambda() {
2239
assert(FormatTok->is(tok::l_square));
2240
if (!IsCpp) {
2241
nextToken();
2242
return false;
2243
}
2244
FormatToken &LSquare = *FormatTok;
2245
if (!tryToParseLambdaIntroducer())
2246
return false;
2247
2248
bool SeenArrow = false;
2249
bool InTemplateParameterList = false;
2250
2251
while (FormatTok->isNot(tok::l_brace)) {
2252
if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2253
nextToken();
2254
continue;
2255
}
2256
switch (FormatTok->Tok.getKind()) {
2257
case tok::l_brace:
2258
break;
2259
case tok::l_paren:
2260
parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2261
break;
2262
case tok::l_square:
2263
parseSquare();
2264
break;
2265
case tok::less:
2266
assert(FormatTok->Previous);
2267
if (FormatTok->Previous->is(tok::r_square))
2268
InTemplateParameterList = true;
2269
nextToken();
2270
break;
2271
case tok::kw_auto:
2272
case tok::kw_class:
2273
case tok::kw_struct:
2274
case tok::kw_union:
2275
case tok::kw_template:
2276
case tok::kw_typename:
2277
case tok::amp:
2278
case tok::star:
2279
case tok::kw_const:
2280
case tok::kw_constexpr:
2281
case tok::kw_consteval:
2282
case tok::comma:
2283
case tok::greater:
2284
case tok::identifier:
2285
case tok::numeric_constant:
2286
case tok::coloncolon:
2287
case tok::kw_mutable:
2288
case tok::kw_noexcept:
2289
case tok::kw_static:
2290
nextToken();
2291
break;
2292
// Specialization of a template with an integer parameter can contain
2293
// arithmetic, logical, comparison and ternary operators.
2294
//
2295
// FIXME: This also accepts sequences of operators that are not in the scope
2296
// of a template argument list.
2297
//
2298
// In a C++ lambda a template type can only occur after an arrow. We use
2299
// this as an heuristic to distinguish between Objective-C expressions
2300
// followed by an `a->b` expression, such as:
2301
// ([obj func:arg] + a->b)
2302
// Otherwise the code below would parse as a lambda.
2303
case tok::plus:
2304
case tok::minus:
2305
case tok::exclaim:
2306
case tok::tilde:
2307
case tok::slash:
2308
case tok::percent:
2309
case tok::lessless:
2310
case tok::pipe:
2311
case tok::pipepipe:
2312
case tok::ampamp:
2313
case tok::caret:
2314
case tok::equalequal:
2315
case tok::exclaimequal:
2316
case tok::greaterequal:
2317
case tok::lessequal:
2318
case tok::question:
2319
case tok::colon:
2320
case tok::ellipsis:
2321
case tok::kw_true:
2322
case tok::kw_false:
2323
if (SeenArrow || InTemplateParameterList) {
2324
nextToken();
2325
break;
2326
}
2327
return true;
2328
case tok::arrow:
2329
// This might or might not actually be a lambda arrow (this could be an
2330
// ObjC method invocation followed by a dereferencing arrow). We might
2331
// reset this back to TT_Unknown in TokenAnnotator.
2332
FormatTok->setFinalizedType(TT_LambdaArrow);
2333
SeenArrow = true;
2334
nextToken();
2335
break;
2336
case tok::kw_requires: {
2337
auto *RequiresToken = FormatTok;
2338
nextToken();
2339
parseRequiresClause(RequiresToken);
2340
break;
2341
}
2342
case tok::equal:
2343
if (!InTemplateParameterList)
2344
return true;
2345
nextToken();
2346
break;
2347
default:
2348
return true;
2349
}
2350
}
2351
2352
FormatTok->setFinalizedType(TT_LambdaLBrace);
2353
LSquare.setFinalizedType(TT_LambdaLSquare);
2354
2355
NestedLambdas.push_back(Line->SeenDecltypeAuto);
2356
parseChildBlock();
2357
assert(!NestedLambdas.empty());
2358
NestedLambdas.pop_back();
2359
2360
return true;
2361
}
2362
2363
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2364
const FormatToken *Previous = FormatTok->Previous;
2365
const FormatToken *LeftSquare = FormatTok;
2366
nextToken();
2367
if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2368
!Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2369
tok::kw_co_yield, tok::kw_co_return)) ||
2370
Previous->closesScope())) ||
2371
LeftSquare->isCppStructuredBinding(IsCpp)) {
2372
return false;
2373
}
2374
if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2375
return false;
2376
if (FormatTok->is(tok::r_square)) {
2377
const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2378
if (Next->is(tok::greater))
2379
return false;
2380
}
2381
parseSquare(/*LambdaIntroducer=*/true);
2382
return true;
2383
}
2384
2385
void UnwrappedLineParser::tryToParseJSFunction() {
2386
assert(FormatTok->is(Keywords.kw_function));
2387
if (FormatTok->is(Keywords.kw_async))
2388
nextToken();
2389
// Consume "function".
2390
nextToken();
2391
2392
// Consume * (generator function). Treat it like C++'s overloaded operators.
2393
if (FormatTok->is(tok::star)) {
2394
FormatTok->setFinalizedType(TT_OverloadedOperator);
2395
nextToken();
2396
}
2397
2398
// Consume function name.
2399
if (FormatTok->is(tok::identifier))
2400
nextToken();
2401
2402
if (FormatTok->isNot(tok::l_paren))
2403
return;
2404
2405
// Parse formal parameter list.
2406
parseParens();
2407
2408
if (FormatTok->is(tok::colon)) {
2409
// Parse a type definition.
2410
nextToken();
2411
2412
// Eat the type declaration. For braced inline object types, balance braces,
2413
// otherwise just parse until finding an l_brace for the function body.
2414
if (FormatTok->is(tok::l_brace))
2415
tryToParseBracedList();
2416
else
2417
while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2418
nextToken();
2419
}
2420
2421
if (FormatTok->is(tok::semi))
2422
return;
2423
2424
parseChildBlock();
2425
}
2426
2427
bool UnwrappedLineParser::tryToParseBracedList() {
2428
if (FormatTok->is(BK_Unknown))
2429
calculateBraceTypes();
2430
assert(FormatTok->isNot(BK_Unknown));
2431
if (FormatTok->is(BK_Block))
2432
return false;
2433
nextToken();
2434
parseBracedList();
2435
return true;
2436
}
2437
2438
bool UnwrappedLineParser::tryToParseChildBlock() {
2439
assert(Style.isJavaScript() || Style.isCSharp());
2440
assert(FormatTok->is(TT_FatArrow));
2441
// Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2442
// They always start an expression or a child block if followed by a curly
2443
// brace.
2444
nextToken();
2445
if (FormatTok->isNot(tok::l_brace))
2446
return false;
2447
parseChildBlock();
2448
return true;
2449
}
2450
2451
bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2452
assert(!IsAngleBracket || !IsEnum);
2453
bool HasError = false;
2454
2455
// FIXME: Once we have an expression parser in the UnwrappedLineParser,
2456
// replace this by using parseAssignmentExpression() inside.
2457
do {
2458
if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2459
tryToParseChildBlock()) {
2460
continue;
2461
}
2462
if (Style.isJavaScript()) {
2463
if (FormatTok->is(Keywords.kw_function)) {
2464
tryToParseJSFunction();
2465
continue;
2466
}
2467
if (FormatTok->is(tok::l_brace)) {
2468
// Could be a method inside of a braced list `{a() { return 1; }}`.
2469
if (tryToParseBracedList())
2470
continue;
2471
parseChildBlock();
2472
}
2473
}
2474
if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2475
if (IsEnum) {
2476
FormatTok->setBlockKind(BK_Block);
2477
if (!Style.AllowShortEnumsOnASingleLine)
2478
addUnwrappedLine();
2479
}
2480
nextToken();
2481
return !HasError;
2482
}
2483
switch (FormatTok->Tok.getKind()) {
2484
case tok::l_square:
2485
if (Style.isCSharp())
2486
parseSquare();
2487
else
2488
tryToParseLambda();
2489
break;
2490
case tok::l_paren:
2491
parseParens();
2492
// JavaScript can just have free standing methods and getters/setters in
2493
// object literals. Detect them by a "{" following ")".
2494
if (Style.isJavaScript()) {
2495
if (FormatTok->is(tok::l_brace))
2496
parseChildBlock();
2497
break;
2498
}
2499
break;
2500
case tok::l_brace:
2501
// Assume there are no blocks inside a braced init list apart
2502
// from the ones we explicitly parse out (like lambdas).
2503
FormatTok->setBlockKind(BK_BracedInit);
2504
if (!IsAngleBracket) {
2505
auto *Prev = FormatTok->Previous;
2506
if (Prev && Prev->is(tok::greater))
2507
Prev->setFinalizedType(TT_TemplateCloser);
2508
}
2509
nextToken();
2510
parseBracedList();
2511
break;
2512
case tok::less:
2513
nextToken();
2514
if (IsAngleBracket)
2515
parseBracedList(/*IsAngleBracket=*/true);
2516
break;
2517
case tok::semi:
2518
// JavaScript (or more precisely TypeScript) can have semicolons in braced
2519
// lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2520
// used for error recovery if we have otherwise determined that this is
2521
// a braced list.
2522
if (Style.isJavaScript()) {
2523
nextToken();
2524
break;
2525
}
2526
HasError = true;
2527
if (!IsEnum)
2528
return false;
2529
nextToken();
2530
break;
2531
case tok::comma:
2532
nextToken();
2533
if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2534
addUnwrappedLine();
2535
break;
2536
default:
2537
nextToken();
2538
break;
2539
}
2540
} while (!eof());
2541
return false;
2542
}
2543
2544
/// \brief Parses a pair of parentheses (and everything between them).
2545
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2546
/// double ampersands. This applies for all nested scopes as well.
2547
///
2548
/// Returns whether there is a `=` token between the parentheses.
2549
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2550
assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2551
auto *LeftParen = FormatTok;
2552
bool SeenEqual = false;
2553
bool MightBeFoldExpr = false;
2554
const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2555
nextToken();
2556
do {
2557
switch (FormatTok->Tok.getKind()) {
2558
case tok::l_paren:
2559
if (parseParens(AmpAmpTokenType))
2560
SeenEqual = true;
2561
if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2562
parseChildBlock();
2563
break;
2564
case tok::r_paren: {
2565
auto *Prev = LeftParen->Previous;
2566
if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2567
Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2568
const auto *Next = Tokens->peekNextToken();
2569
const bool DoubleParens =
2570
Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2571
const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2572
const bool Blacklisted =
2573
PrevPrev &&
2574
(PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2575
(SeenEqual &&
2576
(PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2577
PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2578
const bool ReturnParens =
2579
Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2580
((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2581
(!NestedLambdas.empty() && !NestedLambdas.back())) &&
2582
Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2583
Next->is(tok::semi);
2584
if ((DoubleParens && !Blacklisted) || ReturnParens) {
2585
LeftParen->Optional = true;
2586
FormatTok->Optional = true;
2587
}
2588
}
2589
if (Prev) {
2590
if (Prev->is(TT_TypenameMacro)) {
2591
LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2592
FormatTok->setFinalizedType(TT_TypeDeclarationParen);
2593
} else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
2594
Prev->setFinalizedType(TT_TemplateCloser);
2595
}
2596
}
2597
nextToken();
2598
return SeenEqual;
2599
}
2600
case tok::r_brace:
2601
// A "}" inside parenthesis is an error if there wasn't a matching "{".
2602
return SeenEqual;
2603
case tok::l_square:
2604
tryToParseLambda();
2605
break;
2606
case tok::l_brace:
2607
if (!tryToParseBracedList())
2608
parseChildBlock();
2609
break;
2610
case tok::at:
2611
nextToken();
2612
if (FormatTok->is(tok::l_brace)) {
2613
nextToken();
2614
parseBracedList();
2615
}
2616
break;
2617
case tok::ellipsis:
2618
MightBeFoldExpr = true;
2619
nextToken();
2620
break;
2621
case tok::equal:
2622
SeenEqual = true;
2623
if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2624
tryToParseChildBlock();
2625
else
2626
nextToken();
2627
break;
2628
case tok::kw_class:
2629
if (Style.isJavaScript())
2630
parseRecord(/*ParseAsExpr=*/true);
2631
else
2632
nextToken();
2633
break;
2634
case tok::identifier:
2635
if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2636
tryToParseJSFunction();
2637
else
2638
nextToken();
2639
break;
2640
case tok::kw_switch:
2641
if (Style.Language == FormatStyle::LK_Java)
2642
parseSwitch(/*IsExpr=*/true);
2643
else
2644
nextToken();
2645
break;
2646
case tok::kw_requires: {
2647
auto RequiresToken = FormatTok;
2648
nextToken();
2649
parseRequiresExpression(RequiresToken);
2650
break;
2651
}
2652
case tok::ampamp:
2653
if (AmpAmpTokenType != TT_Unknown)
2654
FormatTok->setFinalizedType(AmpAmpTokenType);
2655
[[fallthrough]];
2656
default:
2657
nextToken();
2658
break;
2659
}
2660
} while (!eof());
2661
return SeenEqual;
2662
}
2663
2664
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2665
if (!LambdaIntroducer) {
2666
assert(FormatTok->is(tok::l_square) && "'[' expected.");
2667
if (tryToParseLambda())
2668
return;
2669
}
2670
do {
2671
switch (FormatTok->Tok.getKind()) {
2672
case tok::l_paren:
2673
parseParens();
2674
break;
2675
case tok::r_square:
2676
nextToken();
2677
return;
2678
case tok::r_brace:
2679
// A "}" inside parenthesis is an error if there wasn't a matching "{".
2680
return;
2681
case tok::l_square:
2682
parseSquare();
2683
break;
2684
case tok::l_brace: {
2685
if (!tryToParseBracedList())
2686
parseChildBlock();
2687
break;
2688
}
2689
case tok::at:
2690
case tok::colon:
2691
nextToken();
2692
if (FormatTok->is(tok::l_brace)) {
2693
nextToken();
2694
parseBracedList();
2695
}
2696
break;
2697
default:
2698
nextToken();
2699
break;
2700
}
2701
} while (!eof());
2702
}
2703
2704
void UnwrappedLineParser::keepAncestorBraces() {
2705
if (!Style.RemoveBracesLLVM)
2706
return;
2707
2708
const int MaxNestingLevels = 2;
2709
const int Size = NestedTooDeep.size();
2710
if (Size >= MaxNestingLevels)
2711
NestedTooDeep[Size - MaxNestingLevels] = true;
2712
NestedTooDeep.push_back(false);
2713
}
2714
2715
/// Returns the token of the last node in \p Line that is not a comment, or
/// nullptr if the line has no such token.
static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
  for (auto I = Line.Tokens.rbegin(), E = Line.Tokens.rend(); I != E; ++I) {
    if (!I->Tok->is(tok::comment))
      return I->Tok;
  }
  return nullptr;
}
2722
2723
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2724
FormatToken *Tok = nullptr;
2725
2726
if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2727
PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2728
Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2729
? getLastNonComment(*Line)
2730
: Line->Tokens.back().Tok;
2731
assert(Tok);
2732
if (Tok->BraceCount < 0) {
2733
assert(Tok->BraceCount == -1);
2734
Tok = nullptr;
2735
} else {
2736
Tok->BraceCount = -1;
2737
}
2738
}
2739
2740
addUnwrappedLine();
2741
++Line->Level;
2742
++Line->UnbracedBodyLevel;
2743
parseStructuralElement();
2744
--Line->UnbracedBodyLevel;
2745
2746
if (Tok) {
2747
assert(!Line->InPPDirective);
2748
Tok = nullptr;
2749
for (const auto &L : llvm::reverse(*CurrentLines)) {
2750
if (!L.InPPDirective && getLastNonComment(L)) {
2751
Tok = L.Tokens.back().Tok;
2752
break;
2753
}
2754
}
2755
assert(Tok);
2756
++Tok->BraceCount;
2757
}
2758
2759
if (CheckEOF && eof())
2760
addUnwrappedLine();
2761
2762
--Line->Level;
2763
}
2764
2765
/// Marks \p LeftBrace and its matching right brace as optional.
///
/// Does nothing if \p LeftBrace is null or has no matching brace recorded.
static void markOptionalBraces(FormatToken *LeftBrace) {
  if (!LeftBrace)
    return;

  assert(LeftBrace->is(tok::l_brace));

  if (FormatToken *RightBrace = LeftBrace->MatchingParen) {
    assert(RightBrace->is(tok::r_brace));
    assert(RightBrace->MatchingParen == LeftBrace);
    assert(LeftBrace->Optional == RightBrace->Optional);

    LeftBrace->Optional = true;
    RightBrace->Optional = true;
  } else {
    // An unmatched brace must not have been marked optional before.
    assert(!LeftBrace->Optional);
  }
}
2784
2785
void UnwrappedLineParser::handleAttributes() {
2786
// Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2787
if (FormatTok->isAttribute())
2788
nextToken();
2789
else if (FormatTok->is(tok::l_square))
2790
handleCppAttributes();
2791
}
2792
2793
bool UnwrappedLineParser::handleCppAttributes() {
2794
// Handle [[likely]] / [[unlikely]] attributes.
2795
assert(FormatTok->is(tok::l_square));
2796
if (!tryToParseSimpleAttribute())
2797
return false;
2798
parseSquare();
2799
return true;
2800
}
2801
2802
/// Returns whether \c Tok begins a block.
2803
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2804
// FIXME: rename the function or make
2805
// Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2806
return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2807
: Tok.is(tok::l_brace);
2808
}
2809
2810
/// Parses an `if` statement, including any `else` / `else if` chain, starting
/// at the `if` keyword (or, for Verilog, at one of the assertion keywords
/// checked by the assertion below).
///
/// \param IfKind if non-null and RemoveBracesLLVM is enabled, receives the
///        shape of the parsed statement (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces forces the braces of this statement to be kept when
///        RemoveBracesLLVM is enabled.
/// \param IsVerilogAssert the statement is a Verilog assertion, whose "then"
///        action is optional.
///
/// Returns the `{` of the "then" block when RemoveBracesLLVM is enabled and
/// such a brace exists; nullptr otherwise.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Skip the extras in `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // A TableGen if statement looks like `if <cond> then { ... }`. Everything
  // up to `then` is just a value, so it is skipped.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then))
      nextToken();
  }

  // `if !consteval`: skip the `!`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();

  // In a Verilog assert statement the "then" action may be missing entirely.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // True for the block kinds that force the enclosing braces to stay.
  const auto IsIfOnlyOrIfElseIf = [](IfStmtKind K) {
    return K == IfStmtKind::IfOnly || K == IfStmtKind::IfElseIf;
  };

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind ThenBlockKind = IfStmtKind::NotIf;

  // --- "then" part ---
  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &ThenBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IsIfOnlyOrIfElseIf(ThenBlockKind);
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind StmtKind = IfStmtKind::IfOnly;

  // --- "else" part ---
  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      StmtKind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      // `else { ... }`
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *InnerIfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces || IsIfOnlyOrIfElseIf(ElseBlockKind);
      } else if (FollowedByIf && InnerIfLBrace && !InnerIfLBrace->Optional) {
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if ...`: recurse. A comment between `else` and `if` puts the
      // nested `if` on its own, deeper line.
      const FormatToken *BeforeIf = Tokens->getPreviousToken();
      assert(BeforeIf);
      const bool IsPrecededByComment = BeforeIf->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      // The nested call pushes its own NestedTooDeep entry, so ours is taken
      // off for the duration of the call and restored afterwards.
      bool SavedTooDeep = true;
      if (Style.RemoveBracesLLVM) {
        StmtKind = IfStmtKind::IfElseIf;
        SavedTooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(SavedTooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || ThenBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair.
    if (FormatToken *IfRightBrace = IfLeftBrace->MatchingParen) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = StmtKind;

  return IfLeftBrace;
}
2982
2983
void UnwrappedLineParser::parseTryCatch() {
2984
assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2985
nextToken();
2986
bool NeedsUnwrappedLine = false;
2987
bool HasCtorInitializer = false;
2988
if (FormatTok->is(tok::colon)) {
2989
auto *Colon = FormatTok;
2990
// We are in a function try block, what comes is an initializer list.
2991
nextToken();
2992
if (FormatTok->is(tok::identifier)) {
2993
HasCtorInitializer = true;
2994
Colon->setFinalizedType(TT_CtorInitializerColon);
2995
}
2996
2997
// In case identifiers were removed by clang-tidy, what might follow is
2998
// multiple commas in sequence - before the first identifier.
2999
while (FormatTok->is(tok::comma))
3000
nextToken();
3001
3002
while (FormatTok->is(tok::identifier)) {
3003
nextToken();
3004
if (FormatTok->is(tok::l_paren)) {
3005
parseParens();
3006
} else if (FormatTok->is(tok::l_brace)) {
3007
nextToken();
3008
parseBracedList();
3009
}
3010
3011
// In case identifiers were removed by clang-tidy, what might follow is
3012
// multiple commas in sequence - after the first identifier.
3013
while (FormatTok->is(tok::comma))
3014
nextToken();
3015
}
3016
}
3017
// Parse try with resource.
3018
if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
3019
parseParens();
3020
3021
keepAncestorBraces();
3022
3023
if (FormatTok->is(tok::l_brace)) {
3024
if (HasCtorInitializer)
3025
FormatTok->setFinalizedType(TT_FunctionLBrace);
3026
CompoundStatementIndenter Indenter(this, Style, Line->Level);
3027
parseBlock();
3028
if (Style.BraceWrapping.BeforeCatch)
3029
addUnwrappedLine();
3030
else
3031
NeedsUnwrappedLine = true;
3032
} else if (FormatTok->isNot(tok::kw_catch)) {
3033
// The C++ standard requires a compound-statement after a try.
3034
// If there's none, we try to assume there's a structuralElement
3035
// and try to continue.
3036
addUnwrappedLine();
3037
++Line->Level;
3038
parseStructuralElement();
3039
--Line->Level;
3040
}
3041
while (true) {
3042
if (FormatTok->is(tok::at))
3043
nextToken();
3044
if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3045
tok::kw___finally) ||
3046
((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3047
FormatTok->is(Keywords.kw_finally)) ||
3048
(FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3049
FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3050
break;
3051
}
3052
nextToken();
3053
while (FormatTok->isNot(tok::l_brace)) {
3054
if (FormatTok->is(tok::l_paren)) {
3055
parseParens();
3056
continue;
3057
}
3058
if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3059
if (Style.RemoveBracesLLVM)
3060
NestedTooDeep.pop_back();
3061
return;
3062
}
3063
nextToken();
3064
}
3065
NeedsUnwrappedLine = false;
3066
Line->MustBeDeclaration = false;
3067
CompoundStatementIndenter Indenter(this, Style, Line->Level);
3068
parseBlock();
3069
if (Style.BraceWrapping.BeforeCatch)
3070
addUnwrappedLine();
3071
else
3072
NeedsUnwrappedLine = true;
3073
}
3074
3075
if (Style.RemoveBracesLLVM)
3076
NestedTooDeep.pop_back();
3077
3078
if (NeedsUnwrappedLine)
3079
addUnwrappedLine();
3080
}
3081
3082
void UnwrappedLineParser::parseNamespace() {
3083
assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3084
"'namespace' expected");
3085
3086
const FormatToken &InitialToken = *FormatTok;
3087
nextToken();
3088
if (InitialToken.is(TT_NamespaceMacro)) {
3089
parseParens();
3090
} else {
3091
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3092
tok::l_square, tok::period, tok::l_paren) ||
3093
(Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3094
if (FormatTok->is(tok::l_square))
3095
parseSquare();
3096
else if (FormatTok->is(tok::l_paren))
3097
parseParens();
3098
else
3099
nextToken();
3100
}
3101
}
3102
if (FormatTok->is(tok::l_brace)) {
3103
FormatTok->setFinalizedType(TT_NamespaceLBrace);
3104
3105
if (ShouldBreakBeforeBrace(Style, InitialToken))
3106
addUnwrappedLine();
3107
3108
unsigned AddLevels =
3109
Style.NamespaceIndentation == FormatStyle::NI_All ||
3110
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3111
DeclarationScopeStack.size() > 1)
3112
? 1u
3113
: 0u;
3114
bool ManageWhitesmithsBraces =
3115
AddLevels == 0u &&
3116
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3117
3118
// If we're in Whitesmiths mode, indent the brace if we're not indenting
3119
// the whole block.
3120
if (ManageWhitesmithsBraces)
3121
++Line->Level;
3122
3123
// Munch the semicolon after a namespace. This is more common than one would
3124
// think. Putting the semicolon into its own line is very ugly.
3125
parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3126
/*KeepBraces=*/true, /*IfKind=*/nullptr,
3127
ManageWhitesmithsBraces);
3128
3129
addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3130
3131
if (ManageWhitesmithsBraces)
3132
--Line->Level;
3133
}
3134
// FIXME: Add error handling.
3135
}
3136
3137
void UnwrappedLineParser::parseNew() {
3138
assert(FormatTok->is(tok::kw_new) && "'new' expected");
3139
nextToken();
3140
3141
if (Style.isCSharp()) {
3142
do {
3143
// Handle constructor invocation, e.g. `new(field: value)`.
3144
if (FormatTok->is(tok::l_paren))
3145
parseParens();
3146
3147
// Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3148
if (FormatTok->is(tok::l_brace))
3149
parseBracedList();
3150
3151
if (FormatTok->isOneOf(tok::semi, tok::comma))
3152
return;
3153
3154
nextToken();
3155
} while (!eof());
3156
}
3157
3158
if (Style.Language != FormatStyle::LK_Java)
3159
return;
3160
3161
// In Java, we can parse everything up to the parens, which aren't optional.
3162
do {
3163
// There should not be a ;, { or } before the new's open paren.
3164
if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3165
return;
3166
3167
// Consume the parens.
3168
if (FormatTok->is(tok::l_paren)) {
3169
parseParens();
3170
3171
// If there is a class body of an anonymous class, consume that as child.
3172
if (FormatTok->is(tok::l_brace))
3173
parseChildBlock();
3174
return;
3175
}
3176
nextToken();
3177
} while (!eof());
3178
}
3179
3180
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3181
keepAncestorBraces();
3182
3183
if (isBlockBegin(*FormatTok)) {
3184
FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3185
FormatToken *LeftBrace = FormatTok;
3186
CompoundStatementIndenter Indenter(this, Style, Line->Level);
3187
parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3188
/*MunchSemi=*/true, KeepBraces);
3189
setPreviousRBraceType(TT_ControlStatementRBrace);
3190
if (!KeepBraces) {
3191
assert(!NestedTooDeep.empty());
3192
if (!NestedTooDeep.back())
3193
markOptionalBraces(LeftBrace);
3194
}
3195
if (WrapRightBrace)
3196
addUnwrappedLine();
3197
} else {
3198
parseUnbracedBody();
3199
}
3200
3201
if (!KeepBraces)
3202
NestedTooDeep.pop_back();
3203
}
3204
3205
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3206
assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3207
(Style.isVerilog() &&
3208
FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3209
Keywords.kw_always_ff, Keywords.kw_always_latch,
3210
Keywords.kw_final, Keywords.kw_initial,
3211
Keywords.kw_foreach, Keywords.kw_forever,
3212
Keywords.kw_repeat))) &&
3213
"'for', 'while' or foreach macro expected");
3214
const bool KeepBraces = !Style.RemoveBracesLLVM ||
3215
!FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3216
3217
nextToken();
3218
// JS' for await ( ...
3219
if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3220
nextToken();
3221
if (IsCpp && FormatTok->is(tok::kw_co_await))
3222
nextToken();
3223
if (HasParens && FormatTok->is(tok::l_paren)) {
3224
// The type is only set for Verilog basically because we were afraid to
3225
// change the existing behavior for loops. See the discussion on D121756 for
3226
// details.
3227
if (Style.isVerilog())
3228
FormatTok->setFinalizedType(TT_ConditionLParen);
3229
parseParens();
3230
}
3231
3232
if (Style.isVerilog()) {
3233
// Event control.
3234
parseVerilogSensitivityList();
3235
} else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3236
Tokens->getPreviousToken()->is(tok::r_paren)) {
3237
nextToken();
3238
addUnwrappedLine();
3239
return;
3240
}
3241
3242
handleAttributes();
3243
parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3244
}
3245
3246
void UnwrappedLineParser::parseDoWhile() {
3247
assert(FormatTok->is(tok::kw_do) && "'do' expected");
3248
nextToken();
3249
3250
parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3251
3252
// FIXME: Add error handling.
3253
if (FormatTok->isNot(tok::kw_while)) {
3254
addUnwrappedLine();
3255
return;
3256
}
3257
3258
FormatTok->setFinalizedType(TT_DoWhile);
3259
3260
// If in Whitesmiths mode, the line with the while() needs to be indented
3261
// to the same level as the block.
3262
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3263
++Line->Level;
3264
3265
nextToken();
3266
parseStructuralElement();
3267
}
3268
3269
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3270
nextToken();
3271
unsigned OldLineLevel = Line->Level;
3272
3273
if (LeftAlignLabel)
3274
Line->Level = 0;
3275
else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3276
--Line->Level;
3277
3278
if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3279
FormatTok->is(tok::l_brace)) {
3280
3281
CompoundStatementIndenter Indenter(this, Line->Level,
3282
Style.BraceWrapping.AfterCaseLabel,
3283
Style.BraceWrapping.IndentBraces);
3284
parseBlock();
3285
if (FormatTok->is(tok::kw_break)) {
3286
if (Style.BraceWrapping.AfterControlStatement ==
3287
FormatStyle::BWACS_Always) {
3288
addUnwrappedLine();
3289
if (!Style.IndentCaseBlocks &&
3290
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3291
++Line->Level;
3292
}
3293
}
3294
parseStructuralElement();
3295
}
3296
addUnwrappedLine();
3297
} else {
3298
if (FormatTok->is(tok::semi))
3299
nextToken();
3300
addUnwrappedLine();
3301
}
3302
Line->Level = OldLineLevel;
3303
if (FormatTok->isNot(tok::l_brace)) {
3304
parseStructuralElement();
3305
addUnwrappedLine();
3306
}
3307
}
3308
3309
void UnwrappedLineParser::parseCaseLabel() {
3310
assert(FormatTok->is(tok::kw_case) && "'case' expected");
3311
auto *Case = FormatTok;
3312
3313
// FIXME: fix handling of complex expressions here.
3314
do {
3315
nextToken();
3316
if (FormatTok->is(tok::colon)) {
3317
FormatTok->setFinalizedType(TT_CaseLabelColon);
3318
break;
3319
}
3320
if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3321
FormatTok->setFinalizedType(TT_CaseLabelArrow);
3322
Case->setFinalizedType(TT_SwitchExpressionLabel);
3323
break;
3324
}
3325
} while (!eof());
3326
parseLabel();
3327
}
3328
3329
void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3330
assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3331
nextToken();
3332
if (FormatTok->is(tok::l_paren))
3333
parseParens();
3334
3335
keepAncestorBraces();
3336
3337
if (FormatTok->is(tok::l_brace)) {
3338
CompoundStatementIndenter Indenter(this, Style, Line->Level);
3339
FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3340
: TT_ControlStatementLBrace);
3341
if (IsExpr)
3342
parseChildBlock();
3343
else
3344
parseBlock();
3345
setPreviousRBraceType(TT_ControlStatementRBrace);
3346
if (!IsExpr)
3347
addUnwrappedLine();
3348
} else {
3349
addUnwrappedLine();
3350
++Line->Level;
3351
parseStructuralElement();
3352
--Line->Level;
3353
}
3354
3355
if (Style.RemoveBracesLLVM)
3356
NestedTooDeep.pop_back();
3357
}
3358
3359
// Operators that can follow a C variable.
3360
static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
3361
switch (Kind) {
3362
case tok::ampamp:
3363
case tok::ampequal:
3364
case tok::arrow:
3365
case tok::caret:
3366
case tok::caretequal:
3367
case tok::comma:
3368
case tok::ellipsis:
3369
case tok::equal:
3370
case tok::equalequal:
3371
case tok::exclaim:
3372
case tok::exclaimequal:
3373
case tok::greater:
3374
case tok::greaterequal:
3375
case tok::greatergreater:
3376
case tok::greatergreaterequal:
3377
case tok::l_paren:
3378
case tok::l_square:
3379
case tok::less:
3380
case tok::lessequal:
3381
case tok::lessless:
3382
case tok::lesslessequal:
3383
case tok::minus:
3384
case tok::minusequal:
3385
case tok::minusminus:
3386
case tok::percent:
3387
case tok::percentequal:
3388
case tok::period:
3389
case tok::pipe:
3390
case tok::pipeequal:
3391
case tok::pipepipe:
3392
case tok::plus:
3393
case tok::plusequal:
3394
case tok::plusplus:
3395
case tok::question:
3396
case tok::r_brace:
3397
case tok::r_paren:
3398
case tok::r_square:
3399
case tok::semi:
3400
case tok::slash:
3401
case tok::slashequal:
3402
case tok::star:
3403
case tok::starequal:
3404
return true;
3405
default:
3406
return false;
3407
}
3408
}
3409
3410
void UnwrappedLineParser::parseAccessSpecifier() {
3411
FormatToken *AccessSpecifierCandidate = FormatTok;
3412
nextToken();
3413
// Understand Qt's slots.
3414
if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3415
nextToken();
3416
// Otherwise, we don't know what it is, and we'd better keep the next token.
3417
if (FormatTok->is(tok::colon)) {
3418
nextToken();
3419
addUnwrappedLine();
3420
} else if (FormatTok->isNot(tok::coloncolon) &&
3421
!isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3422
// Not a variable name nor namespace name.
3423
addUnwrappedLine();
3424
} else if (AccessSpecifierCandidate) {
3425
// Consider the access specifier to be a C identifier.
3426
AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3427
}
3428
}
3429
3430
/// \brief Parses a requires, decides if it is a clause or an expression.
3431
/// \pre The current token has to be the requires keyword.
3432
/// \returns true if it parsed a clause.
3433
bool UnwrappedLineParser::parseRequires() {
3434
assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3435
auto RequiresToken = FormatTok;
3436
3437
// We try to guess if it is a requires clause, or a requires expression. For
3438
// that we first consume the keyword and check the next token.
3439
nextToken();
3440
3441
switch (FormatTok->Tok.getKind()) {
3442
case tok::l_brace:
3443
// This can only be an expression, never a clause.
3444
parseRequiresExpression(RequiresToken);
3445
return false;
3446
case tok::l_paren:
3447
// Clauses and expression can start with a paren, it's unclear what we have.
3448
break;
3449
default:
3450
// All other tokens can only be a clause.
3451
parseRequiresClause(RequiresToken);
3452
return true;
3453
}
3454
3455
// Looking forward we would have to decide if there are function declaration
3456
// like arguments to the requires expression:
3457
// requires (T t) {
3458
// Or there is a constraint expression for the requires clause:
3459
// requires (C<T> && ...
3460
3461
// But first let's look behind.
3462
auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3463
3464
if (!PreviousNonComment ||
3465
PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3466
// If there is no token, or an expression left brace, we are a requires
3467
// clause within a requires expression.
3468
parseRequiresClause(RequiresToken);
3469
return true;
3470
}
3471
3472
switch (PreviousNonComment->Tok.getKind()) {
3473
case tok::greater:
3474
case tok::r_paren:
3475
case tok::kw_noexcept:
3476
case tok::kw_const:
3477
// This is a requires clause.
3478
parseRequiresClause(RequiresToken);
3479
return true;
3480
case tok::amp:
3481
case tok::ampamp: {
3482
// This can be either:
3483
// if (... && requires (T t) ...)
3484
// Or
3485
// void member(...) && requires (C<T> ...
3486
// We check the one token before that for a const:
3487
// void member(...) const && requires (C<T> ...
3488
auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3489
if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3490
parseRequiresClause(RequiresToken);
3491
return true;
3492
}
3493
break;
3494
}
3495
default:
3496
if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3497
// This is a requires clause.
3498
parseRequiresClause(RequiresToken);
3499
return true;
3500
}
3501
// It's an expression.
3502
parseRequiresExpression(RequiresToken);
3503
return false;
3504
}
3505
3506
// Now we look forward and try to check if the paren content is a parameter
3507
// list. The parameters can be cv-qualified and contain references or
3508
// pointers.
3509
// So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3510
// of stuff: typename, const, *, &, &&, ::, identifiers.
3511
3512
unsigned StoredPosition = Tokens->getPosition();
3513
FormatToken *NextToken = Tokens->getNextToken();
3514
int Lookahead = 0;
3515
auto PeekNext = [&Lookahead, &NextToken, this] {
3516
++Lookahead;
3517
NextToken = Tokens->getNextToken();
3518
};
3519
3520
bool FoundType = false;
3521
bool LastWasColonColon = false;
3522
int OpenAngles = 0;
3523
3524
for (; Lookahead < 50; PeekNext()) {
3525
switch (NextToken->Tok.getKind()) {
3526
case tok::kw_volatile:
3527
case tok::kw_const:
3528
case tok::comma:
3529
if (OpenAngles == 0) {
3530
FormatTok = Tokens->setPosition(StoredPosition);
3531
parseRequiresExpression(RequiresToken);
3532
return false;
3533
}
3534
break;
3535
case tok::eof:
3536
// Break out of the loop.
3537
Lookahead = 50;
3538
break;
3539
case tok::coloncolon:
3540
LastWasColonColon = true;
3541
break;
3542
case tok::kw_decltype:
3543
case tok::identifier:
3544
if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3545
FormatTok = Tokens->setPosition(StoredPosition);
3546
parseRequiresExpression(RequiresToken);
3547
return false;
3548
}
3549
FoundType = true;
3550
LastWasColonColon = false;
3551
break;
3552
case tok::less:
3553
++OpenAngles;
3554
break;
3555
case tok::greater:
3556
--OpenAngles;
3557
break;
3558
default:
3559
if (NextToken->isTypeName(LangOpts)) {
3560
FormatTok = Tokens->setPosition(StoredPosition);
3561
parseRequiresExpression(RequiresToken);
3562
return false;
3563
}
3564
break;
3565
}
3566
}
3567
// This seems to be a complicated expression, just assume it's a clause.
3568
FormatTok = Tokens->setPosition(StoredPosition);
3569
parseRequiresClause(RequiresToken);
3570
return true;
3571
}
3572
3573
/// \brief Parses a requires clause.
3574
/// \param RequiresToken The requires keyword token, which starts this clause.
3575
/// \pre We need to be on the next token after the requires keyword.
3576
/// \sa parseRequiresExpression
3577
///
3578
/// Returns if it either has finished parsing the clause, or it detects, that
3579
/// the clause is incorrect.
3580
void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3581
assert(FormatTok->getPreviousNonComment() == RequiresToken);
3582
assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3583
3584
// If there is no previous token, we are within a requires expression,
3585
// otherwise we will always have the template or function declaration in front
3586
// of it.
3587
bool InRequiresExpression =
3588
!RequiresToken->Previous ||
3589
RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3590
3591
RequiresToken->setFinalizedType(InRequiresExpression
3592
? TT_RequiresClauseInARequiresExpression
3593
: TT_RequiresClause);
3594
3595
// NOTE: parseConstraintExpression is only ever called from this function.
3596
// It could be inlined into here.
3597
parseConstraintExpression();
3598
3599
if (!InRequiresExpression)
3600
FormatTok->Previous->ClosesRequiresClause = true;
3601
}
3602
3603
/// \brief Parses a requires expression.
3604
/// \param RequiresToken The requires keyword token, which starts this clause.
3605
/// \pre We need to be on the next token after the requires keyword.
3606
/// \sa parseRequiresClause
3607
///
3608
/// Returns if it either has finished parsing the expression, or it detects,
3609
/// that the expression is incorrect.
3610
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3611
assert(FormatTok->getPreviousNonComment() == RequiresToken);
3612
assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3613
3614
RequiresToken->setFinalizedType(TT_RequiresExpression);
3615
3616
if (FormatTok->is(tok::l_paren)) {
3617
FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3618
parseParens();
3619
}
3620
3621
if (FormatTok->is(tok::l_brace)) {
3622
FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3623
parseChildBlock();
3624
}
3625
}
3626
3627
/// \brief Parses a constraint expression.
3628
///
3629
/// This is the body of a requires clause. It returns, when the parsing is
3630
/// complete, or the expression is incorrect.
3631
void UnwrappedLineParser::parseConstraintExpression() {
3632
// The special handling for lambdas is needed since tryToParseLambda() eats a
3633
// token and if a requires expression is the last part of a requires clause
3634
// and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3635
// not set on the correct token. Thus we need to be aware if we even expect a
3636
// lambda to be possible.
3637
// template <typename T> requires requires { ... } [[nodiscard]] ...;
3638
bool LambdaNextTimeAllowed = true;
3639
3640
// Within lambda declarations, it is permitted to put a requires clause after
3641
// its template parameter list, which would place the requires clause right
3642
// before the parentheses of the parameters of the lambda declaration. Thus,
3643
// we track if we expect to see grouping parentheses at all.
3644
// Without this check, `requires foo<T> (T t)` in the below example would be
3645
// seen as the whole requires clause, accidentally eating the parameters of
3646
// the lambda.
3647
// [&]<typename T> requires foo<T> (T t) { ... };
3648
bool TopLevelParensAllowed = true;
3649
3650
do {
3651
bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3652
3653
switch (FormatTok->Tok.getKind()) {
3654
case tok::kw_requires: {
3655
auto RequiresToken = FormatTok;
3656
nextToken();
3657
parseRequiresExpression(RequiresToken);
3658
break;
3659
}
3660
3661
case tok::l_paren:
3662
if (!TopLevelParensAllowed)
3663
return;
3664
parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3665
TopLevelParensAllowed = false;
3666
break;
3667
3668
case tok::l_square:
3669
if (!LambdaThisTimeAllowed || !tryToParseLambda())
3670
return;
3671
break;
3672
3673
case tok::kw_const:
3674
case tok::semi:
3675
case tok::kw_class:
3676
case tok::kw_struct:
3677
case tok::kw_union:
3678
return;
3679
3680
case tok::l_brace:
3681
// Potential function body.
3682
return;
3683
3684
case tok::ampamp:
3685
case tok::pipepipe:
3686
FormatTok->setFinalizedType(TT_BinaryOperator);
3687
nextToken();
3688
LambdaNextTimeAllowed = true;
3689
TopLevelParensAllowed = true;
3690
break;
3691
3692
case tok::comma:
3693
case tok::comment:
3694
LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3695
nextToken();
3696
break;
3697
3698
case tok::kw_sizeof:
3699
case tok::greater:
3700
case tok::greaterequal:
3701
case tok::greatergreater:
3702
case tok::less:
3703
case tok::lessequal:
3704
case tok::lessless:
3705
case tok::equalequal:
3706
case tok::exclaim:
3707
case tok::exclaimequal:
3708
case tok::plus:
3709
case tok::minus:
3710
case tok::star:
3711
case tok::slash:
3712
LambdaNextTimeAllowed = true;
3713
TopLevelParensAllowed = true;
3714
// Just eat them.
3715
nextToken();
3716
break;
3717
3718
case tok::numeric_constant:
3719
case tok::coloncolon:
3720
case tok::kw_true:
3721
case tok::kw_false:
3722
TopLevelParensAllowed = false;
3723
// Just eat them.
3724
nextToken();
3725
break;
3726
3727
case tok::kw_static_cast:
3728
case tok::kw_const_cast:
3729
case tok::kw_reinterpret_cast:
3730
case tok::kw_dynamic_cast:
3731
nextToken();
3732
if (FormatTok->isNot(tok::less))
3733
return;
3734
3735
nextToken();
3736
parseBracedList(/*IsAngleBracket=*/true);
3737
break;
3738
3739
default:
3740
if (!FormatTok->Tok.getIdentifierInfo()) {
3741
// Identifiers are part of the default case, we check for more then
3742
// tok::identifier to handle builtin type traits.
3743
return;
3744
}
3745
3746
// We need to differentiate identifiers for a template deduction guide,
3747
// variables, or function return types (the constraint expression has
3748
// ended before that), and basically all other cases. But it's easier to
3749
// check the other way around.
3750
assert(FormatTok->Previous);
3751
switch (FormatTok->Previous->Tok.getKind()) {
3752
case tok::coloncolon: // Nested identifier.
3753
case tok::ampamp: // Start of a function or variable for the
3754
case tok::pipepipe: // constraint expression. (binary)
3755
case tok::exclaim: // The same as above, but unary.
3756
case tok::kw_requires: // Initial identifier of a requires clause.
3757
case tok::equal: // Initial identifier of a concept declaration.
3758
break;
3759
default:
3760
return;
3761
}
3762
3763
// Read identifier with optional template declaration.
3764
nextToken();
3765
if (FormatTok->is(tok::less)) {
3766
nextToken();
3767
parseBracedList(/*IsAngleBracket=*/true);
3768
}
3769
TopLevelParensAllowed = false;
3770
break;
3771
}
3772
} while (!eof());
3773
}
3774
3775
bool UnwrappedLineParser::parseEnum() {
3776
const FormatToken &InitialToken = *FormatTok;
3777
3778
// Won't be 'enum' for NS_ENUMs.
3779
if (FormatTok->is(tok::kw_enum))
3780
nextToken();
3781
3782
// In TypeScript, "enum" can also be used as property name, e.g. in interface
3783
// declarations. An "enum" keyword followed by a colon would be a syntax
3784
// error and thus assume it is just an identifier.
3785
if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3786
return false;
3787
3788
// In protobuf, "enum" can be used as a field name.
3789
if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3790
return false;
3791
3792
if (IsCpp) {
3793
// Eat up enum class ...
3794
if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3795
nextToken();
3796
while (FormatTok->is(tok::l_square))
3797
if (!handleCppAttributes())
3798
return false;
3799
}
3800
3801
while (FormatTok->Tok.getIdentifierInfo() ||
3802
FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3803
tok::greater, tok::comma, tok::question,
3804
tok::l_square)) {
3805
if (Style.isVerilog()) {
3806
FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3807
nextToken();
3808
// In Verilog the base type can have dimensions.
3809
while (FormatTok->is(tok::l_square))
3810
parseSquare();
3811
} else {
3812
nextToken();
3813
}
3814
// We can have macros or attributes in between 'enum' and the enum name.
3815
if (FormatTok->is(tok::l_paren))
3816
parseParens();
3817
if (FormatTok->is(tok::identifier)) {
3818
nextToken();
3819
// If there are two identifiers in a row, this is likely an elaborate
3820
// return type. In Java, this can be "implements", etc.
3821
if (IsCpp && FormatTok->is(tok::identifier))
3822
return false;
3823
}
3824
}
3825
3826
// Just a declaration or something is wrong.
3827
if (FormatTok->isNot(tok::l_brace))
3828
return true;
3829
FormatTok->setFinalizedType(TT_EnumLBrace);
3830
FormatTok->setBlockKind(BK_Block);
3831
3832
if (Style.Language == FormatStyle::LK_Java) {
3833
// Java enums are different.
3834
parseJavaEnumBody();
3835
return true;
3836
}
3837
if (Style.Language == FormatStyle::LK_Proto) {
3838
parseBlock(/*MustBeDeclaration=*/true);
3839
return true;
3840
}
3841
3842
if (!Style.AllowShortEnumsOnASingleLine &&
3843
ShouldBreakBeforeBrace(Style, InitialToken)) {
3844
addUnwrappedLine();
3845
}
3846
// Parse enum body.
3847
nextToken();
3848
if (!Style.AllowShortEnumsOnASingleLine) {
3849
addUnwrappedLine();
3850
Line->Level += 1;
3851
}
3852
bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3853
if (!Style.AllowShortEnumsOnASingleLine)
3854
Line->Level -= 1;
3855
if (HasError) {
3856
if (FormatTok->is(tok::semi))
3857
nextToken();
3858
addUnwrappedLine();
3859
}
3860
setPreviousRBraceType(TT_EnumRBrace);
3861
return true;
3862
3863
// There is no addUnwrappedLine() here so that we fall through to parsing a
3864
// structural element afterwards. Thus, in "enum A {} n, m;",
3865
// "} n, m;" will end up in one unwrapped line.
3866
}
3867
3868
bool UnwrappedLineParser::parseStructLike() {
3869
// parseRecord falls through and does not yet add an unwrapped line as a
3870
// record declaration or definition can start a structural element.
3871
parseRecord();
3872
// This does not apply to Java, JavaScript and C#.
3873
if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3874
Style.isCSharp()) {
3875
if (FormatTok->is(tok::semi))
3876
nextToken();
3877
addUnwrappedLine();
3878
return true;
3879
}
3880
return false;
3881
}
3882
3883
namespace {
3884
// A class used to set and restore the Token position when peeking
3885
// ahead in the token source.
3886
class ScopedTokenPosition {
3887
unsigned StoredPosition;
3888
FormatTokenSource *Tokens;
3889
3890
public:
3891
ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3892
assert(Tokens && "Tokens expected to not be null");
3893
StoredPosition = Tokens->getPosition();
3894
}
3895
3896
~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3897
};
3898
} // namespace
3899
3900
// Look to see if we have [[ by looking ahead, if
3901
// its not then rewind to the original position.
3902
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3903
ScopedTokenPosition AutoPosition(Tokens);
3904
FormatToken *Tok = Tokens->getNextToken();
3905
// We already read the first [ check for the second.
3906
if (Tok->isNot(tok::l_square))
3907
return false;
3908
// Double check that the attribute is just something
3909
// fairly simple.
3910
while (Tok->isNot(tok::eof)) {
3911
if (Tok->is(tok::r_square))
3912
break;
3913
Tok = Tokens->getNextToken();
3914
}
3915
if (Tok->is(tok::eof))
3916
return false;
3917
Tok = Tokens->getNextToken();
3918
if (Tok->isNot(tok::r_square))
3919
return false;
3920
Tok = Tokens->getNextToken();
3921
if (Tok->is(tok::semi))
3922
return false;
3923
return true;
3924
}
3925
3926
void UnwrappedLineParser::parseJavaEnumBody() {
3927
assert(FormatTok->is(tok::l_brace));
3928
const FormatToken *OpeningBrace = FormatTok;
3929
3930
// Determine whether the enum is simple, i.e. does not have a semicolon or
3931
// constants with class bodies. Simple enums can be formatted like braced
3932
// lists, contracted to a single line, etc.
3933
unsigned StoredPosition = Tokens->getPosition();
3934
bool IsSimple = true;
3935
FormatToken *Tok = Tokens->getNextToken();
3936
while (Tok->isNot(tok::eof)) {
3937
if (Tok->is(tok::r_brace))
3938
break;
3939
if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3940
IsSimple = false;
3941
break;
3942
}
3943
// FIXME: This will also mark enums with braces in the arguments to enum
3944
// constants as "not simple". This is probably fine in practice, though.
3945
Tok = Tokens->getNextToken();
3946
}
3947
FormatTok = Tokens->setPosition(StoredPosition);
3948
3949
if (IsSimple) {
3950
nextToken();
3951
parseBracedList();
3952
addUnwrappedLine();
3953
return;
3954
}
3955
3956
// Parse the body of a more complex enum.
3957
// First add a line for everything up to the "{".
3958
nextToken();
3959
addUnwrappedLine();
3960
++Line->Level;
3961
3962
// Parse the enum constants.
3963
while (!eof()) {
3964
if (FormatTok->is(tok::l_brace)) {
3965
// Parse the constant's class body.
3966
parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3967
/*MunchSemi=*/false);
3968
} else if (FormatTok->is(tok::l_paren)) {
3969
parseParens();
3970
} else if (FormatTok->is(tok::comma)) {
3971
nextToken();
3972
addUnwrappedLine();
3973
} else if (FormatTok->is(tok::semi)) {
3974
nextToken();
3975
addUnwrappedLine();
3976
break;
3977
} else if (FormatTok->is(tok::r_brace)) {
3978
addUnwrappedLine();
3979
break;
3980
} else {
3981
nextToken();
3982
}
3983
}
3984
3985
// Parse the class body after the enum's ";" if any.
3986
parseLevel(OpeningBrace);
3987
nextToken();
3988
--Line->Level;
3989
addUnwrappedLine();
3990
}
3991
3992
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3993
const FormatToken &InitialToken = *FormatTok;
3994
nextToken();
3995
3996
const FormatToken *ClassName = nullptr;
3997
bool IsDerived = false;
3998
auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3999
return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4000
};
4001
// JavaScript/TypeScript supports anonymous classes like:
4002
// a = class extends foo { }
4003
bool JSPastExtendsOrImplements = false;
4004
// The actual identifier can be a nested name specifier, and in macros
4005
// it is often token-pasted.
4006
// An [[attribute]] can be before the identifier.
4007
while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4008
tok::kw_alignas, tok::l_square) ||
4009
FormatTok->isAttribute() ||
4010
((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
4011
FormatTok->isOneOf(tok::period, tok::comma))) {
4012
if (Style.isJavaScript() &&
4013
FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4014
JSPastExtendsOrImplements = true;
4015
// JavaScript/TypeScript supports inline object types in
4016
// extends/implements positions:
4017
// class Foo implements {bar: number} { }
4018
nextToken();
4019
if (FormatTok->is(tok::l_brace)) {
4020
tryToParseBracedList();
4021
continue;
4022
}
4023
}
4024
if (FormatTok->is(tok::l_square) && handleCppAttributes())
4025
continue;
4026
const auto *Previous = FormatTok;
4027
nextToken();
4028
switch (FormatTok->Tok.getKind()) {
4029
case tok::l_paren:
4030
// We can have macros in between 'class' and the class name.
4031
if (!IsNonMacroIdentifier(Previous) ||
4032
// e.g. `struct macro(a) S { int i; };`
4033
Previous->Previous == &InitialToken) {
4034
parseParens();
4035
}
4036
break;
4037
case tok::coloncolon:
4038
case tok::hashhash:
4039
break;
4040
default:
4041
if (!JSPastExtendsOrImplements && !ClassName &&
4042
Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
4043
ClassName = Previous;
4044
}
4045
}
4046
}
4047
4048
auto IsListInitialization = [&] {
4049
if (!ClassName || IsDerived)
4050
return false;
4051
assert(FormatTok->is(tok::l_brace));
4052
const auto *Prev = FormatTok->getPreviousNonComment();
4053
assert(Prev);
4054
return Prev != ClassName && Prev->is(tok::identifier) &&
4055
Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4056
};
4057
4058
if (FormatTok->isOneOf(tok::colon, tok::less)) {
4059
int AngleNestingLevel = 0;
4060
do {
4061
if (FormatTok->is(tok::less))
4062
++AngleNestingLevel;
4063
else if (FormatTok->is(tok::greater))
4064
--AngleNestingLevel;
4065
4066
if (AngleNestingLevel == 0) {
4067
if (FormatTok->is(tok::colon)) {
4068
IsDerived = true;
4069
} else if (FormatTok->is(tok::identifier) &&
4070
FormatTok->Previous->is(tok::coloncolon)) {
4071
ClassName = FormatTok;
4072
} else if (FormatTok->is(tok::l_paren) &&
4073
IsNonMacroIdentifier(FormatTok->Previous)) {
4074
break;
4075
}
4076
}
4077
if (FormatTok->is(tok::l_brace)) {
4078
if (AngleNestingLevel == 0 && IsListInitialization())
4079
return;
4080
calculateBraceTypes(/*ExpectClassBody=*/true);
4081
if (!tryToParseBracedList())
4082
break;
4083
}
4084
if (FormatTok->is(tok::l_square)) {
4085
FormatToken *Previous = FormatTok->Previous;
4086
if (!Previous || (Previous->isNot(tok::r_paren) &&
4087
!Previous->isTypeOrIdentifier(LangOpts))) {
4088
// Don't try parsing a lambda if we had a closing parenthesis before,
4089
// it was probably a pointer to an array: int (*)[].
4090
if (!tryToParseLambda())
4091
continue;
4092
} else {
4093
parseSquare();
4094
continue;
4095
}
4096
}
4097
if (FormatTok->is(tok::semi))
4098
return;
4099
if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4100
addUnwrappedLine();
4101
nextToken();
4102
parseCSharpGenericTypeConstraint();
4103
break;
4104
}
4105
nextToken();
4106
} while (!eof());
4107
}
4108
4109
auto GetBraceTypes =
4110
[](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4111
switch (RecordTok.Tok.getKind()) {
4112
case tok::kw_class:
4113
return {TT_ClassLBrace, TT_ClassRBrace};
4114
case tok::kw_struct:
4115
return {TT_StructLBrace, TT_StructRBrace};
4116
case tok::kw_union:
4117
return {TT_UnionLBrace, TT_UnionRBrace};
4118
default:
4119
// Useful for e.g. interface.
4120
return {TT_RecordLBrace, TT_RecordRBrace};
4121
}
4122
};
4123
if (FormatTok->is(tok::l_brace)) {
4124
if (IsListInitialization())
4125
return;
4126
auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4127
FormatTok->setFinalizedType(OpenBraceType);
4128
if (ParseAsExpr) {
4129
parseChildBlock();
4130
} else {
4131
if (ShouldBreakBeforeBrace(Style, InitialToken))
4132
addUnwrappedLine();
4133
4134
unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4135
parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4136
}
4137
setPreviousRBraceType(ClosingBraceType);
4138
}
4139
// There is no addUnwrappedLine() here so that we fall through to parsing a
4140
// structural element afterwards. Thus, in "class A {} n, m;",
4141
// "} n, m;" will end up in one unwrapped line.
4142
}
4143
4144
void UnwrappedLineParser::parseObjCMethod() {
4145
assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4146
"'(' or identifier expected.");
4147
do {
4148
if (FormatTok->is(tok::semi)) {
4149
nextToken();
4150
addUnwrappedLine();
4151
return;
4152
} else if (FormatTok->is(tok::l_brace)) {
4153
if (Style.BraceWrapping.AfterFunction)
4154
addUnwrappedLine();
4155
parseBlock();
4156
addUnwrappedLine();
4157
return;
4158
} else {
4159
nextToken();
4160
}
4161
} while (!eof());
4162
}
4163
4164
void UnwrappedLineParser::parseObjCProtocolList() {
4165
assert(FormatTok->is(tok::less) && "'<' expected.");
4166
do {
4167
nextToken();
4168
// Early exit in case someone forgot a close angle.
4169
if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4170
FormatTok->isObjCAtKeyword(tok::objc_end)) {
4171
return;
4172
}
4173
} while (!eof() && FormatTok->isNot(tok::greater));
4174
nextToken(); // Skip '>'.
4175
}
4176
4177
void UnwrappedLineParser::parseObjCUntilAtEnd() {
4178
do {
4179
if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4180
nextToken();
4181
addUnwrappedLine();
4182
break;
4183
}
4184
if (FormatTok->is(tok::l_brace)) {
4185
parseBlock();
4186
// In ObjC interfaces, nothing should be following the "}".
4187
addUnwrappedLine();
4188
} else if (FormatTok->is(tok::r_brace)) {
4189
// Ignore stray "}". parseStructuralElement doesn't consume them.
4190
nextToken();
4191
addUnwrappedLine();
4192
} else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4193
nextToken();
4194
parseObjCMethod();
4195
} else {
4196
parseStructuralElement();
4197
}
4198
} while (!eof());
4199
}
4200
4201
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4202
assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4203
FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4204
nextToken();
4205
nextToken(); // interface name
4206
4207
// @interface can be followed by a lightweight generic
4208
// specialization list, then either a base class or a category.
4209
if (FormatTok->is(tok::less))
4210
parseObjCLightweightGenerics();
4211
if (FormatTok->is(tok::colon)) {
4212
nextToken();
4213
nextToken(); // base class name
4214
// The base class can also have lightweight generics applied to it.
4215
if (FormatTok->is(tok::less))
4216
parseObjCLightweightGenerics();
4217
} else if (FormatTok->is(tok::l_paren)) {
4218
// Skip category, if present.
4219
parseParens();
4220
}
4221
4222
if (FormatTok->is(tok::less))
4223
parseObjCProtocolList();
4224
4225
if (FormatTok->is(tok::l_brace)) {
4226
if (Style.BraceWrapping.AfterObjCDeclaration)
4227
addUnwrappedLine();
4228
parseBlock(/*MustBeDeclaration=*/true);
4229
}
4230
4231
// With instance variables, this puts '}' on its own line. Without instance
4232
// variables, this ends the @interface line.
4233
addUnwrappedLine();
4234
4235
parseObjCUntilAtEnd();
4236
}
4237
4238
void UnwrappedLineParser::parseObjCLightweightGenerics() {
4239
assert(FormatTok->is(tok::less));
4240
// Unlike protocol lists, generic parameterizations support
4241
// nested angles:
4242
//
4243
// @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4244
// NSObject <NSCopying, NSSecureCoding>
4245
//
4246
// so we need to count how many open angles we have left.
4247
unsigned NumOpenAngles = 1;
4248
do {
4249
nextToken();
4250
// Early exit in case someone forgot a close angle.
4251
if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4252
FormatTok->isObjCAtKeyword(tok::objc_end)) {
4253
break;
4254
}
4255
if (FormatTok->is(tok::less)) {
4256
++NumOpenAngles;
4257
} else if (FormatTok->is(tok::greater)) {
4258
assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4259
--NumOpenAngles;
4260
}
4261
} while (!eof() && NumOpenAngles != 0);
4262
nextToken(); // Skip '>'.
4263
}
4264
4265
// Returns true for the declaration/definition form of @protocol,
4266
// false for the expression form.
4267
bool UnwrappedLineParser::parseObjCProtocol() {
4268
assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4269
nextToken();
4270
4271
if (FormatTok->is(tok::l_paren)) {
4272
// The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4273
return false;
4274
}
4275
4276
// The definition/declaration form,
4277
// @protocol Foo
4278
// - (int)someMethod;
4279
// @end
4280
4281
nextToken(); // protocol name
4282
4283
if (FormatTok->is(tok::less))
4284
parseObjCProtocolList();
4285
4286
// Check for protocol declaration.
4287
if (FormatTok->is(tok::semi)) {
4288
nextToken();
4289
addUnwrappedLine();
4290
return true;
4291
}
4292
4293
addUnwrappedLine();
4294
parseObjCUntilAtEnd();
4295
return true;
4296
}
4297
4298
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4299
bool IsImport = FormatTok->is(Keywords.kw_import);
4300
assert(IsImport || FormatTok->is(tok::kw_export));
4301
nextToken();
4302
4303
// Consume the "default" in "export default class/function".
4304
if (FormatTok->is(tok::kw_default))
4305
nextToken();
4306
4307
// Consume "async function", "function" and "default function", so that these
4308
// get parsed as free-standing JS functions, i.e. do not require a trailing
4309
// semicolon.
4310
if (FormatTok->is(Keywords.kw_async))
4311
nextToken();
4312
if (FormatTok->is(Keywords.kw_function)) {
4313
nextToken();
4314
return;
4315
}
4316
4317
// For imports, `export *`, `export {...}`, consume the rest of the line up
4318
// to the terminating `;`. For everything else, just return and continue
4319
// parsing the structural element, i.e. the declaration or expression for
4320
// `export default`.
4321
if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4322
!FormatTok->isStringLiteral() &&
4323
!(FormatTok->is(Keywords.kw_type) &&
4324
Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4325
return;
4326
}
4327
4328
while (!eof()) {
4329
if (FormatTok->is(tok::semi))
4330
return;
4331
if (Line->Tokens.empty()) {
4332
// Common issue: Automatic Semicolon Insertion wrapped the line, so the
4333
// import statement should terminate.
4334
return;
4335
}
4336
if (FormatTok->is(tok::l_brace)) {
4337
FormatTok->setBlockKind(BK_Block);
4338
nextToken();
4339
parseBracedList();
4340
} else {
4341
nextToken();
4342
}
4343
}
4344
}
4345
4346
void UnwrappedLineParser::parseStatementMacro() {
4347
nextToken();
4348
if (FormatTok->is(tok::l_paren))
4349
parseParens();
4350
if (FormatTok->is(tok::semi))
4351
nextToken();
4352
addUnwrappedLine();
4353
}
4354
4355
void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4356
// consume things like a::`b.c[d:e] or a::*
4357
while (true) {
4358
if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4359
tok::coloncolon, tok::hash) ||
4360
Keywords.isVerilogIdentifier(*FormatTok)) {
4361
nextToken();
4362
} else if (FormatTok->is(tok::l_square)) {
4363
parseSquare();
4364
} else {
4365
break;
4366
}
4367
}
4368
}
4369
4370
void UnwrappedLineParser::parseVerilogSensitivityList() {
4371
if (FormatTok->isNot(tok::at))
4372
return;
4373
nextToken();
4374
// A block event expression has 2 at signs.
4375
if (FormatTok->is(tok::at))
4376
nextToken();
4377
switch (FormatTok->Tok.getKind()) {
4378
case tok::star:
4379
nextToken();
4380
break;
4381
case tok::l_paren:
4382
parseParens();
4383
break;
4384
default:
4385
parseVerilogHierarchyIdentifier();
4386
break;
4387
}
4388
}
4389
4390
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4391
unsigned AddLevels = 0;
4392
4393
if (FormatTok->is(Keywords.kw_clocking)) {
4394
nextToken();
4395
if (Keywords.isVerilogIdentifier(*FormatTok))
4396
nextToken();
4397
parseVerilogSensitivityList();
4398
if (FormatTok->is(tok::semi))
4399
nextToken();
4400
} else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4401
Keywords.kw_casez, Keywords.kw_randcase,
4402
Keywords.kw_randsequence)) {
4403
if (Style.IndentCaseLabels)
4404
AddLevels++;
4405
nextToken();
4406
if (FormatTok->is(tok::l_paren)) {
4407
FormatTok->setFinalizedType(TT_ConditionLParen);
4408
parseParens();
4409
}
4410
if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4411
nextToken();
4412
// The case header has no semicolon.
4413
} else {
4414
// "module" etc.
4415
nextToken();
4416
// all the words like the name of the module and specifiers like
4417
// "automatic" and the width of function return type
4418
while (true) {
4419
if (FormatTok->is(tok::l_square)) {
4420
auto Prev = FormatTok->getPreviousNonComment();
4421
if (Prev && Keywords.isVerilogIdentifier(*Prev))
4422
Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4423
parseSquare();
4424
} else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4425
FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4426
nextToken();
4427
} else {
4428
break;
4429
}
4430
}
4431
4432
auto NewLine = [this]() {
4433
addUnwrappedLine();
4434
Line->IsContinuation = true;
4435
};
4436
4437
// package imports
4438
while (FormatTok->is(Keywords.kw_import)) {
4439
NewLine();
4440
nextToken();
4441
parseVerilogHierarchyIdentifier();
4442
if (FormatTok->is(tok::semi))
4443
nextToken();
4444
}
4445
4446
// parameters and ports
4447
if (FormatTok->is(Keywords.kw_verilogHash)) {
4448
NewLine();
4449
nextToken();
4450
if (FormatTok->is(tok::l_paren)) {
4451
FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4452
parseParens();
4453
}
4454
}
4455
if (FormatTok->is(tok::l_paren)) {
4456
NewLine();
4457
FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4458
parseParens();
4459
}
4460
4461
// extends and implements
4462
if (FormatTok->is(Keywords.kw_extends)) {
4463
NewLine();
4464
nextToken();
4465
parseVerilogHierarchyIdentifier();
4466
if (FormatTok->is(tok::l_paren))
4467
parseParens();
4468
}
4469
if (FormatTok->is(Keywords.kw_implements)) {
4470
NewLine();
4471
do {
4472
nextToken();
4473
parseVerilogHierarchyIdentifier();
4474
} while (FormatTok->is(tok::comma));
4475
}
4476
4477
// Coverage event for cover groups.
4478
if (FormatTok->is(tok::at)) {
4479
NewLine();
4480
parseVerilogSensitivityList();
4481
}
4482
4483
if (FormatTok->is(tok::semi))
4484
nextToken(/*LevelDifference=*/1);
4485
addUnwrappedLine();
4486
}
4487
4488
return AddLevels;
4489
}
4490
4491
void UnwrappedLineParser::parseVerilogTable() {
4492
assert(FormatTok->is(Keywords.kw_table));
4493
nextToken(/*LevelDifference=*/1);
4494
addUnwrappedLine();
4495
4496
auto InitialLevel = Line->Level++;
4497
while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4498
FormatToken *Tok = FormatTok;
4499
nextToken();
4500
if (Tok->is(tok::semi))
4501
addUnwrappedLine();
4502
else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4503
Tok->setFinalizedType(TT_VerilogTableItem);
4504
}
4505
Line->Level = InitialLevel;
4506
nextToken(/*LevelDifference=*/-1);
4507
addUnwrappedLine();
4508
}
4509
4510
void UnwrappedLineParser::parseVerilogCaseLabel() {
4511
// The label will get unindented in AnnotatingParser. If there are no leading
4512
// spaces, indent the rest here so that things inside the block will be
4513
// indented relative to things outside. We don't use parseLabel because we
4514
// don't know whether this colon is a label or a ternary expression at this
4515
// point.
4516
auto OrigLevel = Line->Level;
4517
auto FirstLine = CurrentLines->size();
4518
if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4519
++Line->Level;
4520
else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4521
--Line->Level;
4522
parseStructuralElement();
4523
// Restore the indentation in both the new line and the line that has the
4524
// label.
4525
if (CurrentLines->size() > FirstLine)
4526
(*CurrentLines)[FirstLine].Level = OrigLevel;
4527
Line->Level = OrigLevel;
4528
}
4529
4530
bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4531
for (const auto &N : Line.Tokens) {
4532
if (N.Tok->MacroCtx)
4533
return true;
4534
for (const UnwrappedLine &Child : N.Children)
4535
if (containsExpansion(Child))
4536
return true;
4537
}
4538
return false;
4539
}
4540
4541
// Finishes the unwrapped line currently being built and resets the per-line
// state for the next one. Does nothing if the current line has no tokens.
//
// Lines that contain macro-expanded tokens are fed to the macro call
// reconstructor and stored on the side; all other lines are appended to
// CurrentLines. \p AdjustLevel controls whether the level is decreased after a
// line that closes a block in Whitesmiths mode.
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
  if (Line->Tokens.empty())
    return;

  LLVM_DEBUG({
    if (!parsingPPDirective()) {
      llvm::dbgs() << "Adding unwrapped line:\n";
      printDebugInfo(*Line);
    }
  });

  // In Whitesmiths mode, remember whether this line closes a block. The level
  // can only be decreased after the line has been added, because the line
  // itself still needs to be indented.
  const bool EndsWhitesmithsBlock =
      Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  const bool NeedsReconstruction =
      !parsingPPDirective() && !InExpansion && containsExpansion(*Line);
  if (!NeedsReconstruction) {
    // At the top level we only get here when no unexpansion is going on, or
    // when conditional formatting led to unfinished macro reconstructions.
    assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
    CurrentLines->push_back(std::move(*Line));
  } else {
    // The line was expanded from a macro call: use it to reconstruct an
    // unwrapped line from the structure of the expanded line and the
    // unexpanded token stream.
    if (!Reconstruct)
      Reconstruct.emplace(Line->Level, Unexpanded);
    Reconstruct->addLine(*Line);

    // The reconstructed unexpanded lines go into the normal flow of lines,
    // while the expanded lines are kept on the side to be analyzed in an
    // extra step.
    CurrentExpandedLines.push_back(std::move(*Line));

    if (Reconstruct->finished()) {
      UnwrappedLine Unexpansion = std::move(*Reconstruct).takeResult();
      assert(!Unexpansion.Tokens.empty() &&
             "Reconstructed must at least contain the macro identifier.");
      assert(!parsingPPDirective());
      LLVM_DEBUG({
        llvm::dbgs() << "Adding unexpanded line:\n";
        printDebugInfo(Unexpansion);
      });
      // The expanded lines are keyed by the first token of the
      // reconstructed line.
      ExpandedLines[Unexpansion.Tokens.begin()->Tok] = CurrentExpandedLines;
      Lines.push_back(std::move(Unexpansion));
      CurrentExpandedLines.clear();
      Reconstruct.reset();
    }
  }

  // Reset the per-line state.
  Line->Tokens.clear();
  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  Line->FirstStartColumn = 0;
  Line->IsContinuation = false;
  Line->SeenDecltypeAuto = false;

  if (EndsWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
    --Line->Level;

  // Outside of a preprocessor directive, append any preprocessor directive
  // lines that were collected in the meantime.
  if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
    CurrentLines->append(
        std::make_move_iterator(PreprocessorDirectives.begin()),
        std::make_move_iterator(PreprocessorDirectives.end()));
    PreprocessorDirectives.clear();
  }

  // Disconnect the current token from the last token on the previous line.
  FormatTok->Previous = nullptr;
}
4609
4610
// Returns true once the current token is the end-of-file token.
bool UnwrappedLineParser::eof() const {
  return FormatTok->is(tok::eof);
}
4611
4612
bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4613
return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4614
FormatTok.NewlinesBefore > 0;
4615
}
4616
4617
// Checks if \p FormatTok is a line comment that continues the line comment
4618
// section on \p Line.
4619
static bool
4620
continuesLineCommentSection(const FormatToken &FormatTok,
4621
const UnwrappedLine &Line,
4622
const llvm::Regex &CommentPragmasRegex) {
4623
if (Line.Tokens.empty())
4624
return false;
4625
4626
StringRef IndentContent = FormatTok.TokenText;
4627
if (FormatTok.TokenText.starts_with("//") ||
4628
FormatTok.TokenText.starts_with("/*")) {
4629
IndentContent = FormatTok.TokenText.substr(2);
4630
}
4631
if (CommentPragmasRegex.match(IndentContent))
4632
return false;
4633
4634
// If Line starts with a line comment, then FormatTok continues the comment
4635
// section if its original column is greater or equal to the original start
4636
// column of the line.
4637
//
4638
// Define the min column token of a line as follows: if a line ends in '{' or
4639
// contains a '{' followed by a line comment, then the min column token is
4640
// that '{'. Otherwise, the min column token of the line is the first token of
4641
// the line.
4642
//
4643
// If Line starts with a token other than a line comment, then FormatTok
4644
// continues the comment section if its original column is greater than the
4645
// original start column of the min column token of the line.
4646
//
4647
// For example, the second line comment continues the first in these cases:
4648
//
4649
// // first line
4650
// // second line
4651
//
4652
// and:
4653
//
4654
// // first line
4655
// // second line
4656
//
4657
// and:
4658
//
4659
// int i; // first line
4660
// // second line
4661
//
4662
// and:
4663
//
4664
// do { // first line
4665
// // second line
4666
// int i;
4667
// } while (true);
4668
//
4669
// and:
4670
//
4671
// enum {
4672
// a, // first line
4673
// // second line
4674
// b
4675
// };
4676
//
4677
// The second line comment doesn't continue the first in these cases:
4678
//
4679
// // first line
4680
// // second line
4681
//
4682
// and:
4683
//
4684
// int i; // first line
4685
// // second line
4686
//
4687
// and:
4688
//
4689
// do { // first line
4690
// // second line
4691
// int i;
4692
// } while (true);
4693
//
4694
// and:
4695
//
4696
// enum {
4697
// a, // first line
4698
// // second line
4699
// };
4700
const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4701
4702
// Scan for '{//'. If found, use the column of '{' as a min column for line
4703
// comment section continuation.
4704
const FormatToken *PreviousToken = nullptr;
4705
for (const UnwrappedLineNode &Node : Line.Tokens) {
4706
if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4707
isLineComment(*Node.Tok)) {
4708
MinColumnToken = PreviousToken;
4709
break;
4710
}
4711
PreviousToken = Node.Tok;
4712
4713
// Grab the last newline preceding a token in this unwrapped line.
4714
if (Node.Tok->NewlinesBefore > 0)
4715
MinColumnToken = Node.Tok;
4716
}
4717
if (PreviousToken && PreviousToken->is(tok::l_brace))
4718
MinColumnToken = PreviousToken;
4719
4720
return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4721
MinColumnToken);
4722
}
4723
4724
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4725
bool JustComments = Line->Tokens.empty();
4726
for (FormatToken *Tok : CommentsBeforeNextToken) {
4727
// Line comments that belong to the same line comment section are put on the
4728
// same line since later we might want to reflow content between them.
4729
// Additional fine-grained breaking of line comment sections is controlled
4730
// by the class BreakableLineCommentSection in case it is desirable to keep
4731
// several line comment sections in the same unwrapped line.
4732
//
4733
// FIXME: Consider putting separate line comment sections as children to the
4734
// unwrapped line instead.
4735
Tok->ContinuesLineCommentSection =
4736
continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4737
if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4738
addUnwrappedLine();
4739
pushToken(Tok);
4740
}
4741
if (NewlineBeforeNext && JustComments)
4742
addUnwrappedLine();
4743
CommentsBeforeNextToken.clear();
4744
}
4745
4746
void UnwrappedLineParser::nextToken(int LevelDifference) {
4747
if (eof())
4748
return;
4749
flushComments(isOnNewLine(*FormatTok));
4750
pushToken(FormatTok);
4751
FormatToken *Previous = FormatTok;
4752
if (!Style.isJavaScript())
4753
readToken(LevelDifference);
4754
else
4755
readTokenWithJavaScriptASI();
4756
FormatTok->Previous = Previous;
4757
if (Style.isVerilog()) {
4758
// Blocks in Verilog can have `begin` and `end` instead of braces. For
4759
// keywords like `begin`, we can't treat them the same as left braces
4760
// because some contexts require one of them. For example structs use
4761
// braces and if blocks use keywords, and a left brace can occur in an if
4762
// statement, but it is not a block. For keywords like `end`, we simply
4763
// treat them the same as right braces.
4764
if (Keywords.isVerilogEnd(*FormatTok))
4765
FormatTok->Tok.setKind(tok::r_brace);
4766
}
4767
}
4768
4769
void UnwrappedLineParser::distributeComments(
4770
const SmallVectorImpl<FormatToken *> &Comments,
4771
const FormatToken *NextTok) {
4772
// Whether or not a line comment token continues a line is controlled by
4773
// the method continuesLineCommentSection, with the following caveat:
4774
//
4775
// Define a trail of Comments to be a nonempty proper postfix of Comments such
4776
// that each comment line from the trail is aligned with the next token, if
4777
// the next token exists. If a trail exists, the beginning of the maximal
4778
// trail is marked as a start of a new comment section.
4779
//
4780
// For example in this code:
4781
//
4782
// int a; // line about a
4783
// // line 1 about b
4784
// // line 2 about b
4785
// int b;
4786
//
4787
// the two lines about b form a maximal trail, so there are two sections, the
4788
// first one consisting of the single comment "// line about a" and the
4789
// second one consisting of the next two comments.
4790
if (Comments.empty())
4791
return;
4792
bool ShouldPushCommentsInCurrentLine = true;
4793
bool HasTrailAlignedWithNextToken = false;
4794
unsigned StartOfTrailAlignedWithNextToken = 0;
4795
if (NextTok) {
4796
// We are skipping the first element intentionally.
4797
for (unsigned i = Comments.size() - 1; i > 0; --i) {
4798
if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4799
HasTrailAlignedWithNextToken = true;
4800
StartOfTrailAlignedWithNextToken = i;
4801
}
4802
}
4803
}
4804
for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4805
FormatToken *FormatTok = Comments[i];
4806
if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4807
FormatTok->ContinuesLineCommentSection = false;
4808
} else {
4809
FormatTok->ContinuesLineCommentSection =
4810
continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4811
}
4812
if (!FormatTok->ContinuesLineCommentSection &&
4813
(isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4814
ShouldPushCommentsInCurrentLine = false;
4815
}
4816
if (ShouldPushCommentsInCurrentLine)
4817
pushToken(FormatTok);
4818
else
4819
CommentsBeforeNextToken.push_back(FormatTok);
4820
}
4821
}
4822
4823
void UnwrappedLineParser::readToken(int LevelDifference) {
4824
SmallVector<FormatToken *, 1> Comments;
4825
bool PreviousWasComment = false;
4826
bool FirstNonCommentOnLine = false;
4827
do {
4828
FormatTok = Tokens->getNextToken();
4829
assert(FormatTok);
4830
while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4831
TT_ConflictAlternative)) {
4832
if (FormatTok->is(TT_ConflictStart))
4833
conditionalCompilationStart(/*Unreachable=*/false);
4834
else if (FormatTok->is(TT_ConflictAlternative))
4835
conditionalCompilationAlternative();
4836
else if (FormatTok->is(TT_ConflictEnd))
4837
conditionalCompilationEnd();
4838
FormatTok = Tokens->getNextToken();
4839
FormatTok->MustBreakBefore = true;
4840
FormatTok->MustBreakBeforeFinalized = true;
4841
}
4842
4843
auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4844
const FormatToken &Tok,
4845
bool PreviousWasComment) {
4846
auto IsFirstOnLine = [](const FormatToken &Tok) {
4847
return Tok.HasUnescapedNewline || Tok.IsFirst;
4848
};
4849
4850
// Consider preprocessor directives preceded by block comments as first
4851
// on line.
4852
if (PreviousWasComment)
4853
return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4854
return IsFirstOnLine(Tok);
4855
};
4856
4857
FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4858
FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4859
PreviousWasComment = FormatTok->is(tok::comment);
4860
4861
while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4862
(!Style.isVerilog() ||
4863
Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4864
FirstNonCommentOnLine) {
4865
distributeComments(Comments, FormatTok);
4866
Comments.clear();
4867
// If there is an unfinished unwrapped line, we flush the preprocessor
4868
// directives only after that unwrapped line was finished later.
4869
bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4870
ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4871
assert((LevelDifference >= 0 ||
4872
static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4873
"LevelDifference makes Line->Level negative");
4874
Line->Level += LevelDifference;
4875
// Comments stored before the preprocessor directive need to be output
4876
// before the preprocessor directive, at the same level as the
4877
// preprocessor directive, as we consider them to apply to the directive.
4878
if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4879
PPBranchLevel > 0) {
4880
Line->Level += PPBranchLevel;
4881
}
4882
assert(Line->Level >= Line->UnbracedBodyLevel);
4883
Line->Level -= Line->UnbracedBodyLevel;
4884
flushComments(isOnNewLine(*FormatTok));
4885
parsePPDirective();
4886
PreviousWasComment = FormatTok->is(tok::comment);
4887
FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4888
FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4889
}
4890
4891
if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4892
!Line->InPPDirective) {
4893
continue;
4894
}
4895
4896
if (FormatTok->is(tok::identifier) &&
4897
Macros.defined(FormatTok->TokenText) &&
4898
// FIXME: Allow expanding macros in preprocessor directives.
4899
!Line->InPPDirective) {
4900
FormatToken *ID = FormatTok;
4901
unsigned Position = Tokens->getPosition();
4902
4903
// To correctly parse the code, we need to replace the tokens of the macro
4904
// call with its expansion.
4905
auto PreCall = std::move(Line);
4906
Line.reset(new UnwrappedLine);
4907
bool OldInExpansion = InExpansion;
4908
InExpansion = true;
4909
// We parse the macro call into a new line.
4910
auto Args = parseMacroCall();
4911
InExpansion = OldInExpansion;
4912
assert(Line->Tokens.front().Tok == ID);
4913
// And remember the unexpanded macro call tokens.
4914
auto UnexpandedLine = std::move(Line);
4915
// Reset to the old line.
4916
Line = std::move(PreCall);
4917
4918
LLVM_DEBUG({
4919
llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4920
if (Args) {
4921
llvm::dbgs() << "(";
4922
for (const auto &Arg : Args.value())
4923
for (const auto &T : Arg)
4924
llvm::dbgs() << T->TokenText << " ";
4925
llvm::dbgs() << ")";
4926
}
4927
llvm::dbgs() << "\n";
4928
});
4929
if (Macros.objectLike(ID->TokenText) && Args &&
4930
!Macros.hasArity(ID->TokenText, Args->size())) {
4931
// The macro is either
4932
// - object-like, but we got argumnets, or
4933
// - overloaded to be both object-like and function-like, but none of
4934
// the function-like arities match the number of arguments.
4935
// Thus, expand as object-like macro.
4936
LLVM_DEBUG(llvm::dbgs()
4937
<< "Macro \"" << ID->TokenText
4938
<< "\" not overloaded for arity " << Args->size()
4939
<< "or not function-like, using object-like overload.");
4940
Args.reset();
4941
UnexpandedLine->Tokens.resize(1);
4942
Tokens->setPosition(Position);
4943
nextToken();
4944
assert(!Args && Macros.objectLike(ID->TokenText));
4945
}
4946
if ((!Args && Macros.objectLike(ID->TokenText)) ||
4947
(Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4948
// Next, we insert the expanded tokens in the token stream at the
4949
// current position, and continue parsing.
4950
Unexpanded[ID] = std::move(UnexpandedLine);
4951
SmallVector<FormatToken *, 8> Expansion =
4952
Macros.expand(ID, std::move(Args));
4953
if (!Expansion.empty())
4954
FormatTok = Tokens->insertTokens(Expansion);
4955
4956
LLVM_DEBUG({
4957
llvm::dbgs() << "Expanded: ";
4958
for (const auto &T : Expansion)
4959
llvm::dbgs() << T->TokenText << " ";
4960
llvm::dbgs() << "\n";
4961
});
4962
} else {
4963
LLVM_DEBUG({
4964
llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4965
<< "\", because it was used ";
4966
if (Args)
4967
llvm::dbgs() << "with " << Args->size();
4968
else
4969
llvm::dbgs() << "without";
4970
llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4971
});
4972
Tokens->setPosition(Position);
4973
FormatTok = ID;
4974
}
4975
}
4976
4977
if (FormatTok->isNot(tok::comment)) {
4978
distributeComments(Comments, FormatTok);
4979
Comments.clear();
4980
return;
4981
}
4982
4983
Comments.push_back(FormatTok);
4984
} while (!eof());
4985
4986
distributeComments(Comments, nullptr);
4987
Comments.clear();
4988
}
4989
4990
namespace {
4991
template <typename Iterator>
4992
void pushTokens(Iterator Begin, Iterator End,
4993
llvm::SmallVectorImpl<FormatToken *> &Into) {
4994
for (auto I = Begin; I != End; ++I) {
4995
Into.push_back(I->Tok);
4996
for (const auto &Child : I->Children)
4997
pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4998
}
4999
}
5000
} // namespace
5001
5002
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5003
UnwrappedLineParser::parseMacroCall() {
5004
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5005
assert(Line->Tokens.empty());
5006
nextToken();
5007
if (FormatTok->isNot(tok::l_paren))
5008
return Args;
5009
unsigned Position = Tokens->getPosition();
5010
FormatToken *Tok = FormatTok;
5011
nextToken();
5012
Args.emplace();
5013
auto ArgStart = std::prev(Line->Tokens.end());
5014
5015
int Parens = 0;
5016
do {
5017
switch (FormatTok->Tok.getKind()) {
5018
case tok::l_paren:
5019
++Parens;
5020
nextToken();
5021
break;
5022
case tok::r_paren: {
5023
if (Parens > 0) {
5024
--Parens;
5025
nextToken();
5026
break;
5027
}
5028
Args->push_back({});
5029
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5030
nextToken();
5031
return Args;
5032
}
5033
case tok::comma: {
5034
if (Parens > 0) {
5035
nextToken();
5036
break;
5037
}
5038
Args->push_back({});
5039
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5040
nextToken();
5041
ArgStart = std::prev(Line->Tokens.end());
5042
break;
5043
}
5044
default:
5045
nextToken();
5046
break;
5047
}
5048
} while (!eof());
5049
Line->Tokens.resize(1);
5050
Tokens->setPosition(Position);
5051
FormatTok = Tok;
5052
return {};
5053
}
5054
5055
/// Appends \p Tok as a new node at the end of the current line.
///
/// If a break was requested before the next token
/// (MustBreakBeforeNextToken), the token stored in the newly appended node is
/// marked as requiring a break before it, that decision is flagged as
/// finalized, and the pending request is cleared.
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  Line->Tokens.push_back(UnwrappedLineNode(Tok));
  if (!MustBreakBeforeNextToken)
    return;

  auto *Pushed = Line->Tokens.back().Tok;
  Pushed->MustBreakBefore = true;
  Pushed->MustBreakBeforeFinalized = true;
  MustBreakBeforeNextToken = false;
}
5063
5064
} // end namespace format
5065
} // end namespace clang
5066
5067