CoCalc -- UnwrappedLineParser.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp
³⁵²³⁴ views
1
//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file contains the implementation of the UnwrappedLineParser,
11
/// which turns a stream of tokens into UnwrappedLines.
12
///
13
//===----------------------------------------------------------------------===//
14

15
#include "UnwrappedLineParser.h"
16
#include "FormatToken.h"
17
#include "FormatTokenLexer.h"
18
#include "FormatTokenSource.h"
19
#include "Macros.h"
20
#include "TokenAnnotator.h"
21
#include "clang/Basic/TokenKinds.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/raw_os_ostream.h"
26
#include "llvm/Support/raw_ostream.h"
27

28
#include <algorithm>
29
#include <utility>
30

31
#define DEBUG_TYPE "format-parser"
32

33
namespace clang {
34
namespace format {
35

36
namespace {
37

38
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
39
               StringRef Prefix = "", bool PrintText = false) {
40
  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
41
     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
42
  bool NewLine = false;
43
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
44
                                                    E = Line.Tokens.end();
45
       I != E; ++I) {
46
    if (NewLine) {
47
      OS << Prefix;
48
      NewLine = false;
49
    }
50
    OS << I->Tok->Tok.getName() << "["
51
       << "T=" << (unsigned)I->Tok->getType()
52
       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
53
       << "\"] ";
54
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
55
             CI = I->Children.begin(),
56
             CE = I->Children.end();
57
         CI != CE; ++CI) {
58
      OS << "\n";
59
      printLine(OS, *CI, (Prefix + "  ").str());
60
      NewLine = true;
61
    }
62
  }
63
  if (!NewLine)
64
    OS << "\n";
65
}
66

67
/// Prints \p Line to the LLVM debug stream; only referenced from LLVM_DEBUG
/// statements, hence the unused attribute.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  llvm::raw_ostream &DebugStream = llvm::dbgs();
  printLine(DebugStream, Line);
}
70

71
class ScopedDeclarationState {
72
public:
73
  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
74
                         bool MustBeDeclaration)
75
      : Line(Line), Stack(Stack) {
76
    Line.MustBeDeclaration = MustBeDeclaration;
77
    Stack.push_back(MustBeDeclaration);
78
  }
79
  ~ScopedDeclarationState() {
80
    Stack.pop_back();
81
    if (!Stack.empty())
82
      Line.MustBeDeclaration = Stack.back();
83
    else
84
      Line.MustBeDeclaration = true;
85
  }
86

87
private:
88
  UnwrappedLine &Line;
89
  llvm::BitVector &Stack;
90
};
91

92
} // end anonymous namespace
93

94
std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
95
  llvm::raw_os_ostream OS(Stream);
96
  printLine(OS, Line);
97
  return Stream;
98
}
99

100
class ScopedLineState {
101
public:
102
  ScopedLineState(UnwrappedLineParser &Parser,
103
                  bool SwitchToPreprocessorLines = false)
104
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
105
    if (SwitchToPreprocessorLines)
106
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
107
    else if (!Parser.Line->Tokens.empty())
108
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
109
    PreBlockLine = std::move(Parser.Line);
110
    Parser.Line = std::make_unique<UnwrappedLine>();
111
    Parser.Line->Level = PreBlockLine->Level;
112
    Parser.Line->PPLevel = PreBlockLine->PPLevel;
113
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
114
    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
115
    Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
116
  }
117

118
  ~ScopedLineState() {
119
    if (!Parser.Line->Tokens.empty())
120
      Parser.addUnwrappedLine();
121
    assert(Parser.Line->Tokens.empty());
122
    Parser.Line = std::move(PreBlockLine);
123
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
124
      Parser.MustBreakBeforeNextToken = true;
125
    Parser.CurrentLines = OriginalLines;
126
  }
127

128
private:
129
  UnwrappedLineParser &Parser;
130

131
  std::unique_ptr<UnwrappedLine> PreBlockLine;
132
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
133
};
134

135
class CompoundStatementIndenter {
136
public:
137
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
138
                            const FormatStyle &Style, unsigned &LineLevel)
139
      : CompoundStatementIndenter(Parser, LineLevel,
140
                                  Style.BraceWrapping.AfterControlStatement,
141
                                  Style.BraceWrapping.IndentBraces) {}
142
  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
143
                            bool WrapBrace, bool IndentBrace)
144
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
145
    if (WrapBrace)
146
      Parser->addUnwrappedLine();
147
    if (IndentBrace)
148
      ++LineLevel;
149
  }
150
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
151

152
private:
153
  unsigned &LineLevel;
154
  unsigned OldLineLevel;
155
};
156

157
/// Sets up the parser over \p Tokens. Parsing starts with an empty line that
/// is collected into the top-level line list; include-guard detection is
/// disabled up front when the style does not indent preprocessor directives.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(std::make_unique<UnwrappedLine>()), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
      LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
  // The cached language flag must agree with the derived language options.
  assert(IsCpp == LangOpts.CXXOperatorNames);
}
175

176
void UnwrappedLineParser::reset() {
177
  PPBranchLevel = -1;
178
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
179
                     ? IG_Rejected
180
                     : IG_Inited;
181
  IncludeGuardToken = nullptr;
182
  Line.reset(new UnwrappedLine);
183
  CommentsBeforeNextToken.clear();
184
  FormatTok = nullptr;
185
  MustBreakBeforeNextToken = false;
186
  IsDecltypeAutoFunction = false;
187
  PreprocessorDirectives.clear();
188
  CurrentLines = &Lines;
189
  DeclarationScopeStack.clear();
190
  NestedTooDeep.clear();
191
  NestedLambdas.clear();
192
  PPStack.clear();
193
  Line->FirstStartColumn = FirstStartColumn;
194

195
  if (!Unexpanded.empty())
196
    for (FormatToken *Token : AllTokens)
197
      Token->MacroCtx.reset();
198
  CurrentExpandedLines.clear();
199
  ExpandedLines.clear();
200
  Unexpanded.clear();
201
  InExpansion = false;
202
  Reconstruct.reset();
203
}
204

205
void UnwrappedLineParser::parse() {
206
  IndexedTokenSource TokenSource(AllTokens);
207
  Line->FirstStartColumn = FirstStartColumn;
208
  do {
209
    LLVM_DEBUG(llvm::dbgs() << "----\n");
210
    reset();
211
    Tokens = &TokenSource;
212
    TokenSource.reset();
213

214
    readToken();
215
    parseFile();
216

217
    // If we found an include guard then all preprocessor directives (other than
218
    // the guard) are over-indented by one.
219
    if (IncludeGuard == IG_Found) {
220
      for (auto &Line : Lines)
221
        if (Line.InPPDirective && Line.Level > 0)
222
          --Line.Level;
223
    }
224

225
    // Create line with eof token.
226
    assert(eof());
227
    pushToken(FormatTok);
228
    addUnwrappedLine();
229

230
    // In a first run, format everything with the lines containing macro calls
231
    // replaced by the expansion.
232
    if (!ExpandedLines.empty()) {
233
      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
234
      for (const auto &Line : Lines) {
235
        if (!Line.Tokens.empty()) {
236
          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
237
          if (it != ExpandedLines.end()) {
238
            for (const auto &Expanded : it->second) {
239
              LLVM_DEBUG(printDebugInfo(Expanded));
240
              Callback.consumeUnwrappedLine(Expanded);
241
            }
242
            continue;
243
          }
244
        }
245
        LLVM_DEBUG(printDebugInfo(Line));
246
        Callback.consumeUnwrappedLine(Line);
247
      }
248
      Callback.finishRun();
249
    }
250

251
    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
252
    for (const UnwrappedLine &Line : Lines) {
253
      LLVM_DEBUG(printDebugInfo(Line));
254
      Callback.consumeUnwrappedLine(Line);
255
    }
256
    Callback.finishRun();
257
    Lines.clear();
258
    while (!PPLevelBranchIndex.empty() &&
259
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
260
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
261
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
262
    }
263
    if (!PPLevelBranchIndex.empty()) {
264
      ++PPLevelBranchIndex.back();
265
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
266
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
267
    }
268
  } while (!PPLevelBranchIndex.empty());
269
}
270

271
void UnwrappedLineParser::parseFile() {
272
  // The top-level context in a file always has declarations, except for pre-
273
  // processor directives and JavaScript files.
274
  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
275
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
276
                                          MustBeDeclaration);
277
  if (Style.Language == FormatStyle::LK_TextProto)
278
    parseBracedList();
279
  else
280
    parseLevel();
281
  // Make sure to format the remaining tokens.
282
  //
283
  // LK_TextProto is special since its top-level is parsed as the body of a
284
  // braced list, which does not necessarily have natural line separators such
285
  // as a semicolon. Comments after the last entry that have been determined to
286
  // not belong to that line, as in:
287
  //   key: value
288
  //   // endfile comment
289
  // do not have a chance to be put on a line of their own until this point.
290
  // Here we add this newline before end-of-file comments.
291
  if (Style.Language == FormatStyle::LK_TextProto &&
292
      !CommentsBeforeNextToken.empty()) {
293
    addUnwrappedLine();
294
  }
295
  flushComments(true);
296
  addUnwrappedLine();
297
}
298

299
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
300
  do {
301
    switch (FormatTok->Tok.getKind()) {
302
    case tok::l_brace:
303
      return;
304
    default:
305
      if (FormatTok->is(Keywords.kw_where)) {
306
        addUnwrappedLine();
307
        nextToken();
308
        parseCSharpGenericTypeConstraint();
309
        break;
310
      }
311
      nextToken();
312
      break;
313
    }
314
  } while (!eof());
315
}
316

317
void UnwrappedLineParser::parseCSharpAttribute() {
318
  int UnpairedSquareBrackets = 1;
319
  do {
320
    switch (FormatTok->Tok.getKind()) {
321
    case tok::r_square:
322
      nextToken();
323
      --UnpairedSquareBrackets;
324
      if (UnpairedSquareBrackets == 0) {
325
        addUnwrappedLine();
326
        return;
327
      }
328
      break;
329
    case tok::l_square:
330
      ++UnpairedSquareBrackets;
331
      nextToken();
332
      break;
333
    default:
334
      nextToken();
335
      break;
336
    }
337
  } while (!eof());
338
}
339

340
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
341
  if (!Lines.empty() && Lines.back().InPPDirective)
342
    return true;
343

344
  const FormatToken *Previous = Tokens->getPreviousToken();
345
  return Previous && Previous->is(tok::comment) &&
346
         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
347
}
348

349
/// \brief Parses a level, that is ???.
350
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
351
/// \param IfKind The \p if statement kind in the level.
352
/// \param IfLeftBrace The left brace of the \p if block in the level.
353
/// \returns true if a simple block of if/else/for/while, or false otherwise.
354
/// (A simple block has a single statement.)
355
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
356
                                     IfStmtKind *IfKind,
357
                                     FormatToken **IfLeftBrace) {
358
  const bool InRequiresExpression =
359
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
360
  const bool IsPrecededByCommentOrPPDirective =
361
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
362
  FormatToken *IfLBrace = nullptr;
363
  bool HasDoWhile = false;
364
  bool HasLabel = false;
365
  unsigned StatementCount = 0;
366
  bool SwitchLabelEncountered = false;
367

368
  do {
369
    if (FormatTok->isAttribute()) {
370
      nextToken();
371
      if (FormatTok->is(tok::l_paren))
372
        parseParens();
373
      continue;
374
    }
375
    tok::TokenKind Kind = FormatTok->Tok.getKind();
376
    if (FormatTok->is(TT_MacroBlockBegin))
377
      Kind = tok::l_brace;
378
    else if (FormatTok->is(TT_MacroBlockEnd))
379
      Kind = tok::r_brace;
380

381
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
382
                         &HasLabel, &StatementCount] {
383
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
384
                             HasDoWhile ? nullptr : &HasDoWhile,
385
                             HasLabel ? nullptr : &HasLabel);
386
      ++StatementCount;
387
      assert(StatementCount > 0 && "StatementCount overflow!");
388
    };
389

390
    switch (Kind) {
391
    case tok::comment:
392
      nextToken();
393
      addUnwrappedLine();
394
      break;
395
    case tok::l_brace:
396
      if (InRequiresExpression) {
397
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
398
      } else if (FormatTok->Previous &&
399
                 FormatTok->Previous->ClosesRequiresClause) {
400
        // We need the 'default' case here to correctly parse a function
401
        // l_brace.
402
        ParseDefault();
403
        continue;
404
      }
405
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
406
        if (tryToParseBracedList())
407
          continue;
408
        FormatTok->setFinalizedType(TT_BlockLBrace);
409
      }
410
      parseBlock();
411
      ++StatementCount;
412
      assert(StatementCount > 0 && "StatementCount overflow!");
413
      addUnwrappedLine();
414
      break;
415
    case tok::r_brace:
416
      if (OpeningBrace) {
417
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
418
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
419
          return false;
420
        }
421
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
422
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
423
            precededByCommentOrPPDirective()) {
424
          return false;
425
        }
426
        const FormatToken *Next = Tokens->peekNextToken();
427
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
428
          return false;
429
        if (IfLeftBrace)
430
          *IfLeftBrace = IfLBrace;
431
        return true;
432
      }
433
      nextToken();
434
      addUnwrappedLine();
435
      break;
436
    case tok::kw_default: {
437
      unsigned StoredPosition = Tokens->getPosition();
438
      auto *Next = Tokens->getNextNonComment();
439
      FormatTok = Tokens->setPosition(StoredPosition);
440
      if (!Next->isOneOf(tok::colon, tok::arrow)) {
441
        // default not followed by `:` or `->` is not a case label; treat it
442
        // like an identifier.
443
        parseStructuralElement();
444
        break;
445
      }
446
      // Else, if it is 'default:', fall through to the case handling.
447
      [[fallthrough]];
448
    }
449
    case tok::kw_case:
450
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
451
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
452
        // Proto: there are no switch/case statements
453
        // Verilog: Case labels don't have this word. We handle case
454
        // labels including default in TokenAnnotator.
455
        // JavaScript: A 'case: string' style field declaration.
456
        ParseDefault();
457
        break;
458
      }
459
      if (!SwitchLabelEncountered &&
460
          (Style.IndentCaseLabels ||
461
           (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
462
           (Line->InPPDirective && Line->Level == 1))) {
463
        ++Line->Level;
464
      }
465
      SwitchLabelEncountered = true;
466
      parseStructuralElement();
467
      break;
468
    case tok::l_square:
469
      if (Style.isCSharp()) {
470
        nextToken();
471
        parseCSharpAttribute();
472
        break;
473
      }
474
      if (handleCppAttributes())
475
        break;
476
      [[fallthrough]];
477
    default:
478
      ParseDefault();
479
      break;
480
    }
481
  } while (!eof());
482

483
  return false;
484
}
485

486
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
487
  // We'll parse forward through the tokens until we hit
488
  // a closing brace or eof - note that getNextToken() will
489
  // parse macros, so this will magically work inside macro
490
  // definitions, too.
491
  unsigned StoredPosition = Tokens->getPosition();
492
  FormatToken *Tok = FormatTok;
493
  const FormatToken *PrevTok = Tok->Previous;
494
  // Keep a stack of positions of lbrace tokens. We will
495
  // update information about whether an lbrace starts a
496
  // braced init list or a different block during the loop.
497
  struct StackEntry {
498
    FormatToken *Tok;
499
    const FormatToken *PrevTok;
500
  };
501
  SmallVector<StackEntry, 8> LBraceStack;
502
  assert(Tok->is(tok::l_brace));
503

504
  do {
505
    auto *NextTok = Tokens->getNextNonComment();
506

507
    if (!Line->InMacroBody && !Style.isTableGen()) {
508
      // Skip PPDirective lines and comments.
509
      while (NextTok->is(tok::hash)) {
510
        NextTok = Tokens->getNextToken();
511
        if (NextTok->is(tok::pp_not_keyword))
512
          break;
513
        do {
514
          NextTok = Tokens->getNextToken();
515
        } while (!NextTok->HasUnescapedNewline && NextTok->isNot(tok::eof));
516

517
        while (NextTok->is(tok::comment))
518
          NextTok = Tokens->getNextToken();
519
      }
520
    }
521

522
    switch (Tok->Tok.getKind()) {
523
    case tok::l_brace:
524
      if (Style.isJavaScript() && PrevTok) {
525
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
526
          // A ':' indicates this code is in a type, or a braced list
527
          // following a label in an object literal ({a: {b: 1}}).
528
          // A '<' could be an object used in a comparison, but that is nonsense
529
          // code (can never return true), so more likely it is a generic type
530
          // argument (`X<{a: string; b: number}>`).
531
          // The code below could be confused by semicolons between the
532
          // individual members in a type member list, which would normally
533
          // trigger BK_Block. In both cases, this must be parsed as an inline
534
          // braced init.
535
          Tok->setBlockKind(BK_BracedInit);
536
        } else if (PrevTok->is(tok::r_paren)) {
537
          // `) { }` can only occur in function or method declarations in JS.
538
          Tok->setBlockKind(BK_Block);
539
        }
540
      } else {
541
        Tok->setBlockKind(BK_Unknown);
542
      }
543
      LBraceStack.push_back({Tok, PrevTok});
544
      break;
545
    case tok::r_brace:
546
      if (LBraceStack.empty())
547
        break;
548
      if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
549
        bool ProbablyBracedList = false;
550
        if (Style.Language == FormatStyle::LK_Proto) {
551
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
552
        } else if (LBrace->isNot(TT_EnumLBrace)) {
553
          // Using OriginalColumn to distinguish between ObjC methods and
554
          // binary operators is a bit hacky.
555
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
556
                                  NextTok->OriginalColumn == 0;
557

558
          // Try to detect a braced list. Note that regardless how we mark inner
559
          // braces here, we will overwrite the BlockKind later if we parse a
560
          // braced list (where all blocks inside are by default braced lists),
561
          // or when we explicitly detect blocks (for example while parsing
562
          // lambdas).
563

564
          // If we already marked the opening brace as braced list, the closing
565
          // must also be part of it.
566
          ProbablyBracedList = LBrace->is(TT_BracedListLBrace);
567

568
          ProbablyBracedList = ProbablyBracedList ||
569
                               (Style.isJavaScript() &&
570
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
571
                                                 Keywords.kw_as));
572
          ProbablyBracedList =
573
              ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
574
                                               NextTok->is(tok::l_paren)));
575

576
          // If there is a comma, semicolon or right paren after the closing
577
          // brace, we assume this is a braced initializer list.
578
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
579
          // braced list in JS.
580
          ProbablyBracedList =
581
              ProbablyBracedList ||
582
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
583
                               tok::r_paren, tok::r_square, tok::ellipsis);
584

585
          // Distinguish between braced list in a constructor initializer list
586
          // followed by constructor body, or just adjacent blocks.
587
          ProbablyBracedList =
588
              ProbablyBracedList ||
589
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
590
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
591
                                                   tok::greater));
592

593
          ProbablyBracedList =
594
              ProbablyBracedList ||
595
              (NextTok->is(tok::identifier) &&
596
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
597

598
          ProbablyBracedList = ProbablyBracedList ||
599
                               (NextTok->is(tok::semi) &&
600
                                (!ExpectClassBody || LBraceStack.size() != 1));
601

602
          ProbablyBracedList =
603
              ProbablyBracedList ||
604
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
605

606
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
607
            // We can have an array subscript after a braced init
608
            // list, but C++11 attributes are expected after blocks.
609
            NextTok = Tokens->getNextToken();
610
            ProbablyBracedList = NextTok->isNot(tok::l_square);
611
          }
612

613
          // Cpp macro definition body that is a nonempty braced list or block:
614
          if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
615
              !FormatTok->Previous && NextTok->is(tok::eof) &&
616
              // A statement can end with only `;` (simple statement), a block
617
              // closing brace (compound statement), or `:` (label statement).
618
              // If PrevTok is a block opening brace, Tok ends an empty block.
619
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
620
            ProbablyBracedList = true;
621
          }
622
        }
623
        const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
624
        Tok->setBlockKind(BlockKind);
625
        LBrace->setBlockKind(BlockKind);
626
      }
627
      LBraceStack.pop_back();
628
      break;
629
    case tok::identifier:
630
      if (Tok->isNot(TT_StatementMacro))
631
        break;
632
      [[fallthrough]];
633
    case tok::at:
634
    case tok::semi:
635
    case tok::kw_if:
636
    case tok::kw_while:
637
    case tok::kw_for:
638
    case tok::kw_switch:
639
    case tok::kw_try:
640
    case tok::kw___try:
641
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
642
        LBraceStack.back().Tok->setBlockKind(BK_Block);
643
      break;
644
    default:
645
      break;
646
    }
647

648
    PrevTok = Tok;
649
    Tok = NextTok;
650
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
651

652
  // Assume other blocks for all unclosed opening braces.
653
  for (const auto &Entry : LBraceStack)
654
    if (Entry.Tok->is(BK_Unknown))
655
      Entry.Tok->setBlockKind(BK_Block);
656

657
  FormatTok = Tokens->setPosition(StoredPosition);
658
}
659

660
// Sets the token type of the directly previous right brace.
661
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
662
  if (auto Prev = FormatTok->getPreviousNonComment();
663
      Prev && Prev->is(tok::r_brace)) {
664
    Prev->setFinalizedType(Type);
665
  }
666
}
667

668
/// Mixes the std::hash of \p v into \p seed, so that a sequence of calls
/// produces a hash that depends on every value and on their order.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
673

674
size_t UnwrappedLineParser::computePPHash() const {
675
  size_t h = 0;
676
  for (const auto &i : PPStack) {
677
    hash_combine(h, size_t(i.Kind));
678
    hash_combine(h, i.Line);
679
  }
680
  return h;
681
}
682

683
// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
684
// is not null, subtracts its length (plus the preceding space) when computing
685
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
686
// running the token annotator on it so that we can restore them afterward.
687
bool UnwrappedLineParser::mightFitOnOneLine(
688
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
689
  const auto ColumnLimit = Style.ColumnLimit;
690
  if (ColumnLimit == 0)
691
    return true;
692

693
  auto &Tokens = ParsedLine.Tokens;
694
  assert(!Tokens.empty());
695

696
  const auto *LastToken = Tokens.back().Tok;
697
  assert(LastToken);
698

699
  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
700

701
  int Index = 0;
702
  for (const auto &Token : Tokens) {
703
    assert(Token.Tok);
704
    auto &SavedToken = SavedTokens[Index++];
705
    SavedToken.Tok = new FormatToken;
706
    SavedToken.Tok->copyFrom(*Token.Tok);
707
    SavedToken.Children = std::move(Token.Children);
708
  }
709

710
  AnnotatedLine Line(ParsedLine);
711
  assert(Line.Last == LastToken);
712

713
  TokenAnnotator Annotator(Style, Keywords);
714
  Annotator.annotate(Line);
715
  Annotator.calculateFormattingInformation(Line);
716

717
  auto Length = LastToken->TotalLength;
718
  if (OpeningBrace) {
719
    assert(OpeningBrace != Tokens.front().Tok);
720
    if (auto Prev = OpeningBrace->Previous;
721
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
722
      Length -= ColumnLimit;
723
    }
724
    Length -= OpeningBrace->TokenText.size() + 1;
725
  }
726

727
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
728
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
729
    Length -= FirstToken->TokenText.size() + 1;
730
  }
731

732
  Index = 0;
733
  for (auto &Token : Tokens) {
734
    const auto &SavedToken = SavedTokens[Index++];
735
    Token.Tok->copyFrom(*SavedToken.Tok);
736
    Token.Children = std::move(SavedToken.Children);
737
    delete SavedToken.Tok;
738
  }
739

740
  // If these change PPLevel needs to be used for get correct indentation.
741
  assert(!Line.InMacroBody);
742
  assert(!Line.InPPDirective);
743
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
744
}
745

746
/// Parses a block that starts at the current token: '{', a block-begin macro,
/// or (for Verilog) a `begin`-like or hierarchy keyword.
///
/// \param MustBeDeclaration Declaration flag pushed for the block's contents.
/// \param AddLevels Number of levels by which the contents are indented.
/// \param MunchSemi Consume a ';' that directly follows the block.
/// \param KeepBraces Do not mark the braces as removable, unless the contained
///        \p if block has braces that are required.
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces Lower the level by one after the opening
///        brace's line has been emitted.
/// \returns The left brace of the \p if block reported by parseLevel(), or
///          \p nullptr (also when bailing out because of too deep nesting).
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (!Style.isVerilog() || FormatTok->isNot(tok::colon))
      return;
    nextToken();
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Number of lines emitted so far, i.e. the index the line containing the
  // opening token will get.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  const bool IsWhitesmiths =
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 && IsWhitesmiths)
    ++Line->Level;

  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      parsingPPDirective() ? 0 : PreprocessorDirectives.size();
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && !IsWhitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level did not end on the matching closing token.
  const bool EndsOnClosingToken = MacroBlock ? FormatTok->is(TT_MacroBlockEnd)
                                             : FormatTok->is(tok::r_brace);
  if (!EndsOnClosingToken) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace)) {
    FormatTok->setBlockKind(BK_Block);
    if (Tok->is(TT_NamespaceLBrace))
      FormatTok->setFinalizedType(TT_NamespaceRBrace);
  }

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block may be removed. Everything is
  // captured by value on purpose: the lambda adjusts its own copies of Tok and
  // Index, and the originals are still needed afterwards.
  auto RemoveBraces = [this, SimpleBlock, Tok, FollowedByComment, IfLBrace,
                       KeepBraces, InitialLevel, Index]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  if (RemoveBraces()) {
    // Link the two braces to each other.
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    for (; FormatTok->is(tok::semi); nextToken())
      FormatTok->Optional = true;
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link opening and closing lines when the preprocessor stack is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}
922

923
static bool isGoogScope(const UnwrappedLine &Line) {
924
  // FIXME: Closure-library specific stuff should not be hard-coded but be
925
  // configurable.
926
  if (Line.Tokens.size() < 4)
927
    return false;
928
  auto I = Line.Tokens.begin();
929
  if (I->Tok->TokenText != "goog")
930
    return false;
931
  ++I;
932
  if (I->Tok->isNot(tok::period))
933
    return false;
934
  ++I;
935
  if (I->Tok->TokenText != "scope")
936
    return false;
937
  ++I;
938
  return I->Tok->is(tok::l_paren);
939
}
940

941
static bool isIIFE(const UnwrappedLine &Line,
942
                   const AdditionalKeywords &Keywords) {
943
  // Look for the start of an immediately invoked anonymous function.
944
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
945
  // This is commonly done in JavaScript to create a new, anonymous scope.
946
  // Example: (function() { ... })()
947
  if (Line.Tokens.size() < 3)
948
    return false;
949
  auto I = Line.Tokens.begin();
950
  if (I->Tok->isNot(tok::l_paren))
951
    return false;
952
  ++I;
953
  if (I->Tok->isNot(Keywords.kw_function))
954
    return false;
955
  ++I;
956
  return I->Tok->is(tok::l_paren);
957
}
958

959
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
960
                                   const FormatToken &InitialToken) {
961
  tok::TokenKind Kind = InitialToken.Tok.getKind();
962
  if (InitialToken.is(TT_NamespaceMacro))
963
    Kind = tok::kw_namespace;
964

965
  switch (Kind) {
966
  case tok::kw_namespace:
967
    return Style.BraceWrapping.AfterNamespace;
968
  case tok::kw_class:
969
    return Style.BraceWrapping.AfterClass;
970
  case tok::kw_union:
971
    return Style.BraceWrapping.AfterUnion;
972
  case tok::kw_struct:
973
    return Style.BraceWrapping.AfterStruct;
974
  case tok::kw_enum:
975
    return Style.BraceWrapping.AfterEnum;
976
  default:
977
    return false;
978
  }
979
}
980

981
void UnwrappedLineParser::parseChildBlock() {
982
  assert(FormatTok->is(tok::l_brace));
983
  FormatTok->setBlockKind(BK_Block);
984
  const FormatToken *OpeningBrace = FormatTok;
985
  nextToken();
986
  {
987
    bool SkipIndent = (Style.isJavaScript() &&
988
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
989
    ScopedLineState LineState(*this);
990
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
991
                                            /*MustBeDeclaration=*/false);
992
    Line->Level += SkipIndent ? 0 : 1;
993
    parseLevel(OpeningBrace);
994
    flushComments(isOnNewLine(*FormatTok));
995
    Line->Level -= SkipIndent ? 0 : 1;
996
  }
997
  nextToken();
998
}
999

1000
void UnwrappedLineParser::parsePPDirective() {
1001
  assert(FormatTok->is(tok::hash) && "'#' expected");
1002
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1003

1004
  nextToken();
1005

1006
  if (!FormatTok->Tok.getIdentifierInfo()) {
1007
    parsePPUnknown();
1008
    return;
1009
  }
1010

1011
  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1012
  case tok::pp_define:
1013
    parsePPDefine();
1014
    return;
1015
  case tok::pp_if:
1016
    parsePPIf(/*IfDef=*/false);
1017
    break;
1018
  case tok::pp_ifdef:
1019
  case tok::pp_ifndef:
1020
    parsePPIf(/*IfDef=*/true);
1021
    break;
1022
  case tok::pp_else:
1023
  case tok::pp_elifdef:
1024
  case tok::pp_elifndef:
1025
  case tok::pp_elif:
1026
    parsePPElse();
1027
    break;
1028
  case tok::pp_endif:
1029
    parsePPEndIf();
1030
    break;
1031
  case tok::pp_pragma:
1032
    parsePPPragma();
1033
    break;
1034
  default:
1035
    parsePPUnknown();
1036
    break;
1037
  }
1038
}
1039

1040
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1041
  size_t Line = CurrentLines->size();
1042
  if (CurrentLines == &PreprocessorDirectives)
1043
    Line += Lines.size();
1044

1045
  if (Unreachable ||
1046
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1047
    PPStack.push_back({PP_Unreachable, Line});
1048
  } else {
1049
    PPStack.push_back({PP_Conditional, Line});
1050
  }
1051
}
1052

1053
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1054
  ++PPBranchLevel;
1055
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1056
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1057
    PPLevelBranchIndex.push_back(0);
1058
    PPLevelBranchCount.push_back(0);
1059
  }
1060
  PPChainBranchIndex.push(Unreachable ? -1 : 0);
1061
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1062
  conditionalCompilationCondition(Unreachable || Skip);
1063
}
1064

1065
void UnwrappedLineParser::conditionalCompilationAlternative() {
1066
  if (!PPStack.empty())
1067
    PPStack.pop_back();
1068
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1069
  if (!PPChainBranchIndex.empty())
1070
    ++PPChainBranchIndex.top();
1071
  conditionalCompilationCondition(
1072
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1073
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1074
}
1075

1076
void UnwrappedLineParser::conditionalCompilationEnd() {
1077
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1078
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1079
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1080
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1081
  }
1082
  // Guard against #endif's without #if.
1083
  if (PPBranchLevel > -1)
1084
    --PPBranchLevel;
1085
  if (!PPChainBranchIndex.empty())
1086
    PPChainBranchIndex.pop();
1087
  if (!PPStack.empty())
1088
    PPStack.pop_back();
1089
}
1090

1091
void UnwrappedLineParser::parsePPIf(bool IfDef) {
1092
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
1093
  nextToken();
1094
  bool Unreachable = false;
1095
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1096
    Unreachable = true;
1097
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1098
    Unreachable = true;
1099
  conditionalCompilationStart(Unreachable);
1100
  FormatToken *IfCondition = FormatTok;
1101
  // If there's a #ifndef on the first line, and the only lines before it are
1102
  // comments, it could be an include guard.
1103
  bool MaybeIncludeGuard = IfNDef;
1104
  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1105
    for (auto &Line : Lines) {
1106
      if (Line.Tokens.front().Tok->isNot(tok::comment)) {
1107
        MaybeIncludeGuard = false;
1108
        IncludeGuard = IG_Rejected;
1109
        break;
1110
      }
1111
    }
1112
  }
1113
  --PPBranchLevel;
1114
  parsePPUnknown();
1115
  ++PPBranchLevel;
1116
  if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1117
    IncludeGuard = IG_IfNdefed;
1118
    IncludeGuardToken = IfCondition;
1119
  }
1120
}
1121

1122
void UnwrappedLineParser::parsePPElse() {
1123
  // If a potential include guard has an #else, it's not an include guard.
1124
  if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1125
    IncludeGuard = IG_Rejected;
1126
  // Don't crash when there is an #else without an #if.
1127
  assert(PPBranchLevel >= -1);
1128
  if (PPBranchLevel == -1)
1129
    conditionalCompilationStart(/*Unreachable=*/true);
1130
  conditionalCompilationAlternative();
1131
  --PPBranchLevel;
1132
  parsePPUnknown();
1133
  ++PPBranchLevel;
1134
}
1135

1136
void UnwrappedLineParser::parsePPEndIf() {
1137
  conditionalCompilationEnd();
1138
  parsePPUnknown();
1139
  // If the #endif of a potential include guard is the last thing in the file,
1140
  // then we found an include guard.
1141
  if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1142
      Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1143
    IncludeGuard = IG_Found;
1144
  }
1145
}
1146

1147
void UnwrappedLineParser::parsePPDefine() {
1148
  nextToken();
1149

1150
  if (!FormatTok->Tok.getIdentifierInfo()) {
1151
    IncludeGuard = IG_Rejected;
1152
    IncludeGuardToken = nullptr;
1153
    parsePPUnknown();
1154
    return;
1155
  }
1156

1157
  if (IncludeGuard == IG_IfNdefed &&
1158
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
1159
    IncludeGuard = IG_Defined;
1160
    IncludeGuardToken = nullptr;
1161
    for (auto &Line : Lines) {
1162
      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1163
        IncludeGuard = IG_Rejected;
1164
        break;
1165
      }
1166
    }
1167
  }
1168

1169
  // In the context of a define, even keywords should be treated as normal
1170
  // identifiers. Setting the kind to identifier is not enough, because we need
1171
  // to treat additional keywords like __except as well, which are already
1172
  // identifiers. Setting the identifier info to null interferes with include
1173
  // guard processing above, and changes preprocessing nesting.
1174
  FormatTok->Tok.setKind(tok::identifier);
1175
  FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1176
  nextToken();
1177
  if (FormatTok->Tok.getKind() == tok::l_paren &&
1178
      !FormatTok->hasWhitespaceBefore()) {
1179
    parseParens();
1180
  }
1181
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1182
    Line->Level += PPBranchLevel + 1;
1183
  addUnwrappedLine();
1184
  ++Line->Level;
1185

1186
  Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1);
1187
  assert((int)Line->PPLevel >= 0);
1188
  Line->InMacroBody = true;
1189

1190
  if (Style.SkipMacroDefinitionBody) {
1191
    while (!eof()) {
1192
      FormatTok->Finalized = true;
1193
      FormatTok = Tokens->getNextToken();
1194
    }
1195
    addUnwrappedLine();
1196
    return;
1197
  }
1198

1199
  // Errors during a preprocessor directive can only affect the layout of the
1200
  // preprocessor directive, and thus we ignore them. An alternative approach
1201
  // would be to use the same approach we use on the file level (no
1202
  // re-indentation if there was a structural error) within the macro
1203
  // definition.
1204
  parseFile();
1205
}
1206

1207
void UnwrappedLineParser::parsePPPragma() {
1208
  Line->InPragmaDirective = true;
1209
  parsePPUnknown();
1210
}
1211

1212
void UnwrappedLineParser::parsePPUnknown() {
1213
  do {
1214
    nextToken();
1215
  } while (!eof());
1216
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1217
    Line->Level += PPBranchLevel + 1;
1218
  addUnwrappedLine();
1219
}
1220

1221
// Here we exclude certain tokens that are not usually the first token in an
1222
// unwrapped line. This is used in attempt to distinguish macro calls without
1223
// trailing semicolons from other constructs split to several lines.
1224
static bool tokenCanStartNewLine(const FormatToken &Tok) {
1225
  // Semicolon can be a null-statement, l_square can be a start of a macro or
1226
  // a C++11 attribute, but this doesn't seem to be common.
1227
  return !Tok.isOneOf(tok::semi, tok::l_brace,
1228
                      // Tokens that can only be used as binary operators and a
1229
                      // part of overloaded operator names.
1230
                      tok::period, tok::periodstar, tok::arrow, tok::arrowstar,
1231
                      tok::less, tok::greater, tok::slash, tok::percent,
1232
                      tok::lessless, tok::greatergreater, tok::equal,
1233
                      tok::plusequal, tok::minusequal, tok::starequal,
1234
                      tok::slashequal, tok::percentequal, tok::ampequal,
1235
                      tok::pipeequal, tok::caretequal, tok::greatergreaterequal,
1236
                      tok::lesslessequal,
1237
                      // Colon is used in labels, base class lists, initializer
1238
                      // lists, range-based for loops, ternary operator, but
1239
                      // should never be the first token in an unwrapped line.
1240
                      tok::colon,
1241
                      // 'noexcept' is a trailing annotation.
1242
                      tok::kw_noexcept);
1243
}
1244

1245
static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1246
                          const FormatToken *FormatTok) {
1247
  // FIXME: This returns true for C/C++ keywords like 'struct'.
1248
  return FormatTok->is(tok::identifier) &&
1249
         (!FormatTok->Tok.getIdentifierInfo() ||
1250
          !FormatTok->isOneOf(
1251
              Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1252
              Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1253
              Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1254
              Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1255
              Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1256
              Keywords.kw_instanceof, Keywords.kw_interface,
1257
              Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1258
}
1259

1260
static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1261
                                 const FormatToken *FormatTok) {
1262
  return FormatTok->Tok.isLiteral() ||
1263
         FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1264
         mustBeJSIdent(Keywords, FormatTok);
1265
}
1266

1267
// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1268
// when encountered after a value (see mustBeJSIdentOrValue).
1269
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1270
                           const FormatToken *FormatTok) {
1271
  return FormatTok->isOneOf(
1272
      tok::kw_return, Keywords.kw_yield,
1273
      // conditionals
1274
      tok::kw_if, tok::kw_else,
1275
      // loops
1276
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1277
      // switch/case
1278
      tok::kw_switch, tok::kw_case,
1279
      // exceptions
1280
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1281
      // declaration
1282
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1283
      Keywords.kw_async, Keywords.kw_function,
1284
      // import/export
1285
      Keywords.kw_import, tok::kw_export);
1286
}
1287

1288
// Checks whether a token is a type in K&R C (aka C78).
1289
static bool isC78Type(const FormatToken &Tok) {
1290
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1291
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
1292
                     tok::identifier);
1293
}
1294

1295
// This function checks whether a token starts the first parameter declaration
1296
// in a K&R C (aka C78) function definition, e.g.:
1297
//   int f(a, b)
1298
//   short a, b;
1299
//   {
1300
//      return a + b;
1301
//   }
1302
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1303
                               const FormatToken *FuncName) {
1304
  assert(Tok);
1305
  assert(Next);
1306
  assert(FuncName);
1307

1308
  if (FuncName->isNot(tok::identifier))
1309
    return false;
1310

1311
  const FormatToken *Prev = FuncName->Previous;
1312
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1313
    return false;
1314

1315
  if (!isC78Type(*Tok) &&
1316
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1317
    return false;
1318
  }
1319

1320
  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1321
    return false;
1322

1323
  Tok = Tok->Previous;
1324
  if (!Tok || Tok->isNot(tok::r_paren))
1325
    return false;
1326

1327
  Tok = Tok->Previous;
1328
  if (!Tok || Tok->isNot(tok::identifier))
1329
    return false;
1330

1331
  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1332
}
1333

1334
bool UnwrappedLineParser::parseModuleImport() {
1335
  assert(FormatTok->is(Keywords.kw_import) && "'import' expected");
1336

1337
  if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
1338
      !Token->Tok.getIdentifierInfo() &&
1339
      !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
1340
    return false;
1341
  }
1342

1343
  nextToken();
1344
  while (!eof()) {
1345
    if (FormatTok->is(tok::colon)) {
1346
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
1347
    }
1348
    // Handle import <foo/bar.h> as we would an include statement.
1349
    else if (FormatTok->is(tok::less)) {
1350
      nextToken();
1351
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1352
        // Mark tokens up to the trailing line comments as implicit string
1353
        // literals.
1354
        if (FormatTok->isNot(tok::comment) &&
1355
            !FormatTok->TokenText.starts_with("//")) {
1356
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1357
        }
1358
        nextToken();
1359
      }
1360
    }
1361
    if (FormatTok->is(tok::semi)) {
1362
      nextToken();
1363
      break;
1364
    }
1365
    nextToken();
1366
  }
1367

1368
  addUnwrappedLine();
1369
  return true;
1370
}
1371

1372
// readTokenWithJavaScriptASI reads the next token and terminates the current
1373
// line if JavaScript Automatic Semicolon Insertion must
1374
// happen between the current token and the next token.
1375
//
1376
// This method is conservative - it cannot cover all edge cases of JavaScript,
1377
// but only aims to correctly handle certain well known cases. It *must not*
1378
// return true in speculative cases.
1379
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1380
  FormatToken *Previous = FormatTok;
1381
  readToken();
1382
  FormatToken *Next = FormatTok;
1383

1384
  bool IsOnSameLine =
1385
      CommentsBeforeNextToken.empty()
1386
          ? Next->NewlinesBefore == 0
1387
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1388
  if (IsOnSameLine)
1389
    return;
1390

1391
  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1392
  bool PreviousStartsTemplateExpr =
1393
      Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
1394
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1395
    // If the line contains an '@' sign, the previous token might be an
1396
    // annotation, which can precede another identifier/value.
1397
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1398
      return LineNode.Tok->is(tok::at);
1399
    });
1400
    if (HasAt)
1401
      return;
1402
  }
1403
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
1404
    return addUnwrappedLine();
1405
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1406
  bool NextEndsTemplateExpr =
1407
      Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
1408
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1409
      (PreviousMustBeValue ||
1410
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1411
                         tok::minusminus))) {
1412
    return addUnwrappedLine();
1413
  }
1414
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1415
      isJSDeclOrStmt(Keywords, Next)) {
1416
    return addUnwrappedLine();
1417
  }
1418
}
1419

1420
void UnwrappedLineParser::parseStructuralElement(
1421
    const FormatToken *OpeningBrace, IfStmtKind *IfKind,
1422
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1423
  if (Style.Language == FormatStyle::LK_TableGen &&
1424
      FormatTok->is(tok::pp_include)) {
1425
    nextToken();
1426
    if (FormatTok->is(tok::string_literal))
1427
      nextToken();
1428
    addUnwrappedLine();
1429
    return;
1430
  }
1431

1432
  if (IsCpp) {
1433
    while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
1434
    }
1435
  } else if (Style.isVerilog()) {
1436
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
1437
      parseForOrWhileLoop(/*HasParens=*/false);
1438
      return;
1439
    }
1440
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
1441
      parseForOrWhileLoop();
1442
      return;
1443
    }
1444
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
1445
                           Keywords.kw_assume, Keywords.kw_cover)) {
1446
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
1447
      return;
1448
    }
1449

1450
    // Skip things that can exist before keywords like 'if' and 'case'.
1451
    while (true) {
1452
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
1453
                             Keywords.kw_unique0)) {
1454
        nextToken();
1455
      } else if (FormatTok->is(tok::l_paren) &&
1456
                 Tokens->peekNextToken()->is(tok::star)) {
1457
        parseParens();
1458
      } else {
1459
        break;
1460
      }
1461
    }
1462
  }
1463

1464
  // Tokens that only make sense at the beginning of a line.
1465
  if (FormatTok->isAccessSpecifierKeyword()) {
1466
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1467
        Style.isCSharp()) {
1468
      nextToken();
1469
    } else {
1470
      parseAccessSpecifier();
1471
    }
1472
    return;
1473
  }
1474
  switch (FormatTok->Tok.getKind()) {
1475
  case tok::kw_asm:
1476
    nextToken();
1477
    if (FormatTok->is(tok::l_brace)) {
1478
      FormatTok->setFinalizedType(TT_InlineASMBrace);
1479
      nextToken();
1480
      while (FormatTok && !eof()) {
1481
        if (FormatTok->is(tok::r_brace)) {
1482
          FormatTok->setFinalizedType(TT_InlineASMBrace);
1483
          nextToken();
1484
          addUnwrappedLine();
1485
          break;
1486
        }
1487
        FormatTok->Finalized = true;
1488
        nextToken();
1489
      }
1490
    }
1491
    break;
1492
  case tok::kw_namespace:
1493
    parseNamespace();
1494
    return;
1495
  case tok::kw_if: {
1496
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497
      // field/method declaration.
1498
      break;
1499
    }
1500
    FormatToken *Tok = parseIfThenElse(IfKind);
1501
    if (IfLeftBrace)
1502
      *IfLeftBrace = Tok;
1503
    return;
1504
  }
1505
  case tok::kw_for:
1506
  case tok::kw_while:
1507
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1508
      // field/method declaration.
1509
      break;
1510
    }
1511
    parseForOrWhileLoop();
1512
    return;
1513
  case tok::kw_do:
1514
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1515
      // field/method declaration.
1516
      break;
1517
    }
1518
    parseDoWhile();
1519
    if (HasDoWhile)
1520
      *HasDoWhile = true;
1521
    return;
1522
  case tok::kw_switch:
1523
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1524
      // 'switch: string' field declaration.
1525
      break;
1526
    }
1527
    parseSwitch(/*IsExpr=*/false);
1528
    return;
1529
  case tok::kw_default: {
1530
    // In Verilog default along with other labels are handled in the next loop.
1531
    if (Style.isVerilog())
1532
      break;
1533
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534
      // 'default: string' field declaration.
1535
      break;
1536
    }
1537
    auto *Default = FormatTok;
1538
    nextToken();
1539
    if (FormatTok->is(tok::colon)) {
1540
      FormatTok->setFinalizedType(TT_CaseLabelColon);
1541
      parseLabel();
1542
      return;
1543
    }
1544
    if (FormatTok->is(tok::arrow)) {
1545
      FormatTok->setFinalizedType(TT_CaseLabelArrow);
1546
      Default->setFinalizedType(TT_SwitchExpressionLabel);
1547
      parseLabel();
1548
      return;
1549
    }
1550
    // e.g. "default void f() {}" in a Java interface.
1551
    break;
1552
  }
1553
  case tok::kw_case:
1554
    // Proto: there are no switch/case statements.
1555
    if (Style.Language == FormatStyle::LK_Proto) {
1556
      nextToken();
1557
      return;
1558
    }
1559
    if (Style.isVerilog()) {
1560
      parseBlock();
1561
      addUnwrappedLine();
1562
      return;
1563
    }
1564
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1565
      // 'case: string' field declaration.
1566
      nextToken();
1567
      break;
1568
    }
1569
    parseCaseLabel();
1570
    return;
1571
  case tok::kw_try:
1572
  case tok::kw___try:
1573
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
1574
      // field/method declaration.
1575
      break;
1576
    }
1577
    parseTryCatch();
1578
    return;
1579
  case tok::kw_extern:
1580
    nextToken();
1581
    if (Style.isVerilog()) {
1582
      // In Verilog and extern module declaration looks like a start of module.
1583
      // But there is no body and endmodule. So we handle it separately.
1584
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
1585
        parseVerilogHierarchyHeader();
1586
        return;
1587
      }
1588
    } else if (FormatTok->is(tok::string_literal)) {
1589
      nextToken();
1590
      if (FormatTok->is(tok::l_brace)) {
1591
        if (Style.BraceWrapping.AfterExternBlock)
1592
          addUnwrappedLine();
1593
        // Either we indent or for backwards compatibility we follow the
1594
        // AfterExternBlock style.
1595
        unsigned AddLevels =
1596
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1597
                    (Style.BraceWrapping.AfterExternBlock &&
1598
                     Style.IndentExternBlock ==
1599
                         FormatStyle::IEBS_AfterExternBlock)
1600
                ? 1u
1601
                : 0u;
1602
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1603
        addUnwrappedLine();
1604
        return;
1605
      }
1606
    }
1607
    break;
1608
  case tok::kw_export:
1609
    if (Style.isJavaScript()) {
1610
      parseJavaScriptEs6ImportExport();
1611
      return;
1612
    }
1613
    if (IsCpp) {
1614
      nextToken();
1615
      if (FormatTok->is(tok::kw_namespace)) {
1616
        parseNamespace();
1617
        return;
1618
      }
1619
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
1620
        return;
1621
    }
1622
    break;
1623
  case tok::kw_inline:
1624
    nextToken();
1625
    if (FormatTok->is(tok::kw_namespace)) {
1626
      parseNamespace();
1627
      return;
1628
    }
1629
    break;
1630
  case tok::identifier:
1631
    if (FormatTok->is(TT_ForEachMacro)) {
1632
      parseForOrWhileLoop();
1633
      return;
1634
    }
1635
    if (FormatTok->is(TT_MacroBlockBegin)) {
1636
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1637
                 /*MunchSemi=*/false);
1638
      return;
1639
    }
1640
    if (FormatTok->is(Keywords.kw_import)) {
1641
      if (Style.isJavaScript()) {
1642
        parseJavaScriptEs6ImportExport();
1643
        return;
1644
      }
1645
      if (Style.Language == FormatStyle::LK_Proto) {
1646
        nextToken();
1647
        if (FormatTok->is(tok::kw_public))
1648
          nextToken();
1649
        if (FormatTok->isNot(tok::string_literal))
1650
          return;
1651
        nextToken();
1652
        if (FormatTok->is(tok::semi))
1653
          nextToken();
1654
        addUnwrappedLine();
1655
        return;
1656
      }
1657
      if (IsCpp && parseModuleImport())
1658
        return;
1659
    }
1660
    if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1661
                                    Keywords.kw_slots, Keywords.kw_qslots)) {
1662
      nextToken();
1663
      if (FormatTok->is(tok::colon)) {
1664
        nextToken();
1665
        addUnwrappedLine();
1666
        return;
1667
      }
1668
    }
1669
    if (IsCpp && FormatTok->is(TT_StatementMacro)) {
1670
      parseStatementMacro();
1671
      return;
1672
    }
1673
    if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
1674
      parseNamespace();
1675
      return;
1676
    }
1677
    // In Verilog labels can be any expression, so we don't do them here.
1678
    // JS doesn't have macros, and within classes colons indicate fields, not
1679
    // labels.
1680
    // TableGen doesn't have labels.
1681
    if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
1682
        Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
1683
      nextToken();
1684
      if (!Line->InMacroBody || CurrentLines->size() > 1)
1685
        Line->Tokens.begin()->Tok->MustBreakBefore = true;
1686
      FormatTok->setFinalizedType(TT_GotoLabelColon);
1687
      parseLabel(!Style.IndentGotoLabels);
1688
      if (HasLabel)
1689
        *HasLabel = true;
1690
      return;
1691
    }
1692
    // In all other cases, parse the declaration.
1693
    break;
1694
  default:
1695
    break;
1696
  }
1697

1698
  for (const bool InRequiresExpression =
1699
           OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
1700
       !eof();) {
1701
    if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
1702
      if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
1703
          Next && Next->isBinaryOperator()) {
1704
        FormatTok->Tok.setKind(tok::identifier);
1705
      }
1706
    }
1707
    const FormatToken *Previous = FormatTok->Previous;
1708
    switch (FormatTok->Tok.getKind()) {
1709
    case tok::at:
1710
      nextToken();
1711
      if (FormatTok->is(tok::l_brace)) {
1712
        nextToken();
1713
        parseBracedList();
1714
        break;
1715
      } else if (Style.Language == FormatStyle::LK_Java &&
1716
                 FormatTok->is(Keywords.kw_interface)) {
1717
        nextToken();
1718
        break;
1719
      }
1720
      switch (FormatTok->Tok.getObjCKeywordID()) {
1721
      case tok::objc_public:
1722
      case tok::objc_protected:
1723
      case tok::objc_package:
1724
      case tok::objc_private:
1725
        return parseAccessSpecifier();
1726
      case tok::objc_interface:
1727
      case tok::objc_implementation:
1728
        return parseObjCInterfaceOrImplementation();
1729
      case tok::objc_protocol:
1730
        if (parseObjCProtocol())
1731
          return;
1732
        break;
1733
      case tok::objc_end:
1734
        return; // Handled by the caller.
1735
      case tok::objc_optional:
1736
      case tok::objc_required:
1737
        nextToken();
1738
        addUnwrappedLine();
1739
        return;
1740
      case tok::objc_autoreleasepool:
1741
        nextToken();
1742
        if (FormatTok->is(tok::l_brace)) {
1743
          if (Style.BraceWrapping.AfterControlStatement ==
1744
              FormatStyle::BWACS_Always) {
1745
            addUnwrappedLine();
1746
          }
1747
          parseBlock();
1748
        }
1749
        addUnwrappedLine();
1750
        return;
1751
      case tok::objc_synchronized:
1752
        nextToken();
1753
        if (FormatTok->is(tok::l_paren)) {
1754
          // Skip synchronization object
1755
          parseParens();
1756
        }
1757
        if (FormatTok->is(tok::l_brace)) {
1758
          if (Style.BraceWrapping.AfterControlStatement ==
1759
              FormatStyle::BWACS_Always) {
1760
            addUnwrappedLine();
1761
          }
1762
          parseBlock();
1763
        }
1764
        addUnwrappedLine();
1765
        return;
1766
      case tok::objc_try:
1767
        // This branch isn't strictly necessary (the kw_try case below would
1768
        // do this too after the tok::at is parsed above).  But be explicit.
1769
        parseTryCatch();
1770
        return;
1771
      default:
1772
        break;
1773
      }
1774
      break;
1775
    case tok::kw_requires: {
1776
      if (IsCpp) {
1777
        bool ParsedClause = parseRequires();
1778
        if (ParsedClause)
1779
          return;
1780
      } else {
1781
        nextToken();
1782
      }
1783
      break;
1784
    }
1785
    case tok::kw_enum:
1786
      // Ignore if this is part of "template <enum ..." or "... -> enum" or
1787
      // "template <..., enum ...>".
1788
      if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
1789
        nextToken();
1790
        break;
1791
      }
1792

1793
      // parseEnum falls through and does not yet add an unwrapped line as an
1794
      // enum definition can start a structural element.
1795
      if (!parseEnum())
1796
        break;
1797
      // This only applies to C++ and Verilog.
1798
      if (!IsCpp && !Style.isVerilog()) {
1799
        addUnwrappedLine();
1800
        return;
1801
      }
1802
      break;
1803
    case tok::kw_typedef:
1804
      nextToken();
1805
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1806
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1807
                             Keywords.kw_CF_CLOSED_ENUM,
1808
                             Keywords.kw_NS_CLOSED_ENUM)) {
1809
        parseEnum();
1810
      }
1811
      break;
1812
    case tok::kw_class:
1813
      if (Style.isVerilog()) {
1814
        parseBlock();
1815
        addUnwrappedLine();
1816
        return;
1817
      }
1818
      if (Style.isTableGen()) {
1819
        // Do nothing special. In this case the l_brace becomes FunctionLBrace.
1820
        // This is same as def and so on.
1821
        nextToken();
1822
        break;
1823
      }
1824
      [[fallthrough]];
1825
    case tok::kw_struct:
1826
    case tok::kw_union:
1827
      if (parseStructLike())
1828
        return;
1829
      break;
1830
    case tok::kw_decltype:
1831
      nextToken();
1832
      if (FormatTok->is(tok::l_paren)) {
1833
        parseParens();
1834
        assert(FormatTok->Previous);
1835
        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
1836
                                              tok::l_paren)) {
1837
          Line->SeenDecltypeAuto = true;
1838
        }
1839
      }
1840
      break;
1841
    case tok::period:
1842
      nextToken();
1843
      // In Java, classes have an implicit static member "class".
1844
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1845
          FormatTok->is(tok::kw_class)) {
1846
        nextToken();
1847
      }
1848
      if (Style.isJavaScript() && FormatTok &&
1849
          FormatTok->Tok.getIdentifierInfo()) {
1850
        // JavaScript only has pseudo keywords, all keywords are allowed to
1851
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1852
        nextToken();
1853
      }
1854
      break;
1855
    case tok::semi:
1856
      nextToken();
1857
      addUnwrappedLine();
1858
      return;
1859
    case tok::r_brace:
1860
      addUnwrappedLine();
1861
      return;
1862
    case tok::l_paren: {
1863
      parseParens();
1864
      // Break the unwrapped line if a K&R C function definition has a parameter
1865
      // declaration.
1866
      if (OpeningBrace || !IsCpp || !Previous || eof())
1867
        break;
1868
      if (isC78ParameterDecl(FormatTok,
1869
                             Tokens->peekNextToken(/*SkipComment=*/true),
1870
                             Previous)) {
1871
        addUnwrappedLine();
1872
        return;
1873
      }
1874
      break;
1875
    }
1876
    case tok::kw_operator:
1877
      nextToken();
1878
      if (FormatTok->isBinaryOperator())
1879
        nextToken();
1880
      break;
1881
    case tok::caret:
1882
      nextToken();
1883
      // Block return type.
1884
      if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
1885
        nextToken();
1886
        // Return types: pointers are ok too.
1887
        while (FormatTok->is(tok::star))
1888
          nextToken();
1889
      }
1890
      // Block argument list.
1891
      if (FormatTok->is(tok::l_paren))
1892
        parseParens();
1893
      // Block body.
1894
      if (FormatTok->is(tok::l_brace))
1895
        parseChildBlock();
1896
      break;
1897
    case tok::l_brace:
1898
      if (InRequiresExpression)
1899
        FormatTok->setFinalizedType(TT_BracedListLBrace);
1900
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1901
        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
1902
        // A block outside of parentheses must be the last part of a
1903
        // structural element.
1904
        // FIXME: Figure out cases where this is not true, and add projections
1905
        // for them (the one we know is missing are lambdas).
1906
        if (Style.Language == FormatStyle::LK_Java &&
1907
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1908
          // If necessary, we could set the type to something different than
1909
          // TT_FunctionLBrace.
1910
          if (Style.BraceWrapping.AfterControlStatement ==
1911
              FormatStyle::BWACS_Always) {
1912
            addUnwrappedLine();
1913
          }
1914
        } else if (Style.BraceWrapping.AfterFunction) {
1915
          addUnwrappedLine();
1916
        }
1917
        if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
1918
          FormatTok->setFinalizedType(TT_FunctionLBrace);
1919
        parseBlock();
1920
        IsDecltypeAutoFunction = false;
1921
        addUnwrappedLine();
1922
        return;
1923
      }
1924
      // Otherwise this was a braced init list, and the structural
1925
      // element continues.
1926
      break;
1927
    case tok::kw_try:
1928
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
1929
        // field/method declaration.
1930
        nextToken();
1931
        break;
1932
      }
1933
      // We arrive here when parsing function-try blocks.
1934
      if (Style.BraceWrapping.AfterFunction)
1935
        addUnwrappedLine();
1936
      parseTryCatch();
1937
      return;
1938
    case tok::identifier: {
1939
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1940
          Line->MustBeDeclaration) {
1941
        addUnwrappedLine();
1942
        parseCSharpGenericTypeConstraint();
1943
        break;
1944
      }
1945
      if (FormatTok->is(TT_MacroBlockEnd)) {
1946
        addUnwrappedLine();
1947
        return;
1948
      }
1949

1950
      // Function declarations (as opposed to function expressions) are parsed
1951
      // on their own unwrapped line by continuing this loop. Function
1952
      // expressions (functions that are not on their own line) must not create
1953
      // a new unwrapped line, so they are special cased below.
1954
      size_t TokenCount = Line->Tokens.size();
1955
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1956
          (TokenCount > 1 ||
1957
           (TokenCount == 1 &&
1958
            Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
1959
        tryToParseJSFunction();
1960
        break;
1961
      }
1962
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1963
          FormatTok->is(Keywords.kw_interface)) {
1964
        if (Style.isJavaScript()) {
1965
          // In JavaScript/TypeScript, "interface" can be used as a standalone
1966
          // identifier, e.g. in `var interface = 1;`. If "interface" is
1967
          // followed by another identifier, it is very likely to be an actual
1968
          // interface declaration.
1969
          unsigned StoredPosition = Tokens->getPosition();
1970
          FormatToken *Next = Tokens->getNextToken();
1971
          FormatTok = Tokens->setPosition(StoredPosition);
1972
          if (!mustBeJSIdent(Keywords, Next)) {
1973
            nextToken();
1974
            break;
1975
          }
1976
        }
1977
        parseRecord();
1978
        addUnwrappedLine();
1979
        return;
1980
      }
1981

1982
      if (Style.isVerilog()) {
1983
        if (FormatTok->is(Keywords.kw_table)) {
1984
          parseVerilogTable();
1985
          return;
1986
        }
1987
        if (Keywords.isVerilogBegin(*FormatTok) ||
1988
            Keywords.isVerilogHierarchy(*FormatTok)) {
1989
          parseBlock();
1990
          addUnwrappedLine();
1991
          return;
1992
        }
1993
      }
1994

1995
      if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
1996
        if (parseStructLike())
1997
          return;
1998
        break;
1999
      }
2000

2001
      if (IsCpp && FormatTok->is(TT_StatementMacro)) {
2002
        parseStatementMacro();
2003
        return;
2004
      }
2005

2006
      // See if the following token should start a new unwrapped line.
2007
      StringRef Text = FormatTok->TokenText;
2008

2009
      FormatToken *PreviousToken = FormatTok;
2010
      nextToken();
2011

2012
      // JS doesn't have macros, and within classes colons indicate fields, not
2013
      // labels.
2014
      if (Style.isJavaScript())
2015
        break;
2016

2017
      auto OneTokenSoFar = [&]() {
2018
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
2019
        while (I != E && I->Tok->is(tok::comment))
2020
          ++I;
2021
        if (Style.isVerilog())
2022
          while (I != E && I->Tok->is(tok::hash))
2023
            ++I;
2024
        return I != E && (++I == E);
2025
      };
2026
      if (OneTokenSoFar()) {
2027
        // Recognize function-like macro usages without trailing semicolon as
2028
        // well as free-standing macros like Q_OBJECT.
2029
        bool FunctionLike = FormatTok->is(tok::l_paren);
2030
        if (FunctionLike)
2031
          parseParens();
2032

2033
        bool FollowedByNewline =
2034
            CommentsBeforeNextToken.empty()
2035
                ? FormatTok->NewlinesBefore > 0
2036
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
2037

2038
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
2039
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
2040
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
2041
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
2042
          addUnwrappedLine();
2043
          return;
2044
        }
2045
      }
2046
      break;
2047
    }
2048
    case tok::equal:
2049
      if ((Style.isJavaScript() || Style.isCSharp()) &&
2050
          FormatTok->is(TT_FatArrow)) {
2051
        tryToParseChildBlock();
2052
        break;
2053
      }
2054

2055
      nextToken();
2056
      if (FormatTok->is(tok::l_brace)) {
2057
        // Block kind should probably be set to BK_BracedInit for any language.
2058
        // C# needs this change to ensure that array initialisers and object
2059
        // initialisers are indented the same way.
2060
        if (Style.isCSharp())
2061
          FormatTok->setBlockKind(BK_BracedInit);
2062
        // TableGen's defset statement has syntax of the form,
2063
        // `defset <type> <name> = { <statement>... }`
2064
        if (Style.isTableGen() &&
2065
            Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
2066
          FormatTok->setFinalizedType(TT_FunctionLBrace);
2067
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2068
                     /*MunchSemi=*/false);
2069
          addUnwrappedLine();
2070
          break;
2071
        }
2072
        nextToken();
2073
        parseBracedList();
2074
      } else if (Style.Language == FormatStyle::LK_Proto &&
2075
                 FormatTok->is(tok::less)) {
2076
        nextToken();
2077
        parseBracedList(/*IsAngleBracket=*/true);
2078
      }
2079
      break;
2080
    case tok::l_square:
2081
      parseSquare();
2082
      break;
2083
    case tok::kw_new:
2084
      parseNew();
2085
      break;
2086
    case tok::kw_switch:
2087
      if (Style.Language == FormatStyle::LK_Java)
2088
        parseSwitch(/*IsExpr=*/true);
2089
      else
2090
        nextToken();
2091
      break;
2092
    case tok::kw_case:
2093
      // Proto: there are no switch/case statements.
2094
      if (Style.Language == FormatStyle::LK_Proto) {
2095
        nextToken();
2096
        return;
2097
      }
2098
      // In Verilog switch is called case.
2099
      if (Style.isVerilog()) {
2100
        parseBlock();
2101
        addUnwrappedLine();
2102
        return;
2103
      }
2104
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
2105
        // 'case: string' field declaration.
2106
        nextToken();
2107
        break;
2108
      }
2109
      parseCaseLabel();
2110
      break;
2111
    case tok::kw_default:
2112
      nextToken();
2113
      if (Style.isVerilog()) {
2114
        if (FormatTok->is(tok::colon)) {
2115
          // The label will be handled in the next iteration.
2116
          break;
2117
        }
2118
        if (FormatTok->is(Keywords.kw_clocking)) {
2119
          // A default clocking block.
2120
          parseBlock();
2121
          addUnwrappedLine();
2122
          return;
2123
        }
2124
        parseVerilogCaseLabel();
2125
        return;
2126
      }
2127
      break;
2128
    case tok::colon:
2129
      nextToken();
2130
      if (Style.isVerilog()) {
2131
        parseVerilogCaseLabel();
2132
        return;
2133
      }
2134
      break;
2135
    case tok::greater:
2136
      nextToken();
2137
      if (FormatTok->is(tok::l_brace))
2138
        FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
2139
      break;
2140
    default:
2141
      nextToken();
2142
      break;
2143
    }
2144
  }
2145
}
2146

2147
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
2148
  assert(FormatTok->is(tok::l_brace));
2149
  if (!Style.isCSharp())
2150
    return false;
2151
  // See if it's a property accessor.
2152
  if (FormatTok->Previous->isNot(tok::identifier))
2153
    return false;
2154

2155
  // See if we are inside a property accessor.
2156
  //
2157
  // Record the current tokenPosition so that we can advance and
2158
  // reset the current token. `Next` is not set yet so we need
2159
  // another way to advance along the token stream.
2160
  unsigned int StoredPosition = Tokens->getPosition();
2161
  FormatToken *Tok = Tokens->getNextToken();
2162

2163
  // A trivial property accessor is of the form:
2164
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2165
  // Track these as they do not require line breaks to be introduced.
2166
  bool HasSpecialAccessor = false;
2167
  bool IsTrivialPropertyAccessor = true;
2168
  while (!eof()) {
2169
    if (Tok->isAccessSpecifierKeyword() ||
2170
        Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
2171
                     Keywords.kw_init, Keywords.kw_set)) {
2172
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2173
        HasSpecialAccessor = true;
2174
      Tok = Tokens->getNextToken();
2175
      continue;
2176
    }
2177
    if (Tok->isNot(tok::r_brace))
2178
      IsTrivialPropertyAccessor = false;
2179
    break;
2180
  }
2181

2182
  if (!HasSpecialAccessor) {
2183
    Tokens->setPosition(StoredPosition);
2184
    return false;
2185
  }
2186

2187
  // Try to parse the property accessor:
2188
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2189
  Tokens->setPosition(StoredPosition);
2190
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2191
    addUnwrappedLine();
2192
  nextToken();
2193
  do {
2194
    switch (FormatTok->Tok.getKind()) {
2195
    case tok::r_brace:
2196
      nextToken();
2197
      if (FormatTok->is(tok::equal)) {
2198
        while (!eof() && FormatTok->isNot(tok::semi))
2199
          nextToken();
2200
        nextToken();
2201
      }
2202
      addUnwrappedLine();
2203
      return true;
2204
    case tok::l_brace:
2205
      ++Line->Level;
2206
      parseBlock(/*MustBeDeclaration=*/true);
2207
      addUnwrappedLine();
2208
      --Line->Level;
2209
      break;
2210
    case tok::equal:
2211
      if (FormatTok->is(TT_FatArrow)) {
2212
        ++Line->Level;
2213
        do {
2214
          nextToken();
2215
        } while (!eof() && FormatTok->isNot(tok::semi));
2216
        nextToken();
2217
        addUnwrappedLine();
2218
        --Line->Level;
2219
        break;
2220
      }
2221
      nextToken();
2222
      break;
2223
    default:
2224
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2225
                             Keywords.kw_set) &&
2226
          !IsTrivialPropertyAccessor) {
2227
        // Non-trivial get/set needs to be on its own line.
2228
        addUnwrappedLine();
2229
      }
2230
      nextToken();
2231
    }
2232
  } while (!eof());
2233

2234
  // Unreachable for well-formed code (paired '{' and '}').
2235
  return true;
2236
}
2237

2238
bool UnwrappedLineParser::tryToParseLambda() {
2239
  assert(FormatTok->is(tok::l_square));
2240
  if (!IsCpp) {
2241
    nextToken();
2242
    return false;
2243
  }
2244
  FormatToken &LSquare = *FormatTok;
2245
  if (!tryToParseLambdaIntroducer())
2246
    return false;
2247

2248
  bool SeenArrow = false;
2249
  bool InTemplateParameterList = false;
2250

2251
  while (FormatTok->isNot(tok::l_brace)) {
2252
    if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
2253
      nextToken();
2254
      continue;
2255
    }
2256
    switch (FormatTok->Tok.getKind()) {
2257
    case tok::l_brace:
2258
      break;
2259
    case tok::l_paren:
2260
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
2261
      break;
2262
    case tok::l_square:
2263
      parseSquare();
2264
      break;
2265
    case tok::less:
2266
      assert(FormatTok->Previous);
2267
      if (FormatTok->Previous->is(tok::r_square))
2268
        InTemplateParameterList = true;
2269
      nextToken();
2270
      break;
2271
    case tok::kw_auto:
2272
    case tok::kw_class:
2273
    case tok::kw_struct:
2274
    case tok::kw_union:
2275
    case tok::kw_template:
2276
    case tok::kw_typename:
2277
    case tok::amp:
2278
    case tok::star:
2279
    case tok::kw_const:
2280
    case tok::kw_constexpr:
2281
    case tok::kw_consteval:
2282
    case tok::comma:
2283
    case tok::greater:
2284
    case tok::identifier:
2285
    case tok::numeric_constant:
2286
    case tok::coloncolon:
2287
    case tok::kw_mutable:
2288
    case tok::kw_noexcept:
2289
    case tok::kw_static:
2290
      nextToken();
2291
      break;
2292
    // Specialization of a template with an integer parameter can contain
2293
    // arithmetic, logical, comparison and ternary operators.
2294
    //
2295
    // FIXME: This also accepts sequences of operators that are not in the scope
2296
    // of a template argument list.
2297
    //
2298
    // In a C++ lambda a template type can only occur after an arrow. We use
2299
    // this as an heuristic to distinguish between Objective-C expressions
2300
    // followed by an `a->b` expression, such as:
2301
    // ([obj func:arg] + a->b)
2302
    // Otherwise the code below would parse as a lambda.
2303
    case tok::plus:
2304
    case tok::minus:
2305
    case tok::exclaim:
2306
    case tok::tilde:
2307
    case tok::slash:
2308
    case tok::percent:
2309
    case tok::lessless:
2310
    case tok::pipe:
2311
    case tok::pipepipe:
2312
    case tok::ampamp:
2313
    case tok::caret:
2314
    case tok::equalequal:
2315
    case tok::exclaimequal:
2316
    case tok::greaterequal:
2317
    case tok::lessequal:
2318
    case tok::question:
2319
    case tok::colon:
2320
    case tok::ellipsis:
2321
    case tok::kw_true:
2322
    case tok::kw_false:
2323
      if (SeenArrow || InTemplateParameterList) {
2324
        nextToken();
2325
        break;
2326
      }
2327
      return true;
2328
    case tok::arrow:
2329
      // This might or might not actually be a lambda arrow (this could be an
2330
      // ObjC method invocation followed by a dereferencing arrow). We might
2331
      // reset this back to TT_Unknown in TokenAnnotator.
2332
      FormatTok->setFinalizedType(TT_LambdaArrow);
2333
      SeenArrow = true;
2334
      nextToken();
2335
      break;
2336
    case tok::kw_requires: {
2337
      auto *RequiresToken = FormatTok;
2338
      nextToken();
2339
      parseRequiresClause(RequiresToken);
2340
      break;
2341
    }
2342
    case tok::equal:
2343
      if (!InTemplateParameterList)
2344
        return true;
2345
      nextToken();
2346
      break;
2347
    default:
2348
      return true;
2349
    }
2350
  }
2351

2352
  FormatTok->setFinalizedType(TT_LambdaLBrace);
2353
  LSquare.setFinalizedType(TT_LambdaLSquare);
2354

2355
  NestedLambdas.push_back(Line->SeenDecltypeAuto);
2356
  parseChildBlock();
2357
  assert(!NestedLambdas.empty());
2358
  NestedLambdas.pop_back();
2359

2360
  return true;
2361
}
2362

2363
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2364
  const FormatToken *Previous = FormatTok->Previous;
2365
  const FormatToken *LeftSquare = FormatTok;
2366
  nextToken();
2367
  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
2368
                     !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
2369
                                        tok::kw_co_yield, tok::kw_co_return)) ||
2370
                    Previous->closesScope())) ||
2371
      LeftSquare->isCppStructuredBinding(IsCpp)) {
2372
    return false;
2373
  }
2374
  if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
2375
    return false;
2376
  if (FormatTok->is(tok::r_square)) {
2377
    const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
2378
    if (Next->is(tok::greater))
2379
      return false;
2380
  }
2381
  parseSquare(/*LambdaIntroducer=*/true);
2382
  return true;
2383
}
2384

2385
void UnwrappedLineParser::tryToParseJSFunction() {
2386
  assert(FormatTok->is(Keywords.kw_function));
2387
  if (FormatTok->is(Keywords.kw_async))
2388
    nextToken();
2389
  // Consume "function".
2390
  nextToken();
2391

2392
  // Consume * (generator function). Treat it like C++'s overloaded operators.
2393
  if (FormatTok->is(tok::star)) {
2394
    FormatTok->setFinalizedType(TT_OverloadedOperator);
2395
    nextToken();
2396
  }
2397

2398
  // Consume function name.
2399
  if (FormatTok->is(tok::identifier))
2400
    nextToken();
2401

2402
  if (FormatTok->isNot(tok::l_paren))
2403
    return;
2404

2405
  // Parse formal parameter list.
2406
  parseParens();
2407

2408
  if (FormatTok->is(tok::colon)) {
2409
    // Parse a type definition.
2410
    nextToken();
2411

2412
    // Eat the type declaration. For braced inline object types, balance braces,
2413
    // otherwise just parse until finding an l_brace for the function body.
2414
    if (FormatTok->is(tok::l_brace))
2415
      tryToParseBracedList();
2416
    else
2417
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2418
        nextToken();
2419
  }
2420

2421
  if (FormatTok->is(tok::semi))
2422
    return;
2423

2424
  parseChildBlock();
2425
}
2426

2427
bool UnwrappedLineParser::tryToParseBracedList() {
2428
  if (FormatTok->is(BK_Unknown))
2429
    calculateBraceTypes();
2430
  assert(FormatTok->isNot(BK_Unknown));
2431
  if (FormatTok->is(BK_Block))
2432
    return false;
2433
  nextToken();
2434
  parseBracedList();
2435
  return true;
2436
}
2437

2438
bool UnwrappedLineParser::tryToParseChildBlock() {
2439
  assert(Style.isJavaScript() || Style.isCSharp());
2440
  assert(FormatTok->is(TT_FatArrow));
2441
  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2442
  // They always start an expression or a child block if followed by a curly
2443
  // brace.
2444
  nextToken();
2445
  if (FormatTok->isNot(tok::l_brace))
2446
    return false;
2447
  parseChildBlock();
2448
  return true;
2449
}
2450

2451
bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
2452
  assert(!IsAngleBracket || !IsEnum);
2453
  bool HasError = false;
2454

2455
  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2456
  // replace this by using parseAssignmentExpression() inside.
2457
  do {
2458
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2459
        tryToParseChildBlock()) {
2460
      continue;
2461
    }
2462
    if (Style.isJavaScript()) {
2463
      if (FormatTok->is(Keywords.kw_function)) {
2464
        tryToParseJSFunction();
2465
        continue;
2466
      }
2467
      if (FormatTok->is(tok::l_brace)) {
2468
        // Could be a method inside of a braced list `{a() { return 1; }}`.
2469
        if (tryToParseBracedList())
2470
          continue;
2471
        parseChildBlock();
2472
      }
2473
    }
2474
    if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
2475
      if (IsEnum) {
2476
        FormatTok->setBlockKind(BK_Block);
2477
        if (!Style.AllowShortEnumsOnASingleLine)
2478
          addUnwrappedLine();
2479
      }
2480
      nextToken();
2481
      return !HasError;
2482
    }
2483
    switch (FormatTok->Tok.getKind()) {
2484
    case tok::l_square:
2485
      if (Style.isCSharp())
2486
        parseSquare();
2487
      else
2488
        tryToParseLambda();
2489
      break;
2490
    case tok::l_paren:
2491
      parseParens();
2492
      // JavaScript can just have free standing methods and getters/setters in
2493
      // object literals. Detect them by a "{" following ")".
2494
      if (Style.isJavaScript()) {
2495
        if (FormatTok->is(tok::l_brace))
2496
          parseChildBlock();
2497
        break;
2498
      }
2499
      break;
2500
    case tok::l_brace:
2501
      // Assume there are no blocks inside a braced init list apart
2502
      // from the ones we explicitly parse out (like lambdas).
2503
      FormatTok->setBlockKind(BK_BracedInit);
2504
      if (!IsAngleBracket) {
2505
        auto *Prev = FormatTok->Previous;
2506
        if (Prev && Prev->is(tok::greater))
2507
          Prev->setFinalizedType(TT_TemplateCloser);
2508
      }
2509
      nextToken();
2510
      parseBracedList();
2511
      break;
2512
    case tok::less:
2513
      nextToken();
2514
      if (IsAngleBracket)
2515
        parseBracedList(/*IsAngleBracket=*/true);
2516
      break;
2517
    case tok::semi:
2518
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
2519
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2520
      // used for error recovery if we have otherwise determined that this is
2521
      // a braced list.
2522
      if (Style.isJavaScript()) {
2523
        nextToken();
2524
        break;
2525
      }
2526
      HasError = true;
2527
      if (!IsEnum)
2528
        return false;
2529
      nextToken();
2530
      break;
2531
    case tok::comma:
2532
      nextToken();
2533
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2534
        addUnwrappedLine();
2535
      break;
2536
    default:
2537
      nextToken();
2538
      break;
2539
    }
2540
  } while (!eof());
2541
  return false;
2542
}
2543

2544
/// \brief Parses a pair of parentheses (and everything between them).
2545
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2546
/// double ampersands. This applies for all nested scopes as well.
2547
///
2548
/// Returns whether there is a `=` token between the parentheses.
2549
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2550
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2551
  auto *LeftParen = FormatTok;
2552
  bool SeenEqual = false;
2553
  bool MightBeFoldExpr = false;
2554
  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
2555
  nextToken();
2556
  do {
2557
    switch (FormatTok->Tok.getKind()) {
2558
    case tok::l_paren:
2559
      if (parseParens(AmpAmpTokenType))
2560
        SeenEqual = true;
2561
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2562
        parseChildBlock();
2563
      break;
2564
    case tok::r_paren: {
2565
      auto *Prev = LeftParen->Previous;
2566
      if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
2567
          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
2568
        const auto *Next = Tokens->peekNextToken();
2569
        const bool DoubleParens =
2570
            Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
2571
        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
2572
        const bool Blacklisted =
2573
            PrevPrev &&
2574
            (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
2575
             (SeenEqual &&
2576
              (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
2577
               PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
2578
        const bool ReturnParens =
2579
            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
2580
            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
2581
             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
2582
            Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
2583
            Next->is(tok::semi);
2584
        if ((DoubleParens && !Blacklisted) || ReturnParens) {
2585
          LeftParen->Optional = true;
2586
          FormatTok->Optional = true;
2587
        }
2588
      }
2589
      if (Prev) {
2590
        if (Prev->is(TT_TypenameMacro)) {
2591
          LeftParen->setFinalizedType(TT_TypeDeclarationParen);
2592
          FormatTok->setFinalizedType(TT_TypeDeclarationParen);
2593
        } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) {
2594
          Prev->setFinalizedType(TT_TemplateCloser);
2595
        }
2596
      }
2597
      nextToken();
2598
      return SeenEqual;
2599
    }
2600
    case tok::r_brace:
2601
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
2602
      return SeenEqual;
2603
    case tok::l_square:
2604
      tryToParseLambda();
2605
      break;
2606
    case tok::l_brace:
2607
      if (!tryToParseBracedList())
2608
        parseChildBlock();
2609
      break;
2610
    case tok::at:
2611
      nextToken();
2612
      if (FormatTok->is(tok::l_brace)) {
2613
        nextToken();
2614
        parseBracedList();
2615
      }
2616
      break;
2617
    case tok::ellipsis:
2618
      MightBeFoldExpr = true;
2619
      nextToken();
2620
      break;
2621
    case tok::equal:
2622
      SeenEqual = true;
2623
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2624
        tryToParseChildBlock();
2625
      else
2626
        nextToken();
2627
      break;
2628
    case tok::kw_class:
2629
      if (Style.isJavaScript())
2630
        parseRecord(/*ParseAsExpr=*/true);
2631
      else
2632
        nextToken();
2633
      break;
2634
    case tok::identifier:
2635
      if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
2636
        tryToParseJSFunction();
2637
      else
2638
        nextToken();
2639
      break;
2640
    case tok::kw_switch:
2641
      if (Style.Language == FormatStyle::LK_Java)
2642
        parseSwitch(/*IsExpr=*/true);
2643
      else
2644
        nextToken();
2645
      break;
2646
    case tok::kw_requires: {
2647
      auto RequiresToken = FormatTok;
2648
      nextToken();
2649
      parseRequiresExpression(RequiresToken);
2650
      break;
2651
    }
2652
    case tok::ampamp:
2653
      if (AmpAmpTokenType != TT_Unknown)
2654
        FormatTok->setFinalizedType(AmpAmpTokenType);
2655
      [[fallthrough]];
2656
    default:
2657
      nextToken();
2658
      break;
2659
    }
2660
  } while (!eof());
2661
  return SeenEqual;
2662
}
2663

2664
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2665
  if (!LambdaIntroducer) {
2666
    assert(FormatTok->is(tok::l_square) && "'[' expected.");
2667
    if (tryToParseLambda())
2668
      return;
2669
  }
2670
  do {
2671
    switch (FormatTok->Tok.getKind()) {
2672
    case tok::l_paren:
2673
      parseParens();
2674
      break;
2675
    case tok::r_square:
2676
      nextToken();
2677
      return;
2678
    case tok::r_brace:
2679
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
2680
      return;
2681
    case tok::l_square:
2682
      parseSquare();
2683
      break;
2684
    case tok::l_brace: {
2685
      if (!tryToParseBracedList())
2686
        parseChildBlock();
2687
      break;
2688
    }
2689
    case tok::at:
2690
    case tok::colon:
2691
      nextToken();
2692
      if (FormatTok->is(tok::l_brace)) {
2693
        nextToken();
2694
        parseBracedList();
2695
      }
2696
      break;
2697
    default:
2698
      nextToken();
2699
      break;
2700
    }
2701
  } while (!eof());
2702
}
2703

2704
void UnwrappedLineParser::keepAncestorBraces() {
2705
  if (!Style.RemoveBracesLLVM)
2706
    return;
2707

2708
  const int MaxNestingLevels = 2;
2709
  const int Size = NestedTooDeep.size();
2710
  if (Size >= MaxNestingLevels)
2711
    NestedTooDeep[Size - MaxNestingLevels] = true;
2712
  NestedTooDeep.push_back(false);
2713
}
2714

2715
/// Returns the last token of \p Line that is not a comment, or nullptr if the
/// line has no such token.
static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
  // Walk the tokens back to front and stop at the first non-comment.
  for (auto It = Line.Tokens.rbegin(), End = Line.Tokens.rend(); It != End;
       ++It) {
    FormatToken *const Candidate = It->Tok;
    if (Candidate->isNot(tok::comment))
      return Candidate;
  }

  return nullptr;
}
2722

2723
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2724
  FormatToken *Tok = nullptr;
2725

2726
  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2727
      PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
2728
    Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
2729
              ? getLastNonComment(*Line)
2730
              : Line->Tokens.back().Tok;
2731
    assert(Tok);
2732
    if (Tok->BraceCount < 0) {
2733
      assert(Tok->BraceCount == -1);
2734
      Tok = nullptr;
2735
    } else {
2736
      Tok->BraceCount = -1;
2737
    }
2738
  }
2739

2740
  addUnwrappedLine();
2741
  ++Line->Level;
2742
  ++Line->UnbracedBodyLevel;
2743
  parseStructuralElement();
2744
  --Line->UnbracedBodyLevel;
2745

2746
  if (Tok) {
2747
    assert(!Line->InPPDirective);
2748
    Tok = nullptr;
2749
    for (const auto &L : llvm::reverse(*CurrentLines)) {
2750
      if (!L.InPPDirective && getLastNonComment(L)) {
2751
        Tok = L.Tokens.back().Tok;
2752
        break;
2753
      }
2754
    }
2755
    assert(Tok);
2756
    ++Tok->BraceCount;
2757
  }
2758

2759
  if (CheckEOF && eof())
2760
    addUnwrappedLine();
2761

2762
  --Line->Level;
2763
}
2764

2765
/// Flags \p LeftBrace and its matching right brace as optional.
///
/// Does nothing if \p LeftBrace is null or has no matching brace recorded.
static void markOptionalBraces(FormatToken *LeftBrace) {
  if (LeftBrace == nullptr)
    return;

  assert(LeftBrace->is(tok::l_brace));

  if (FormatToken *const RightBrace = LeftBrace->MatchingParen) {
    // Both braces must form a consistent pair before they are flagged.
    assert(RightBrace->is(tok::r_brace));
    assert(RightBrace->MatchingParen == LeftBrace);
    assert(LeftBrace->Optional == RightBrace->Optional);

    LeftBrace->Optional = true;
    RightBrace->Optional = true;
  } else {
    // An unmatched brace must not have been flagged before.
    assert(!LeftBrace->Optional);
  }
}
2784

2785
void UnwrappedLineParser::handleAttributes() {
2786
  // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2787
  if (FormatTok->isAttribute())
2788
    nextToken();
2789
  else if (FormatTok->is(tok::l_square))
2790
    handleCppAttributes();
2791
}
2792

2793
bool UnwrappedLineParser::handleCppAttributes() {
2794
  // Handle [[likely]] / [[unlikely]] attributes.
2795
  assert(FormatTok->is(tok::l_square));
2796
  if (!tryToParseSimpleAttribute())
2797
    return false;
2798
  parseSquare();
2799
  return true;
2800
}
2801

2802
/// Returns whether \c Tok begins a block.
2803
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
2804
  // FIXME: rename the function or make
2805
  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
2806
  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
2807
                           : Tok.is(tok::l_brace);
2808
}
2809

2810
/// Parses an `if` statement (or a Verilog assert-like statement) together
/// with its optional `else` part, starting at the keyword.
///
/// \param IfKind If non-null and RemoveBracesLLVM is on, receives the kind of
/// statement that was parsed (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces Forces the braces of the `if` body to be kept.
/// \param IsVerilogAssert Enables the Verilog assert forms (`assert #0`,
/// `assert final`, missing then-action).
/// \returns The opening brace of the `if` body when RemoveBracesLLVM is on
/// (nullptr if the body is unbraced); always nullptr when RemoveBracesLLVM is
/// off or when a Verilog assert without a then-action was parsed.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`: skip the `#` (plus a following
    // number) or one of the final/property/sequence keywords.
    const bool HasHash = FormatTok->is(Keywords.kw_verilogHash);
    if (HasHash || FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                      Keywords.kw_sequence)) {
      nextToken();
    }
    if (HasHash && FormatTok->is(tok::numeric_constant))
      nextToken();
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  // Simply skip until `then`; this range only contains a value.
  if (Style.isTableGen())
    while (!eof() && FormatTok->isNot(Keywords.kw_then))
      nextToken();

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // The braces of an `if consteval` are always kept.
  const bool IsConsteval = FormatTok->is(tok::kw_consteval);
  bool KeepIfBraces = IsConsteval || !Style.RemoveBracesLLVM || KeepBraces;
  if (IsConsteval) {
    nextToken();
  } else {
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();

  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  // Set when the line holding the closing brace of the `if` body still has to
  // be finished because no `else` follows on it.
  bool PendingLineAfterIfBlock = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      PendingLineAfterIfBlock = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    const bool IfBraceUnmatched = IfLeftBrace && !IfLeftBrace->MatchingParen;
    const bool BodyEndsInOpenIf = IfBlockKind == IfStmtKind::IfOnly ||
                                  IfBlockKind == IfStmtKind::IfElseIf;
    KeepIfBraces = KeepIfBraces || IfBraceUnmatched || NestedTooDeep.back() ||
                   BodyEndsInOpenIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->isNot(tok::kw_else)) {
    // No else part.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (PendingLineAfterIfBlock)
      addUnwrappedLine();
  } else {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();

    if (isBlockBegin(*FormatTok)) {
      // `else { ... }`
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *const NestedIfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && NestedIfLBrace && !NestedIfLBrace->Optional) {
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if ...`
      const FormatToken *const Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the `if` on its own, indented
      // line.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      // The chained `if` pushes its own NestedTooDeep entry, so ours is taken
      // off for the duration of the recursive call and restored afterwards.
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      // `else` followed by an unbraced statement.
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  }

  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool ElseBraceUnmatched =
      ElseLeftBrace && !ElseLeftBrace->MatchingParen;
  // Read and drop the entry pushed by keepAncestorBraces() above.
  const bool SelfTooDeep = NestedTooDeep.pop_back_val();
  KeepElseBraces = KeepElseBraces || ElseBraceUnmatched || SelfTooDeep;

  if (KeepIfBraces || KeepElseBraces) {
    // The braces stay: unlink the `if` brace pair.
    if (IfLeftBrace) {
      if (FormatToken *const IfRightBrace = IfLeftBrace->MatchingParen) {
        assert(IfRightBrace->MatchingParen == IfLeftBrace);
        assert(!IfLeftBrace->Optional);
        assert(!IfRightBrace->Optional);
        IfLeftBrace->MatchingParen = nullptr;
        IfRightBrace->MatchingParen = nullptr;
      }
    }
  } else {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2982

2983
void UnwrappedLineParser::parseTryCatch() {
2984
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2985
  nextToken();
2986
  bool NeedsUnwrappedLine = false;
2987
  bool HasCtorInitializer = false;
2988
  if (FormatTok->is(tok::colon)) {
2989
    auto *Colon = FormatTok;
2990
    // We are in a function try block, what comes is an initializer list.
2991
    nextToken();
2992
    if (FormatTok->is(tok::identifier)) {
2993
      HasCtorInitializer = true;
2994
      Colon->setFinalizedType(TT_CtorInitializerColon);
2995
    }
2996

2997
    // In case identifiers were removed by clang-tidy, what might follow is
2998
    // multiple commas in sequence - before the first identifier.
2999
    while (FormatTok->is(tok::comma))
3000
      nextToken();
3001

3002
    while (FormatTok->is(tok::identifier)) {
3003
      nextToken();
3004
      if (FormatTok->is(tok::l_paren)) {
3005
        parseParens();
3006
      } else if (FormatTok->is(tok::l_brace)) {
3007
        nextToken();
3008
        parseBracedList();
3009
      }
3010

3011
      // In case identifiers were removed by clang-tidy, what might follow is
3012
      // multiple commas in sequence - after the first identifier.
3013
      while (FormatTok->is(tok::comma))
3014
        nextToken();
3015
    }
3016
  }
3017
  // Parse try with resource.
3018
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
3019
    parseParens();
3020

3021
  keepAncestorBraces();
3022

3023
  if (FormatTok->is(tok::l_brace)) {
3024
    if (HasCtorInitializer)
3025
      FormatTok->setFinalizedType(TT_FunctionLBrace);
3026
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3027
    parseBlock();
3028
    if (Style.BraceWrapping.BeforeCatch)
3029
      addUnwrappedLine();
3030
    else
3031
      NeedsUnwrappedLine = true;
3032
  } else if (FormatTok->isNot(tok::kw_catch)) {
3033
    // The C++ standard requires a compound-statement after a try.
3034
    // If there's none, we try to assume there's a structuralElement
3035
    // and try to continue.
3036
    addUnwrappedLine();
3037
    ++Line->Level;
3038
    parseStructuralElement();
3039
    --Line->Level;
3040
  }
3041
  while (true) {
3042
    if (FormatTok->is(tok::at))
3043
      nextToken();
3044
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
3045
                             tok::kw___finally) ||
3046
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3047
           FormatTok->is(Keywords.kw_finally)) ||
3048
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
3049
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
3050
      break;
3051
    }
3052
    nextToken();
3053
    while (FormatTok->isNot(tok::l_brace)) {
3054
      if (FormatTok->is(tok::l_paren)) {
3055
        parseParens();
3056
        continue;
3057
      }
3058
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
3059
        if (Style.RemoveBracesLLVM)
3060
          NestedTooDeep.pop_back();
3061
        return;
3062
      }
3063
      nextToken();
3064
    }
3065
    NeedsUnwrappedLine = false;
3066
    Line->MustBeDeclaration = false;
3067
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3068
    parseBlock();
3069
    if (Style.BraceWrapping.BeforeCatch)
3070
      addUnwrappedLine();
3071
    else
3072
      NeedsUnwrappedLine = true;
3073
  }
3074

3075
  if (Style.RemoveBracesLLVM)
3076
    NestedTooDeep.pop_back();
3077

3078
  if (NeedsUnwrappedLine)
3079
    addUnwrappedLine();
3080
}
3081

3082
void UnwrappedLineParser::parseNamespace() {
3083
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
3084
         "'namespace' expected");
3085

3086
  const FormatToken &InitialToken = *FormatTok;
3087
  nextToken();
3088
  if (InitialToken.is(TT_NamespaceMacro)) {
3089
    parseParens();
3090
  } else {
3091
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
3092
                              tok::l_square, tok::period, tok::l_paren) ||
3093
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
3094
      if (FormatTok->is(tok::l_square))
3095
        parseSquare();
3096
      else if (FormatTok->is(tok::l_paren))
3097
        parseParens();
3098
      else
3099
        nextToken();
3100
    }
3101
  }
3102
  if (FormatTok->is(tok::l_brace)) {
3103
    FormatTok->setFinalizedType(TT_NamespaceLBrace);
3104

3105
    if (ShouldBreakBeforeBrace(Style, InitialToken))
3106
      addUnwrappedLine();
3107

3108
    unsigned AddLevels =
3109
        Style.NamespaceIndentation == FormatStyle::NI_All ||
3110
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
3111
                 DeclarationScopeStack.size() > 1)
3112
            ? 1u
3113
            : 0u;
3114
    bool ManageWhitesmithsBraces =
3115
        AddLevels == 0u &&
3116
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3117

3118
    // If we're in Whitesmiths mode, indent the brace if we're not indenting
3119
    // the whole block.
3120
    if (ManageWhitesmithsBraces)
3121
      ++Line->Level;
3122

3123
    // Munch the semicolon after a namespace. This is more common than one would
3124
    // think. Putting the semicolon into its own line is very ugly.
3125
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
3126
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
3127
               ManageWhitesmithsBraces);
3128

3129
    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
3130

3131
    if (ManageWhitesmithsBraces)
3132
      --Line->Level;
3133
  }
3134
  // FIXME: Add error handling.
3135
}
3136

3137
void UnwrappedLineParser::parseNew() {
3138
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
3139
  nextToken();
3140

3141
  if (Style.isCSharp()) {
3142
    do {
3143
      // Handle constructor invocation, e.g. `new(field: value)`.
3144
      if (FormatTok->is(tok::l_paren))
3145
        parseParens();
3146

3147
      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
3148
      if (FormatTok->is(tok::l_brace))
3149
        parseBracedList();
3150

3151
      if (FormatTok->isOneOf(tok::semi, tok::comma))
3152
        return;
3153

3154
      nextToken();
3155
    } while (!eof());
3156
  }
3157

3158
  if (Style.Language != FormatStyle::LK_Java)
3159
    return;
3160

3161
  // In Java, we can parse everything up to the parens, which aren't optional.
3162
  do {
3163
    // There should not be a ;, { or } before the new's open paren.
3164
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
3165
      return;
3166

3167
    // Consume the parens.
3168
    if (FormatTok->is(tok::l_paren)) {
3169
      parseParens();
3170

3171
      // If there is a class body of an anonymous class, consume that as child.
3172
      if (FormatTok->is(tok::l_brace))
3173
        parseChildBlock();
3174
      return;
3175
    }
3176
    nextToken();
3177
  } while (!eof());
3178
}
3179

3180
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
3181
  keepAncestorBraces();
3182

3183
  if (isBlockBegin(*FormatTok)) {
3184
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
3185
    FormatToken *LeftBrace = FormatTok;
3186
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3187
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
3188
               /*MunchSemi=*/true, KeepBraces);
3189
    setPreviousRBraceType(TT_ControlStatementRBrace);
3190
    if (!KeepBraces) {
3191
      assert(!NestedTooDeep.empty());
3192
      if (!NestedTooDeep.back())
3193
        markOptionalBraces(LeftBrace);
3194
    }
3195
    if (WrapRightBrace)
3196
      addUnwrappedLine();
3197
  } else {
3198
    parseUnbracedBody();
3199
  }
3200

3201
  if (!KeepBraces)
3202
    NestedTooDeep.pop_back();
3203
}
3204

3205
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
3206
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
3207
          (Style.isVerilog() &&
3208
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
3209
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
3210
                              Keywords.kw_final, Keywords.kw_initial,
3211
                              Keywords.kw_foreach, Keywords.kw_forever,
3212
                              Keywords.kw_repeat))) &&
3213
         "'for', 'while' or foreach macro expected");
3214
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
3215
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
3216

3217
  nextToken();
3218
  // JS' for await ( ...
3219
  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
3220
    nextToken();
3221
  if (IsCpp && FormatTok->is(tok::kw_co_await))
3222
    nextToken();
3223
  if (HasParens && FormatTok->is(tok::l_paren)) {
3224
    // The type is only set for Verilog basically because we were afraid to
3225
    // change the existing behavior for loops. See the discussion on D121756 for
3226
    // details.
3227
    if (Style.isVerilog())
3228
      FormatTok->setFinalizedType(TT_ConditionLParen);
3229
    parseParens();
3230
  }
3231

3232
  if (Style.isVerilog()) {
3233
    // Event control.
3234
    parseVerilogSensitivityList();
3235
  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
3236
             Tokens->getPreviousToken()->is(tok::r_paren)) {
3237
    nextToken();
3238
    addUnwrappedLine();
3239
    return;
3240
  }
3241

3242
  handleAttributes();
3243
  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
3244
}
3245

3246
void UnwrappedLineParser::parseDoWhile() {
3247
  assert(FormatTok->is(tok::kw_do) && "'do' expected");
3248
  nextToken();
3249

3250
  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
3251

3252
  // FIXME: Add error handling.
3253
  if (FormatTok->isNot(tok::kw_while)) {
3254
    addUnwrappedLine();
3255
    return;
3256
  }
3257

3258
  FormatTok->setFinalizedType(TT_DoWhile);
3259

3260
  // If in Whitesmiths mode, the line with the while() needs to be indented
3261
  // to the same level as the block.
3262
  if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
3263
    ++Line->Level;
3264

3265
  nextToken();
3266
  parseStructuralElement();
3267
}
3268

3269
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
3270
  nextToken();
3271
  unsigned OldLineLevel = Line->Level;
3272

3273
  if (LeftAlignLabel)
3274
    Line->Level = 0;
3275
  else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
3276
    --Line->Level;
3277

3278
  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
3279
      FormatTok->is(tok::l_brace)) {
3280

3281
    CompoundStatementIndenter Indenter(this, Line->Level,
3282
                                       Style.BraceWrapping.AfterCaseLabel,
3283
                                       Style.BraceWrapping.IndentBraces);
3284
    parseBlock();
3285
    if (FormatTok->is(tok::kw_break)) {
3286
      if (Style.BraceWrapping.AfterControlStatement ==
3287
          FormatStyle::BWACS_Always) {
3288
        addUnwrappedLine();
3289
        if (!Style.IndentCaseBlocks &&
3290
            Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
3291
          ++Line->Level;
3292
        }
3293
      }
3294
      parseStructuralElement();
3295
    }
3296
    addUnwrappedLine();
3297
  } else {
3298
    if (FormatTok->is(tok::semi))
3299
      nextToken();
3300
    addUnwrappedLine();
3301
  }
3302
  Line->Level = OldLineLevel;
3303
  if (FormatTok->isNot(tok::l_brace)) {
3304
    parseStructuralElement();
3305
    addUnwrappedLine();
3306
  }
3307
}
3308

3309
void UnwrappedLineParser::parseCaseLabel() {
3310
  assert(FormatTok->is(tok::kw_case) && "'case' expected");
3311
  auto *Case = FormatTok;
3312

3313
  // FIXME: fix handling of complex expressions here.
3314
  do {
3315
    nextToken();
3316
    if (FormatTok->is(tok::colon)) {
3317
      FormatTok->setFinalizedType(TT_CaseLabelColon);
3318
      break;
3319
    }
3320
    if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
3321
      FormatTok->setFinalizedType(TT_CaseLabelArrow);
3322
      Case->setFinalizedType(TT_SwitchExpressionLabel);
3323
      break;
3324
    }
3325
  } while (!eof());
3326
  parseLabel();
3327
}
3328

3329
void UnwrappedLineParser::parseSwitch(bool IsExpr) {
3330
  assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3331
  nextToken();
3332
  if (FormatTok->is(tok::l_paren))
3333
    parseParens();
3334

3335
  keepAncestorBraces();
3336

3337
  if (FormatTok->is(tok::l_brace)) {
3338
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
3339
    FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
3340
                                       : TT_ControlStatementLBrace);
3341
    if (IsExpr)
3342
      parseChildBlock();
3343
    else
3344
      parseBlock();
3345
    setPreviousRBraceType(TT_ControlStatementRBrace);
3346
    if (!IsExpr)
3347
      addUnwrappedLine();
3348
  } else {
3349
    addUnwrappedLine();
3350
    ++Line->Level;
3351
    parseStructuralElement();
3352
    --Line->Level;
3353
  }
3354

3355
  if (Style.RemoveBracesLLVM)
3356
    NestedTooDeep.pop_back();
3357
}
3358

3359
// Operators that can follow a C variable.
3360
static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
3361
  switch (Kind) {
3362
  case tok::ampamp:
3363
  case tok::ampequal:
3364
  case tok::arrow:
3365
  case tok::caret:
3366
  case tok::caretequal:
3367
  case tok::comma:
3368
  case tok::ellipsis:
3369
  case tok::equal:
3370
  case tok::equalequal:
3371
  case tok::exclaim:
3372
  case tok::exclaimequal:
3373
  case tok::greater:
3374
  case tok::greaterequal:
3375
  case tok::greatergreater:
3376
  case tok::greatergreaterequal:
3377
  case tok::l_paren:
3378
  case tok::l_square:
3379
  case tok::less:
3380
  case tok::lessequal:
3381
  case tok::lessless:
3382
  case tok::lesslessequal:
3383
  case tok::minus:
3384
  case tok::minusequal:
3385
  case tok::minusminus:
3386
  case tok::percent:
3387
  case tok::percentequal:
3388
  case tok::period:
3389
  case tok::pipe:
3390
  case tok::pipeequal:
3391
  case tok::pipepipe:
3392
  case tok::plus:
3393
  case tok::plusequal:
3394
  case tok::plusplus:
3395
  case tok::question:
3396
  case tok::r_brace:
3397
  case tok::r_paren:
3398
  case tok::r_square:
3399
  case tok::semi:
3400
  case tok::slash:
3401
  case tok::slashequal:
3402
  case tok::star:
3403
  case tok::starequal:
3404
    return true;
3405
  default:
3406
    return false;
3407
  }
3408
}
3409

3410
void UnwrappedLineParser::parseAccessSpecifier() {
3411
  FormatToken *AccessSpecifierCandidate = FormatTok;
3412
  nextToken();
3413
  // Understand Qt's slots.
3414
  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3415
    nextToken();
3416
  // Otherwise, we don't know what it is, and we'd better keep the next token.
3417
  if (FormatTok->is(tok::colon)) {
3418
    nextToken();
3419
    addUnwrappedLine();
3420
  } else if (FormatTok->isNot(tok::coloncolon) &&
3421
             !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3422
    // Not a variable name nor namespace name.
3423
    addUnwrappedLine();
3424
  } else if (AccessSpecifierCandidate) {
3425
    // Consider the access specifier to be a C identifier.
3426
    AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3427
  }
3428
}
3429

3430
/// \brief Parses a requires, decides if it is a clause or an expression.
3431
/// \pre The current token has to be the requires keyword.
3432
/// \returns true if it parsed a clause.
3433
bool UnwrappedLineParser::parseRequires() {
3434
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3435
  auto RequiresToken = FormatTok;
3436

3437
  // We try to guess if it is a requires clause, or a requires expression. For
3438
  // that we first consume the keyword and check the next token.
3439
  nextToken();
3440

3441
  switch (FormatTok->Tok.getKind()) {
3442
  case tok::l_brace:
3443
    // This can only be an expression, never a clause.
3444
    parseRequiresExpression(RequiresToken);
3445
    return false;
3446
  case tok::l_paren:
3447
    // Clauses and expression can start with a paren, it's unclear what we have.
3448
    break;
3449
  default:
3450
    // All other tokens can only be a clause.
3451
    parseRequiresClause(RequiresToken);
3452
    return true;
3453
  }
3454

3455
  // Looking forward we would have to decide if there are function declaration
3456
  // like arguments to the requires expression:
3457
  // requires (T t) {
3458
  // Or there is a constraint expression for the requires clause:
3459
  // requires (C<T> && ...
3460

3461
  // But first let's look behind.
3462
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3463

3464
  if (!PreviousNonComment ||
3465
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3466
    // If there is no token, or an expression left brace, we are a requires
3467
    // clause within a requires expression.
3468
    parseRequiresClause(RequiresToken);
3469
    return true;
3470
  }
3471

3472
  switch (PreviousNonComment->Tok.getKind()) {
3473
  case tok::greater:
3474
  case tok::r_paren:
3475
  case tok::kw_noexcept:
3476
  case tok::kw_const:
3477
    // This is a requires clause.
3478
    parseRequiresClause(RequiresToken);
3479
    return true;
3480
  case tok::amp:
3481
  case tok::ampamp: {
3482
    // This can be either:
3483
    // if (... && requires (T t) ...)
3484
    // Or
3485
    // void member(...) && requires (C<T> ...
3486
    // We check the one token before that for a const:
3487
    // void member(...) const && requires (C<T> ...
3488
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3489
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3490
      parseRequiresClause(RequiresToken);
3491
      return true;
3492
    }
3493
    break;
3494
  }
3495
  default:
3496
    if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
3497
      // This is a requires clause.
3498
      parseRequiresClause(RequiresToken);
3499
      return true;
3500
    }
3501
    // It's an expression.
3502
    parseRequiresExpression(RequiresToken);
3503
    return false;
3504
  }
3505

3506
  // Now we look forward and try to check if the paren content is a parameter
3507
  // list. The parameters can be cv-qualified and contain references or
3508
  // pointers.
3509
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3510
  // of stuff: typename, const, *, &, &&, ::, identifiers.
3511

3512
  unsigned StoredPosition = Tokens->getPosition();
3513
  FormatToken *NextToken = Tokens->getNextToken();
3514
  int Lookahead = 0;
3515
  auto PeekNext = [&Lookahead, &NextToken, this] {
3516
    ++Lookahead;
3517
    NextToken = Tokens->getNextToken();
3518
  };
3519

3520
  bool FoundType = false;
3521
  bool LastWasColonColon = false;
3522
  int OpenAngles = 0;
3523

3524
  for (; Lookahead < 50; PeekNext()) {
3525
    switch (NextToken->Tok.getKind()) {
3526
    case tok::kw_volatile:
3527
    case tok::kw_const:
3528
    case tok::comma:
3529
      if (OpenAngles == 0) {
3530
        FormatTok = Tokens->setPosition(StoredPosition);
3531
        parseRequiresExpression(RequiresToken);
3532
        return false;
3533
      }
3534
      break;
3535
    case tok::eof:
3536
      // Break out of the loop.
3537
      Lookahead = 50;
3538
      break;
3539
    case tok::coloncolon:
3540
      LastWasColonColon = true;
3541
      break;
3542
    case tok::kw_decltype:
3543
    case tok::identifier:
3544
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3545
        FormatTok = Tokens->setPosition(StoredPosition);
3546
        parseRequiresExpression(RequiresToken);
3547
        return false;
3548
      }
3549
      FoundType = true;
3550
      LastWasColonColon = false;
3551
      break;
3552
    case tok::less:
3553
      ++OpenAngles;
3554
      break;
3555
    case tok::greater:
3556
      --OpenAngles;
3557
      break;
3558
    default:
3559
      if (NextToken->isTypeName(LangOpts)) {
3560
        FormatTok = Tokens->setPosition(StoredPosition);
3561
        parseRequiresExpression(RequiresToken);
3562
        return false;
3563
      }
3564
      break;
3565
    }
3566
  }
3567
  // This seems to be a complicated expression, just assume it's a clause.
3568
  FormatTok = Tokens->setPosition(StoredPosition);
3569
  parseRequiresClause(RequiresToken);
3570
  return true;
3571
}
3572

3573
/// \brief Parses a requires clause.
3574
/// \param RequiresToken The requires keyword token, which starts this clause.
3575
/// \pre We need to be on the next token after the requires keyword.
3576
/// \sa parseRequiresExpression
3577
///
3578
/// Returns if it either has finished parsing the clause, or it detects, that
3579
/// the clause is incorrect.
3580
void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3581
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
3582
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3583

3584
  // If there is no previous token, we are within a requires expression,
3585
  // otherwise we will always have the template or function declaration in front
3586
  // of it.
3587
  bool InRequiresExpression =
3588
      !RequiresToken->Previous ||
3589
      RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3590

3591
  RequiresToken->setFinalizedType(InRequiresExpression
3592
                                      ? TT_RequiresClauseInARequiresExpression
3593
                                      : TT_RequiresClause);
3594

3595
  // NOTE: parseConstraintExpression is only ever called from this function.
3596
  // It could be inlined into here.
3597
  parseConstraintExpression();
3598

3599
  if (!InRequiresExpression)
3600
    FormatTok->Previous->ClosesRequiresClause = true;
3601
}
3602

3603
/// \brief Parses a requires expression.
3604
/// \param RequiresToken The requires keyword token, which starts this clause.
3605
/// \pre We need to be on the next token after the requires keyword.
3606
/// \sa parseRequiresClause
3607
///
3608
/// Returns if it either has finished parsing the expression, or it detects,
3609
/// that the expression is incorrect.
3610
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3611
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
3612
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3613

3614
  RequiresToken->setFinalizedType(TT_RequiresExpression);
3615

3616
  if (FormatTok->is(tok::l_paren)) {
3617
    FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3618
    parseParens();
3619
  }
3620

3621
  if (FormatTok->is(tok::l_brace)) {
3622
    FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3623
    parseChildBlock();
3624
  }
3625
}
3626

3627
/// \brief Parses a constraint expression.
3628
///
3629
/// This is the body of a requires clause. It returns, when the parsing is
3630
/// complete, or the expression is incorrect.
3631
void UnwrappedLineParser::parseConstraintExpression() {
3632
  // The special handling for lambdas is needed since tryToParseLambda() eats a
3633
  // token and if a requires expression is the last part of a requires clause
3634
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3635
  // not set on the correct token. Thus we need to be aware if we even expect a
3636
  // lambda to be possible.
3637
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
3638
  bool LambdaNextTimeAllowed = true;
3639

3640
  // Within lambda declarations, it is permitted to put a requires clause after
3641
  // its template parameter list, which would place the requires clause right
3642
  // before the parentheses of the parameters of the lambda declaration. Thus,
3643
  // we track if we expect to see grouping parentheses at all.
3644
  // Without this check, `requires foo<T> (T t)` in the below example would be
3645
  // seen as the whole requires clause, accidentally eating the parameters of
3646
  // the lambda.
3647
  // [&]<typename T> requires foo<T> (T t) { ... };
3648
  bool TopLevelParensAllowed = true;
3649

3650
  do {
3651
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3652

3653
    switch (FormatTok->Tok.getKind()) {
3654
    case tok::kw_requires: {
3655
      auto RequiresToken = FormatTok;
3656
      nextToken();
3657
      parseRequiresExpression(RequiresToken);
3658
      break;
3659
    }
3660

3661
    case tok::l_paren:
3662
      if (!TopLevelParensAllowed)
3663
        return;
3664
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3665
      TopLevelParensAllowed = false;
3666
      break;
3667

3668
    case tok::l_square:
3669
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
3670
        return;
3671
      break;
3672

3673
    case tok::kw_const:
3674
    case tok::semi:
3675
    case tok::kw_class:
3676
    case tok::kw_struct:
3677
    case tok::kw_union:
3678
      return;
3679

3680
    case tok::l_brace:
3681
      // Potential function body.
3682
      return;
3683

3684
    case tok::ampamp:
3685
    case tok::pipepipe:
3686
      FormatTok->setFinalizedType(TT_BinaryOperator);
3687
      nextToken();
3688
      LambdaNextTimeAllowed = true;
3689
      TopLevelParensAllowed = true;
3690
      break;
3691

3692
    case tok::comma:
3693
    case tok::comment:
3694
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3695
      nextToken();
3696
      break;
3697

3698
    case tok::kw_sizeof:
3699
    case tok::greater:
3700
    case tok::greaterequal:
3701
    case tok::greatergreater:
3702
    case tok::less:
3703
    case tok::lessequal:
3704
    case tok::lessless:
3705
    case tok::equalequal:
3706
    case tok::exclaim:
3707
    case tok::exclaimequal:
3708
    case tok::plus:
3709
    case tok::minus:
3710
    case tok::star:
3711
    case tok::slash:
3712
      LambdaNextTimeAllowed = true;
3713
      TopLevelParensAllowed = true;
3714
      // Just eat them.
3715
      nextToken();
3716
      break;
3717

3718
    case tok::numeric_constant:
3719
    case tok::coloncolon:
3720
    case tok::kw_true:
3721
    case tok::kw_false:
3722
      TopLevelParensAllowed = false;
3723
      // Just eat them.
3724
      nextToken();
3725
      break;
3726

3727
    case tok::kw_static_cast:
3728
    case tok::kw_const_cast:
3729
    case tok::kw_reinterpret_cast:
3730
    case tok::kw_dynamic_cast:
3731
      nextToken();
3732
      if (FormatTok->isNot(tok::less))
3733
        return;
3734

3735
      nextToken();
3736
      parseBracedList(/*IsAngleBracket=*/true);
3737
      break;
3738

3739
    default:
3740
      if (!FormatTok->Tok.getIdentifierInfo()) {
3741
        // Identifiers are part of the default case, we check for more then
3742
        // tok::identifier to handle builtin type traits.
3743
        return;
3744
      }
3745

3746
      // We need to differentiate identifiers for a template deduction guide,
3747
      // variables, or function return types (the constraint expression has
3748
      // ended before that), and basically all other cases. But it's easier to
3749
      // check the other way around.
3750
      assert(FormatTok->Previous);
3751
      switch (FormatTok->Previous->Tok.getKind()) {
3752
      case tok::coloncolon:  // Nested identifier.
3753
      case tok::ampamp:      // Start of a function or variable for the
3754
      case tok::pipepipe:    // constraint expression. (binary)
3755
      case tok::exclaim:     // The same as above, but unary.
3756
      case tok::kw_requires: // Initial identifier of a requires clause.
3757
      case tok::equal:       // Initial identifier of a concept declaration.
3758
        break;
3759
      default:
3760
        return;
3761
      }
3762

3763
      // Read identifier with optional template declaration.
3764
      nextToken();
3765
      if (FormatTok->is(tok::less)) {
3766
        nextToken();
3767
        parseBracedList(/*IsAngleBracket=*/true);
3768
      }
3769
      TopLevelParensAllowed = false;
3770
      break;
3771
    }
3772
  } while (!eof());
3773
}
3774

3775
bool UnwrappedLineParser::parseEnum() {
3776
  const FormatToken &InitialToken = *FormatTok;
3777

3778
  // Won't be 'enum' for NS_ENUMs.
3779
  if (FormatTok->is(tok::kw_enum))
3780
    nextToken();
3781

3782
  // In TypeScript, "enum" can also be used as property name, e.g. in interface
3783
  // declarations. An "enum" keyword followed by a colon would be a syntax
3784
  // error and thus assume it is just an identifier.
3785
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3786
    return false;
3787

3788
  // In protobuf, "enum" can be used as a field name.
3789
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3790
    return false;
3791

3792
  if (IsCpp) {
3793
    // Eat up enum class ...
3794
    if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3795
      nextToken();
3796
    while (FormatTok->is(tok::l_square))
3797
      if (!handleCppAttributes())
3798
        return false;
3799
  }
3800

3801
  while (FormatTok->Tok.getIdentifierInfo() ||
3802
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3803
                            tok::greater, tok::comma, tok::question,
3804
                            tok::l_square)) {
3805
    if (Style.isVerilog()) {
3806
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
3807
      nextToken();
3808
      // In Verilog the base type can have dimensions.
3809
      while (FormatTok->is(tok::l_square))
3810
        parseSquare();
3811
    } else {
3812
      nextToken();
3813
    }
3814
    // We can have macros or attributes in between 'enum' and the enum name.
3815
    if (FormatTok->is(tok::l_paren))
3816
      parseParens();
3817
    if (FormatTok->is(tok::identifier)) {
3818
      nextToken();
3819
      // If there are two identifiers in a row, this is likely an elaborate
3820
      // return type. In Java, this can be "implements", etc.
3821
      if (IsCpp && FormatTok->is(tok::identifier))
3822
        return false;
3823
    }
3824
  }
3825

3826
  // Just a declaration or something is wrong.
3827
  if (FormatTok->isNot(tok::l_brace))
3828
    return true;
3829
  FormatTok->setFinalizedType(TT_EnumLBrace);
3830
  FormatTok->setBlockKind(BK_Block);
3831

3832
  if (Style.Language == FormatStyle::LK_Java) {
3833
    // Java enums are different.
3834
    parseJavaEnumBody();
3835
    return true;
3836
  }
3837
  if (Style.Language == FormatStyle::LK_Proto) {
3838
    parseBlock(/*MustBeDeclaration=*/true);
3839
    return true;
3840
  }
3841

3842
  if (!Style.AllowShortEnumsOnASingleLine &&
3843
      ShouldBreakBeforeBrace(Style, InitialToken)) {
3844
    addUnwrappedLine();
3845
  }
3846
  // Parse enum body.
3847
  nextToken();
3848
  if (!Style.AllowShortEnumsOnASingleLine) {
3849
    addUnwrappedLine();
3850
    Line->Level += 1;
3851
  }
3852
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
3853
  if (!Style.AllowShortEnumsOnASingleLine)
3854
    Line->Level -= 1;
3855
  if (HasError) {
3856
    if (FormatTok->is(tok::semi))
3857
      nextToken();
3858
    addUnwrappedLine();
3859
  }
3860
  setPreviousRBraceType(TT_EnumRBrace);
3861
  return true;
3862

3863
  // There is no addUnwrappedLine() here so that we fall through to parsing a
3864
  // structural element afterwards. Thus, in "enum A {} n, m;",
3865
  // "} n, m;" will end up in one unwrapped line.
3866
}
3867

3868
bool UnwrappedLineParser::parseStructLike() {
3869
  // parseRecord falls through and does not yet add an unwrapped line as a
3870
  // record declaration or definition can start a structural element.
3871
  parseRecord();
3872
  // This does not apply to Java, JavaScript and C#.
3873
  if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3874
      Style.isCSharp()) {
3875
    if (FormatTok->is(tok::semi))
3876
      nextToken();
3877
    addUnwrappedLine();
3878
    return true;
3879
  }
3880
  return false;
3881
}
3882

3883
namespace {
3884
// A class used to set and restore the Token position when peeking
3885
// ahead in the token source.
3886
class ScopedTokenPosition {
3887
  unsigned StoredPosition;
3888
  FormatTokenSource *Tokens;
3889

3890
public:
3891
  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3892
    assert(Tokens && "Tokens expected to not be null");
3893
    StoredPosition = Tokens->getPosition();
3894
  }
3895

3896
  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3897
};
3898
} // namespace
3899

3900
// Look to see if we have [[ by looking ahead, if
3901
// its not then rewind to the original position.
3902
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3903
  ScopedTokenPosition AutoPosition(Tokens);
3904
  FormatToken *Tok = Tokens->getNextToken();
3905
  // We already read the first [ check for the second.
3906
  if (Tok->isNot(tok::l_square))
3907
    return false;
3908
  // Double check that the attribute is just something
3909
  // fairly simple.
3910
  while (Tok->isNot(tok::eof)) {
3911
    if (Tok->is(tok::r_square))
3912
      break;
3913
    Tok = Tokens->getNextToken();
3914
  }
3915
  if (Tok->is(tok::eof))
3916
    return false;
3917
  Tok = Tokens->getNextToken();
3918
  if (Tok->isNot(tok::r_square))
3919
    return false;
3920
  Tok = Tokens->getNextToken();
3921
  if (Tok->is(tok::semi))
3922
    return false;
3923
  return true;
3924
}
3925

3926
void UnwrappedLineParser::parseJavaEnumBody() {
3927
  assert(FormatTok->is(tok::l_brace));
3928
  const FormatToken *OpeningBrace = FormatTok;
3929

3930
  // Determine whether the enum is simple, i.e. does not have a semicolon or
3931
  // constants with class bodies. Simple enums can be formatted like braced
3932
  // lists, contracted to a single line, etc.
3933
  unsigned StoredPosition = Tokens->getPosition();
3934
  bool IsSimple = true;
3935
  FormatToken *Tok = Tokens->getNextToken();
3936
  while (Tok->isNot(tok::eof)) {
3937
    if (Tok->is(tok::r_brace))
3938
      break;
3939
    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3940
      IsSimple = false;
3941
      break;
3942
    }
3943
    // FIXME: This will also mark enums with braces in the arguments to enum
3944
    // constants as "not simple". This is probably fine in practice, though.
3945
    Tok = Tokens->getNextToken();
3946
  }
3947
  FormatTok = Tokens->setPosition(StoredPosition);
3948

3949
  if (IsSimple) {
3950
    nextToken();
3951
    parseBracedList();
3952
    addUnwrappedLine();
3953
    return;
3954
  }
3955

3956
  // Parse the body of a more complex enum.
3957
  // First add a line for everything up to the "{".
3958
  nextToken();
3959
  addUnwrappedLine();
3960
  ++Line->Level;
3961

3962
  // Parse the enum constants.
3963
  while (!eof()) {
3964
    if (FormatTok->is(tok::l_brace)) {
3965
      // Parse the constant's class body.
3966
      parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3967
                 /*MunchSemi=*/false);
3968
    } else if (FormatTok->is(tok::l_paren)) {
3969
      parseParens();
3970
    } else if (FormatTok->is(tok::comma)) {
3971
      nextToken();
3972
      addUnwrappedLine();
3973
    } else if (FormatTok->is(tok::semi)) {
3974
      nextToken();
3975
      addUnwrappedLine();
3976
      break;
3977
    } else if (FormatTok->is(tok::r_brace)) {
3978
      addUnwrappedLine();
3979
      break;
3980
    } else {
3981
      nextToken();
3982
    }
3983
  }
3984

3985
  // Parse the class body after the enum's ";" if any.
3986
  parseLevel(OpeningBrace);
3987
  nextToken();
3988
  --Line->Level;
3989
  addUnwrappedLine();
3990
}
3991

3992
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3993
  const FormatToken &InitialToken = *FormatTok;
3994
  nextToken();
3995

3996
  const FormatToken *ClassName = nullptr;
3997
  bool IsDerived = false;
3998
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
3999
    return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
4000
  };
4001
  // JavaScript/TypeScript supports anonymous classes like:
4002
  // a = class extends foo { }
4003
  bool JSPastExtendsOrImplements = false;
4004
  // The actual identifier can be a nested name specifier, and in macros
4005
  // it is often token-pasted.
4006
  // An [[attribute]] can be before the identifier.
4007
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
4008
                            tok::kw_alignas, tok::l_square) ||
4009
         FormatTok->isAttribute() ||
4010
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
4011
          FormatTok->isOneOf(tok::period, tok::comma))) {
4012
    if (Style.isJavaScript() &&
4013
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
4014
      JSPastExtendsOrImplements = true;
4015
      // JavaScript/TypeScript supports inline object types in
4016
      // extends/implements positions:
4017
      //     class Foo implements {bar: number} { }
4018
      nextToken();
4019
      if (FormatTok->is(tok::l_brace)) {
4020
        tryToParseBracedList();
4021
        continue;
4022
      }
4023
    }
4024
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
4025
      continue;
4026
    const auto *Previous = FormatTok;
4027
    nextToken();
4028
    switch (FormatTok->Tok.getKind()) {
4029
    case tok::l_paren:
4030
      // We can have macros in between 'class' and the class name.
4031
      if (!IsNonMacroIdentifier(Previous) ||
4032
          // e.g. `struct macro(a) S { int i; };`
4033
          Previous->Previous == &InitialToken) {
4034
        parseParens();
4035
      }
4036
      break;
4037
    case tok::coloncolon:
4038
    case tok::hashhash:
4039
      break;
4040
    default:
4041
      if (!JSPastExtendsOrImplements && !ClassName &&
4042
          Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) {
4043
        ClassName = Previous;
4044
      }
4045
    }
4046
  }
4047

4048
  auto IsListInitialization = [&] {
4049
    if (!ClassName || IsDerived)
4050
      return false;
4051
    assert(FormatTok->is(tok::l_brace));
4052
    const auto *Prev = FormatTok->getPreviousNonComment();
4053
    assert(Prev);
4054
    return Prev != ClassName && Prev->is(tok::identifier) &&
4055
           Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
4056
  };
4057

4058
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
4059
    int AngleNestingLevel = 0;
4060
    do {
4061
      if (FormatTok->is(tok::less))
4062
        ++AngleNestingLevel;
4063
      else if (FormatTok->is(tok::greater))
4064
        --AngleNestingLevel;
4065

4066
      if (AngleNestingLevel == 0) {
4067
        if (FormatTok->is(tok::colon)) {
4068
          IsDerived = true;
4069
        } else if (FormatTok->is(tok::identifier) &&
4070
                   FormatTok->Previous->is(tok::coloncolon)) {
4071
          ClassName = FormatTok;
4072
        } else if (FormatTok->is(tok::l_paren) &&
4073
                   IsNonMacroIdentifier(FormatTok->Previous)) {
4074
          break;
4075
        }
4076
      }
4077
      if (FormatTok->is(tok::l_brace)) {
4078
        if (AngleNestingLevel == 0 && IsListInitialization())
4079
          return;
4080
        calculateBraceTypes(/*ExpectClassBody=*/true);
4081
        if (!tryToParseBracedList())
4082
          break;
4083
      }
4084
      if (FormatTok->is(tok::l_square)) {
4085
        FormatToken *Previous = FormatTok->Previous;
4086
        if (!Previous || (Previous->isNot(tok::r_paren) &&
4087
                          !Previous->isTypeOrIdentifier(LangOpts))) {
4088
          // Don't try parsing a lambda if we had a closing parenthesis before,
4089
          // it was probably a pointer to an array: int (*)[].
4090
          if (!tryToParseLambda())
4091
            continue;
4092
        } else {
4093
          parseSquare();
4094
          continue;
4095
        }
4096
      }
4097
      if (FormatTok->is(tok::semi))
4098
        return;
4099
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
4100
        addUnwrappedLine();
4101
        nextToken();
4102
        parseCSharpGenericTypeConstraint();
4103
        break;
4104
      }
4105
      nextToken();
4106
    } while (!eof());
4107
  }
4108

4109
  auto GetBraceTypes =
4110
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
4111
    switch (RecordTok.Tok.getKind()) {
4112
    case tok::kw_class:
4113
      return {TT_ClassLBrace, TT_ClassRBrace};
4114
    case tok::kw_struct:
4115
      return {TT_StructLBrace, TT_StructRBrace};
4116
    case tok::kw_union:
4117
      return {TT_UnionLBrace, TT_UnionRBrace};
4118
    default:
4119
      // Useful for e.g. interface.
4120
      return {TT_RecordLBrace, TT_RecordRBrace};
4121
    }
4122
  };
4123
  if (FormatTok->is(tok::l_brace)) {
4124
    if (IsListInitialization())
4125
      return;
4126
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
4127
    FormatTok->setFinalizedType(OpenBraceType);
4128
    if (ParseAsExpr) {
4129
      parseChildBlock();
4130
    } else {
4131
      if (ShouldBreakBeforeBrace(Style, InitialToken))
4132
        addUnwrappedLine();
4133

4134
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
4135
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
4136
    }
4137
    setPreviousRBraceType(ClosingBraceType);
4138
  }
4139
  // There is no addUnwrappedLine() here so that we fall through to parsing a
4140
  // structural element afterwards. Thus, in "class A {} n, m;",
4141
  // "} n, m;" will end up in one unwrapped line.
4142
}
4143

4144
void UnwrappedLineParser::parseObjCMethod() {
4145
  assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
4146
         "'(' or identifier expected.");
4147
  do {
4148
    if (FormatTok->is(tok::semi)) {
4149
      nextToken();
4150
      addUnwrappedLine();
4151
      return;
4152
    } else if (FormatTok->is(tok::l_brace)) {
4153
      if (Style.BraceWrapping.AfterFunction)
4154
        addUnwrappedLine();
4155
      parseBlock();
4156
      addUnwrappedLine();
4157
      return;
4158
    } else {
4159
      nextToken();
4160
    }
4161
  } while (!eof());
4162
}
4163

4164
void UnwrappedLineParser::parseObjCProtocolList() {
4165
  assert(FormatTok->is(tok::less) && "'<' expected.");
4166
  do {
4167
    nextToken();
4168
    // Early exit in case someone forgot a close angle.
4169
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4170
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
4171
      return;
4172
    }
4173
  } while (!eof() && FormatTok->isNot(tok::greater));
4174
  nextToken(); // Skip '>'.
4175
}
4176

4177
void UnwrappedLineParser::parseObjCUntilAtEnd() {
4178
  do {
4179
    if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
4180
      nextToken();
4181
      addUnwrappedLine();
4182
      break;
4183
    }
4184
    if (FormatTok->is(tok::l_brace)) {
4185
      parseBlock();
4186
      // In ObjC interfaces, nothing should be following the "}".
4187
      addUnwrappedLine();
4188
    } else if (FormatTok->is(tok::r_brace)) {
4189
      // Ignore stray "}". parseStructuralElement doesn't consume them.
4190
      nextToken();
4191
      addUnwrappedLine();
4192
    } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
4193
      nextToken();
4194
      parseObjCMethod();
4195
    } else {
4196
      parseStructuralElement();
4197
    }
4198
  } while (!eof());
4199
}
4200

4201
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
4202
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
4203
         FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
4204
  nextToken();
4205
  nextToken(); // interface name
4206

4207
  // @interface can be followed by a lightweight generic
4208
  // specialization list, then either a base class or a category.
4209
  if (FormatTok->is(tok::less))
4210
    parseObjCLightweightGenerics();
4211
  if (FormatTok->is(tok::colon)) {
4212
    nextToken();
4213
    nextToken(); // base class name
4214
    // The base class can also have lightweight generics applied to it.
4215
    if (FormatTok->is(tok::less))
4216
      parseObjCLightweightGenerics();
4217
  } else if (FormatTok->is(tok::l_paren)) {
4218
    // Skip category, if present.
4219
    parseParens();
4220
  }
4221

4222
  if (FormatTok->is(tok::less))
4223
    parseObjCProtocolList();
4224

4225
  if (FormatTok->is(tok::l_brace)) {
4226
    if (Style.BraceWrapping.AfterObjCDeclaration)
4227
      addUnwrappedLine();
4228
    parseBlock(/*MustBeDeclaration=*/true);
4229
  }
4230

4231
  // With instance variables, this puts '}' on its own line.  Without instance
4232
  // variables, this ends the @interface line.
4233
  addUnwrappedLine();
4234

4235
  parseObjCUntilAtEnd();
4236
}
4237

4238
void UnwrappedLineParser::parseObjCLightweightGenerics() {
4239
  assert(FormatTok->is(tok::less));
4240
  // Unlike protocol lists, generic parameterizations support
4241
  // nested angles:
4242
  //
4243
  // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
4244
  //     NSObject <NSCopying, NSSecureCoding>
4245
  //
4246
  // so we need to count how many open angles we have left.
4247
  unsigned NumOpenAngles = 1;
4248
  do {
4249
    nextToken();
4250
    // Early exit in case someone forgot a close angle.
4251
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
4252
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
4253
      break;
4254
    }
4255
    if (FormatTok->is(tok::less)) {
4256
      ++NumOpenAngles;
4257
    } else if (FormatTok->is(tok::greater)) {
4258
      assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
4259
      --NumOpenAngles;
4260
    }
4261
  } while (!eof() && NumOpenAngles != 0);
4262
  nextToken(); // Skip '>'.
4263
}
4264

4265
// Returns true for the declaration/definition form of @protocol,
4266
// false for the expression form.
4267
bool UnwrappedLineParser::parseObjCProtocol() {
4268
  assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
4269
  nextToken();
4270

4271
  if (FormatTok->is(tok::l_paren)) {
4272
    // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
4273
    return false;
4274
  }
4275

4276
  // The definition/declaration form,
4277
  // @protocol Foo
4278
  // - (int)someMethod;
4279
  // @end
4280

4281
  nextToken(); // protocol name
4282

4283
  if (FormatTok->is(tok::less))
4284
    parseObjCProtocolList();
4285

4286
  // Check for protocol declaration.
4287
  if (FormatTok->is(tok::semi)) {
4288
    nextToken();
4289
    addUnwrappedLine();
4290
    return true;
4291
  }
4292

4293
  addUnwrappedLine();
4294
  parseObjCUntilAtEnd();
4295
  return true;
4296
}
4297

4298
void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
4299
  bool IsImport = FormatTok->is(Keywords.kw_import);
4300
  assert(IsImport || FormatTok->is(tok::kw_export));
4301
  nextToken();
4302

4303
  // Consume the "default" in "export default class/function".
4304
  if (FormatTok->is(tok::kw_default))
4305
    nextToken();
4306

4307
  // Consume "async function", "function" and "default function", so that these
4308
  // get parsed as free-standing JS functions, i.e. do not require a trailing
4309
  // semicolon.
4310
  if (FormatTok->is(Keywords.kw_async))
4311
    nextToken();
4312
  if (FormatTok->is(Keywords.kw_function)) {
4313
    nextToken();
4314
    return;
4315
  }
4316

4317
  // For imports, `export *`, `export {...}`, consume the rest of the line up
4318
  // to the terminating `;`. For everything else, just return and continue
4319
  // parsing the structural element, i.e. the declaration or expression for
4320
  // `export default`.
4321
  if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
4322
      !FormatTok->isStringLiteral() &&
4323
      !(FormatTok->is(Keywords.kw_type) &&
4324
        Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) {
4325
    return;
4326
  }
4327

4328
  while (!eof()) {
4329
    if (FormatTok->is(tok::semi))
4330
      return;
4331
    if (Line->Tokens.empty()) {
4332
      // Common issue: Automatic Semicolon Insertion wrapped the line, so the
4333
      // import statement should terminate.
4334
      return;
4335
    }
4336
    if (FormatTok->is(tok::l_brace)) {
4337
      FormatTok->setBlockKind(BK_Block);
4338
      nextToken();
4339
      parseBracedList();
4340
    } else {
4341
      nextToken();
4342
    }
4343
  }
4344
}
4345

4346
void UnwrappedLineParser::parseStatementMacro() {
4347
  nextToken();
4348
  if (FormatTok->is(tok::l_paren))
4349
    parseParens();
4350
  if (FormatTok->is(tok::semi))
4351
    nextToken();
4352
  addUnwrappedLine();
4353
}
4354

4355
void UnwrappedLineParser::parseVerilogHierarchyIdentifier() {
4356
  // consume things like a::`b.c[d:e] or a::*
4357
  while (true) {
4358
    if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar,
4359
                           tok::coloncolon, tok::hash) ||
4360
        Keywords.isVerilogIdentifier(*FormatTok)) {
4361
      nextToken();
4362
    } else if (FormatTok->is(tok::l_square)) {
4363
      parseSquare();
4364
    } else {
4365
      break;
4366
    }
4367
  }
4368
}
4369

4370
void UnwrappedLineParser::parseVerilogSensitivityList() {
4371
  if (FormatTok->isNot(tok::at))
4372
    return;
4373
  nextToken();
4374
  // A block event expression has 2 at signs.
4375
  if (FormatTok->is(tok::at))
4376
    nextToken();
4377
  switch (FormatTok->Tok.getKind()) {
4378
  case tok::star:
4379
    nextToken();
4380
    break;
4381
  case tok::l_paren:
4382
    parseParens();
4383
    break;
4384
  default:
4385
    parseVerilogHierarchyIdentifier();
4386
    break;
4387
  }
4388
}
4389

4390
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
4391
  unsigned AddLevels = 0;
4392

4393
  if (FormatTok->is(Keywords.kw_clocking)) {
4394
    nextToken();
4395
    if (Keywords.isVerilogIdentifier(*FormatTok))
4396
      nextToken();
4397
    parseVerilogSensitivityList();
4398
    if (FormatTok->is(tok::semi))
4399
      nextToken();
4400
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
4401
                                Keywords.kw_casez, Keywords.kw_randcase,
4402
                                Keywords.kw_randsequence)) {
4403
    if (Style.IndentCaseLabels)
4404
      AddLevels++;
4405
    nextToken();
4406
    if (FormatTok->is(tok::l_paren)) {
4407
      FormatTok->setFinalizedType(TT_ConditionLParen);
4408
      parseParens();
4409
    }
4410
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
4411
      nextToken();
4412
    // The case header has no semicolon.
4413
  } else {
4414
    // "module" etc.
4415
    nextToken();
4416
    // all the words like the name of the module and specifiers like
4417
    // "automatic" and the width of function return type
4418
    while (true) {
4419
      if (FormatTok->is(tok::l_square)) {
4420
        auto Prev = FormatTok->getPreviousNonComment();
4421
        if (Prev && Keywords.isVerilogIdentifier(*Prev))
4422
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
4423
        parseSquare();
4424
      } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
4425
                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
4426
        nextToken();
4427
      } else {
4428
        break;
4429
      }
4430
    }
4431

4432
    auto NewLine = [this]() {
4433
      addUnwrappedLine();
4434
      Line->IsContinuation = true;
4435
    };
4436

4437
    // package imports
4438
    while (FormatTok->is(Keywords.kw_import)) {
4439
      NewLine();
4440
      nextToken();
4441
      parseVerilogHierarchyIdentifier();
4442
      if (FormatTok->is(tok::semi))
4443
        nextToken();
4444
    }
4445

4446
    // parameters and ports
4447
    if (FormatTok->is(Keywords.kw_verilogHash)) {
4448
      NewLine();
4449
      nextToken();
4450
      if (FormatTok->is(tok::l_paren)) {
4451
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4452
        parseParens();
4453
      }
4454
    }
4455
    if (FormatTok->is(tok::l_paren)) {
4456
      NewLine();
4457
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
4458
      parseParens();
4459
    }
4460

4461
    // extends and implements
4462
    if (FormatTok->is(Keywords.kw_extends)) {
4463
      NewLine();
4464
      nextToken();
4465
      parseVerilogHierarchyIdentifier();
4466
      if (FormatTok->is(tok::l_paren))
4467
        parseParens();
4468
    }
4469
    if (FormatTok->is(Keywords.kw_implements)) {
4470
      NewLine();
4471
      do {
4472
        nextToken();
4473
        parseVerilogHierarchyIdentifier();
4474
      } while (FormatTok->is(tok::comma));
4475
    }
4476

4477
    // Coverage event for cover groups.
4478
    if (FormatTok->is(tok::at)) {
4479
      NewLine();
4480
      parseVerilogSensitivityList();
4481
    }
4482

4483
    if (FormatTok->is(tok::semi))
4484
      nextToken(/*LevelDifference=*/1);
4485
    addUnwrappedLine();
4486
  }
4487

4488
  return AddLevels;
4489
}
4490

4491
void UnwrappedLineParser::parseVerilogTable() {
4492
  assert(FormatTok->is(Keywords.kw_table));
4493
  nextToken(/*LevelDifference=*/1);
4494
  addUnwrappedLine();
4495

4496
  auto InitialLevel = Line->Level++;
4497
  while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
4498
    FormatToken *Tok = FormatTok;
4499
    nextToken();
4500
    if (Tok->is(tok::semi))
4501
      addUnwrappedLine();
4502
    else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
4503
      Tok->setFinalizedType(TT_VerilogTableItem);
4504
  }
4505
  Line->Level = InitialLevel;
4506
  nextToken(/*LevelDifference=*/-1);
4507
  addUnwrappedLine();
4508
}
4509

4510
void UnwrappedLineParser::parseVerilogCaseLabel() {
4511
  // The label will get unindented in AnnotatingParser. If there are no leading
4512
  // spaces, indent the rest here so that things inside the block will be
4513
  // indented relative to things outside. We don't use parseLabel because we
4514
  // don't know whether this colon is a label or a ternary expression at this
4515
  // point.
4516
  auto OrigLevel = Line->Level;
4517
  auto FirstLine = CurrentLines->size();
4518
  if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
4519
    ++Line->Level;
4520
  else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
4521
    --Line->Level;
4522
  parseStructuralElement();
4523
  // Restore the indentation in both the new line and the line that has the
4524
  // label.
4525
  if (CurrentLines->size() > FirstLine)
4526
    (*CurrentLines)[FirstLine].Level = OrigLevel;
4527
  Line->Level = OrigLevel;
4528
}
4529

4530
bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
4531
  for (const auto &N : Line.Tokens) {
4532
    if (N.Tok->MacroCtx)
4533
      return true;
4534
    for (const UnwrappedLine &Child : N.Children)
4535
      if (containsExpansion(Child))
4536
        return true;
4537
  }
4538
  return false;
4539
}
4540

4541
// Finishes the unwrapped line that is currently being built and resets the
// per-line state so that a new line can be started. Does nothing if no token
// has been collected yet.
//
// Lines that contain tokens from a macro expansion are not stored directly:
// they are kept on the side and fed to the reconstructor, which rebuilds the
// line as written in the unexpanded source. All other lines are appended to
// CurrentLines.
//
// \p AdjustLevel selects whether the level is decreased after a line that
// closes a block in Whitesmiths brace style.
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
  if (Line->Tokens.empty())
    return;

  LLVM_DEBUG({
    if (!parsingPPDirective()) {
      llvm::dbgs() << "Adding unwrapped line:\n";
      printDebugInfo(*Line);
    }
  });

  // In Whitesmiths mode a line that closes a block is itself still indented,
  // so the level may only be lowered once the line has been added. Record the
  // fact now, before the line is moved away below.
  const bool HasMatchingOpeningLine =
      Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex;
  const bool IsWhitesmithsBlockEnd =
      HasMatchingOpeningLine &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  const bool ComesFromExpansion =
      !parsingPPDirective() && !InExpansion && containsExpansion(*Line);
  if (!ComesFromExpansion) {
    // At the top level we only get here when no unexpansion is going on, or
    // when conditional formatting led to unfinished macro reconstructions.
    assert(!Reconstruct || (CurrentLines != &Lines) || !PPStack.empty());
    CurrentLines->push_back(std::move(*Line));
  } else {
    // The line was expanded from a macro call: use it to reconstruct an
    // unwrapped line from the structure of the expanded line and the
    // unexpanded token stream.
    if (!Reconstruct)
      Reconstruct.emplace(Line->Level, Unexpanded);
    Reconstruct->addLine(*Line);

    // The reconstructed (unexpanded) lines go into the normal flow of lines;
    // the expanded ones are parked here to be analyzed in an extra step.
    CurrentExpandedLines.push_back(std::move(*Line));

    if (Reconstruct->finished()) {
      UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
      assert(!Reconstructed.Tokens.empty() &&
             "Reconstructed must at least contain the macro identifier.");
      assert(!parsingPPDirective());
      LLVM_DEBUG({
        llvm::dbgs() << "Adding unexpanded line:\n";
        printDebugInfo(Reconstructed);
      });
      // File the expanded lines under the first token of the reconstructed
      // line.
      ExpandedLines[Reconstructed.Tokens.front().Tok] = CurrentExpandedLines;
      Lines.push_back(std::move(Reconstructed));
      CurrentExpandedLines.clear();
      Reconstruct.reset();
    }
  }

  // Reset the state that belongs to a single line.
  Line->Tokens.clear();
  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  Line->FirstStartColumn = 0;
  Line->IsContinuation = false;
  Line->SeenDecltypeAuto = false;

  if (IsWhitesmithsBlockEnd && AdjustLevel == LineLevel::Remove)
    --Line->Level;

  // Preprocessor lines that were held back are appended now, unless a
  // directive is being parsed at the moment.
  if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
    CurrentLines->append(
        std::make_move_iterator(PreprocessorDirectives.begin()),
        std::make_move_iterator(PreprocessorDirectives.end()));
    PreprocessorDirectives.clear();
  }

  // Disconnect the current token from the last token on the previous line.
  FormatTok->Previous = nullptr;
}
4609

4610
// Returns true if the current token is the end-of-file token.
bool UnwrappedLineParser::eof() const {
  return FormatTok->Tok.getKind() == tok::eof;
}
4611

4612
bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4613
  return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4614
         FormatTok.NewlinesBefore > 0;
4615
}
4616

4617
// Checks if \p FormatTok is a line comment that continues the line comment
4618
// section on \p Line.
4619
static bool
4620
continuesLineCommentSection(const FormatToken &FormatTok,
4621
                            const UnwrappedLine &Line,
4622
                            const llvm::Regex &CommentPragmasRegex) {
4623
  if (Line.Tokens.empty())
4624
    return false;
4625

4626
  StringRef IndentContent = FormatTok.TokenText;
4627
  if (FormatTok.TokenText.starts_with("//") ||
4628
      FormatTok.TokenText.starts_with("/*")) {
4629
    IndentContent = FormatTok.TokenText.substr(2);
4630
  }
4631
  if (CommentPragmasRegex.match(IndentContent))
4632
    return false;
4633

4634
  // If Line starts with a line comment, then FormatTok continues the comment
4635
  // section if its original column is greater or equal to the original start
4636
  // column of the line.
4637
  //
4638
  // Define the min column token of a line as follows: if a line ends in '{' or
4639
  // contains a '{' followed by a line comment, then the min column token is
4640
  // that '{'. Otherwise, the min column token of the line is the first token of
4641
  // the line.
4642
  //
4643
  // If Line starts with a token other than a line comment, then FormatTok
4644
  // continues the comment section if its original column is greater than the
4645
  // original start column of the min column token of the line.
4646
  //
4647
  // For example, the second line comment continues the first in these cases:
4648
  //
4649
  // // first line
4650
  // // second line
4651
  //
4652
  // and:
4653
  //
4654
  // // first line
4655
  //  // second line
4656
  //
4657
  // and:
4658
  //
4659
  // int i; // first line
4660
  //  // second line
4661
  //
4662
  // and:
4663
  //
4664
  // do { // first line
4665
  //      // second line
4666
  //   int i;
4667
  // } while (true);
4668
  //
4669
  // and:
4670
  //
4671
  // enum {
4672
  //   a, // first line
4673
  //    // second line
4674
  //   b
4675
  // };
4676
  //
4677
  // The second line comment doesn't continue the first in these cases:
4678
  //
4679
  //   // first line
4680
  //  // second line
4681
  //
4682
  // and:
4683
  //
4684
  // int i; // first line
4685
  // // second line
4686
  //
4687
  // and:
4688
  //
4689
  // do { // first line
4690
  //   // second line
4691
  //   int i;
4692
  // } while (true);
4693
  //
4694
  // and:
4695
  //
4696
  // enum {
4697
  //   a, // first line
4698
  //   // second line
4699
  // };
4700
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4701

4702
  // Scan for '{//'. If found, use the column of '{' as a min column for line
4703
  // comment section continuation.
4704
  const FormatToken *PreviousToken = nullptr;
4705
  for (const UnwrappedLineNode &Node : Line.Tokens) {
4706
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4707
        isLineComment(*Node.Tok)) {
4708
      MinColumnToken = PreviousToken;
4709
      break;
4710
    }
4711
    PreviousToken = Node.Tok;
4712

4713
    // Grab the last newline preceding a token in this unwrapped line.
4714
    if (Node.Tok->NewlinesBefore > 0)
4715
      MinColumnToken = Node.Tok;
4716
  }
4717
  if (PreviousToken && PreviousToken->is(tok::l_brace))
4718
    MinColumnToken = PreviousToken;
4719

4720
  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4721
                              MinColumnToken);
4722
}
4723

4724
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4725
  bool JustComments = Line->Tokens.empty();
4726
  for (FormatToken *Tok : CommentsBeforeNextToken) {
4727
    // Line comments that belong to the same line comment section are put on the
4728
    // same line since later we might want to reflow content between them.
4729
    // Additional fine-grained breaking of line comment sections is controlled
4730
    // by the class BreakableLineCommentSection in case it is desirable to keep
4731
    // several line comment sections in the same unwrapped line.
4732
    //
4733
    // FIXME: Consider putting separate line comment sections as children to the
4734
    // unwrapped line instead.
4735
    Tok->ContinuesLineCommentSection =
4736
        continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4737
    if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4738
      addUnwrappedLine();
4739
    pushToken(Tok);
4740
  }
4741
  if (NewlineBeforeNext && JustComments)
4742
    addUnwrappedLine();
4743
  CommentsBeforeNextToken.clear();
4744
}
4745

4746
void UnwrappedLineParser::nextToken(int LevelDifference) {
4747
  if (eof())
4748
    return;
4749
  flushComments(isOnNewLine(*FormatTok));
4750
  pushToken(FormatTok);
4751
  FormatToken *Previous = FormatTok;
4752
  if (!Style.isJavaScript())
4753
    readToken(LevelDifference);
4754
  else
4755
    readTokenWithJavaScriptASI();
4756
  FormatTok->Previous = Previous;
4757
  if (Style.isVerilog()) {
4758
    // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4759
    // keywords like `begin`, we can't treat them the same as left braces
4760
    // because some contexts require one of them.  For example structs use
4761
    // braces and if blocks use keywords, and a left brace can occur in an if
4762
    // statement, but it is not a block.  For keywords like `end`, we simply
4763
    // treat them the same as right braces.
4764
    if (Keywords.isVerilogEnd(*FormatTok))
4765
      FormatTok->Tok.setKind(tok::r_brace);
4766
  }
4767
}
4768

4769
void UnwrappedLineParser::distributeComments(
4770
    const SmallVectorImpl<FormatToken *> &Comments,
4771
    const FormatToken *NextTok) {
4772
  // Whether or not a line comment token continues a line is controlled by
4773
  // the method continuesLineCommentSection, with the following caveat:
4774
  //
4775
  // Define a trail of Comments to be a nonempty proper postfix of Comments such
4776
  // that each comment line from the trail is aligned with the next token, if
4777
  // the next token exists. If a trail exists, the beginning of the maximal
4778
  // trail is marked as a start of a new comment section.
4779
  //
4780
  // For example in this code:
4781
  //
4782
  // int a; // line about a
4783
  //   // line 1 about b
4784
  //   // line 2 about b
4785
  //   int b;
4786
  //
4787
  // the two lines about b form a maximal trail, so there are two sections, the
4788
  // first one consisting of the single comment "// line about a" and the
4789
  // second one consisting of the next two comments.
4790
  if (Comments.empty())
4791
    return;
4792
  bool ShouldPushCommentsInCurrentLine = true;
4793
  bool HasTrailAlignedWithNextToken = false;
4794
  unsigned StartOfTrailAlignedWithNextToken = 0;
4795
  if (NextTok) {
4796
    // We are skipping the first element intentionally.
4797
    for (unsigned i = Comments.size() - 1; i > 0; --i) {
4798
      if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4799
        HasTrailAlignedWithNextToken = true;
4800
        StartOfTrailAlignedWithNextToken = i;
4801
      }
4802
    }
4803
  }
4804
  for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4805
    FormatToken *FormatTok = Comments[i];
4806
    if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4807
      FormatTok->ContinuesLineCommentSection = false;
4808
    } else {
4809
      FormatTok->ContinuesLineCommentSection =
4810
          continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4811
    }
4812
    if (!FormatTok->ContinuesLineCommentSection &&
4813
        (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4814
      ShouldPushCommentsInCurrentLine = false;
4815
    }
4816
    if (ShouldPushCommentsInCurrentLine)
4817
      pushToken(FormatTok);
4818
    else
4819
      CommentsBeforeNextToken.push_back(FormatTok);
4820
  }
4821
}
4822

4823
void UnwrappedLineParser::readToken(int LevelDifference) {
4824
  SmallVector<FormatToken *, 1> Comments;
4825
  bool PreviousWasComment = false;
4826
  bool FirstNonCommentOnLine = false;
4827
  do {
4828
    FormatTok = Tokens->getNextToken();
4829
    assert(FormatTok);
4830
    while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd,
4831
                              TT_ConflictAlternative)) {
4832
      if (FormatTok->is(TT_ConflictStart))
4833
        conditionalCompilationStart(/*Unreachable=*/false);
4834
      else if (FormatTok->is(TT_ConflictAlternative))
4835
        conditionalCompilationAlternative();
4836
      else if (FormatTok->is(TT_ConflictEnd))
4837
        conditionalCompilationEnd();
4838
      FormatTok = Tokens->getNextToken();
4839
      FormatTok->MustBreakBefore = true;
4840
      FormatTok->MustBreakBeforeFinalized = true;
4841
    }
4842

4843
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4844
                                      const FormatToken &Tok,
4845
                                      bool PreviousWasComment) {
4846
      auto IsFirstOnLine = [](const FormatToken &Tok) {
4847
        return Tok.HasUnescapedNewline || Tok.IsFirst;
4848
      };
4849

4850
      // Consider preprocessor directives preceded by block comments as first
4851
      // on line.
4852
      if (PreviousWasComment)
4853
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4854
      return IsFirstOnLine(Tok);
4855
    };
4856

4857
    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4858
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4859
    PreviousWasComment = FormatTok->is(tok::comment);
4860

4861
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4862
           (!Style.isVerilog() ||
4863
            Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4864
           FirstNonCommentOnLine) {
4865
      distributeComments(Comments, FormatTok);
4866
      Comments.clear();
4867
      // If there is an unfinished unwrapped line, we flush the preprocessor
4868
      // directives only after that unwrapped line was finished later.
4869
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4870
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4871
      assert((LevelDifference >= 0 ||
4872
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4873
             "LevelDifference makes Line->Level negative");
4874
      Line->Level += LevelDifference;
4875
      // Comments stored before the preprocessor directive need to be output
4876
      // before the preprocessor directive, at the same level as the
4877
      // preprocessor directive, as we consider them to apply to the directive.
4878
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4879
          PPBranchLevel > 0) {
4880
        Line->Level += PPBranchLevel;
4881
      }
4882
      assert(Line->Level >= Line->UnbracedBodyLevel);
4883
      Line->Level -= Line->UnbracedBodyLevel;
4884
      flushComments(isOnNewLine(*FormatTok));
4885
      parsePPDirective();
4886
      PreviousWasComment = FormatTok->is(tok::comment);
4887
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4888
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4889
    }
4890

4891
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4892
        !Line->InPPDirective) {
4893
      continue;
4894
    }
4895

4896
    if (FormatTok->is(tok::identifier) &&
4897
        Macros.defined(FormatTok->TokenText) &&
4898
        // FIXME: Allow expanding macros in preprocessor directives.
4899
        !Line->InPPDirective) {
4900
      FormatToken *ID = FormatTok;
4901
      unsigned Position = Tokens->getPosition();
4902

4903
      // To correctly parse the code, we need to replace the tokens of the macro
4904
      // call with its expansion.
4905
      auto PreCall = std::move(Line);
4906
      Line.reset(new UnwrappedLine);
4907
      bool OldInExpansion = InExpansion;
4908
      InExpansion = true;
4909
      // We parse the macro call into a new line.
4910
      auto Args = parseMacroCall();
4911
      InExpansion = OldInExpansion;
4912
      assert(Line->Tokens.front().Tok == ID);
4913
      // And remember the unexpanded macro call tokens.
4914
      auto UnexpandedLine = std::move(Line);
4915
      // Reset to the old line.
4916
      Line = std::move(PreCall);
4917

4918
      LLVM_DEBUG({
4919
        llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
4920
        if (Args) {
4921
          llvm::dbgs() << "(";
4922
          for (const auto &Arg : Args.value())
4923
            for (const auto &T : Arg)
4924
              llvm::dbgs() << T->TokenText << " ";
4925
          llvm::dbgs() << ")";
4926
        }
4927
        llvm::dbgs() << "\n";
4928
      });
4929
      if (Macros.objectLike(ID->TokenText) && Args &&
4930
          !Macros.hasArity(ID->TokenText, Args->size())) {
4931
        // The macro is either
4932
        // - object-like, but we got argumnets, or
4933
        // - overloaded to be both object-like and function-like, but none of
4934
        //   the function-like arities match the number of arguments.
4935
        // Thus, expand as object-like macro.
4936
        LLVM_DEBUG(llvm::dbgs()
4937
                   << "Macro \"" << ID->TokenText
4938
                   << "\" not overloaded for arity " << Args->size()
4939
                   << "or not function-like, using object-like overload.");
4940
        Args.reset();
4941
        UnexpandedLine->Tokens.resize(1);
4942
        Tokens->setPosition(Position);
4943
        nextToken();
4944
        assert(!Args && Macros.objectLike(ID->TokenText));
4945
      }
4946
      if ((!Args && Macros.objectLike(ID->TokenText)) ||
4947
          (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
4948
        // Next, we insert the expanded tokens in the token stream at the
4949
        // current position, and continue parsing.
4950
        Unexpanded[ID] = std::move(UnexpandedLine);
4951
        SmallVector<FormatToken *, 8> Expansion =
4952
            Macros.expand(ID, std::move(Args));
4953
        if (!Expansion.empty())
4954
          FormatTok = Tokens->insertTokens(Expansion);
4955

4956
        LLVM_DEBUG({
4957
          llvm::dbgs() << "Expanded: ";
4958
          for (const auto &T : Expansion)
4959
            llvm::dbgs() << T->TokenText << " ";
4960
          llvm::dbgs() << "\n";
4961
        });
4962
      } else {
4963
        LLVM_DEBUG({
4964
          llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
4965
                       << "\", because it was used ";
4966
          if (Args)
4967
            llvm::dbgs() << "with " << Args->size();
4968
          else
4969
            llvm::dbgs() << "without";
4970
          llvm::dbgs() << " arguments, which doesn't match any definition.\n";
4971
        });
4972
        Tokens->setPosition(Position);
4973
        FormatTok = ID;
4974
      }
4975
    }
4976

4977
    if (FormatTok->isNot(tok::comment)) {
4978
      distributeComments(Comments, FormatTok);
4979
      Comments.clear();
4980
      return;
4981
    }
4982

4983
    Comments.push_back(FormatTok);
4984
  } while (!eof());
4985

4986
  distributeComments(Comments, nullptr);
4987
  Comments.clear();
4988
}
4989

4990
namespace {
4991
template <typename Iterator>
4992
void pushTokens(Iterator Begin, Iterator End,
4993
                llvm::SmallVectorImpl<FormatToken *> &Into) {
4994
  for (auto I = Begin; I != End; ++I) {
4995
    Into.push_back(I->Tok);
4996
    for (const auto &Child : I->Children)
4997
      pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
4998
  }
4999
}
5000
} // namespace
5001

5002
std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
5003
UnwrappedLineParser::parseMacroCall() {
5004
  std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
5005
  assert(Line->Tokens.empty());
5006
  nextToken();
5007
  if (FormatTok->isNot(tok::l_paren))
5008
    return Args;
5009
  unsigned Position = Tokens->getPosition();
5010
  FormatToken *Tok = FormatTok;
5011
  nextToken();
5012
  Args.emplace();
5013
  auto ArgStart = std::prev(Line->Tokens.end());
5014

5015
  int Parens = 0;
5016
  do {
5017
    switch (FormatTok->Tok.getKind()) {
5018
    case tok::l_paren:
5019
      ++Parens;
5020
      nextToken();
5021
      break;
5022
    case tok::r_paren: {
5023
      if (Parens > 0) {
5024
        --Parens;
5025
        nextToken();
5026
        break;
5027
      }
5028
      Args->push_back({});
5029
      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5030
      nextToken();
5031
      return Args;
5032
    }
5033
    case tok::comma: {
5034
      if (Parens > 0) {
5035
        nextToken();
5036
        break;
5037
      }
5038
      Args->push_back({});
5039
      pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
5040
      nextToken();
5041
      ArgStart = std::prev(Line->Tokens.end());
5042
      break;
5043
    }
5044
    default:
5045
      nextToken();
5046
      break;
5047
    }
5048
  } while (!eof());
5049
  Line->Tokens.resize(1);
5050
  Tokens->setPosition(Position);
5051
  FormatTok = Tok;
5052
  return {};
5053
}
5054

5055
// Appends \p Tok to the line being built. If a break was requested for the
// next token, the appended token is marked as having to start a new line and
// the request is cleared.
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  Line->Tokens.emplace_back(Tok);
  if (!MustBreakBeforeNextToken)
    return;

  FormatToken *const Appended = Line->Tokens.back().Tok;
  Appended->MustBreakBefore = true;
  Appended->MustBreakBeforeFinalized = true;
  MustBreakBeforeNextToken = false;
}
5063

5064
} // end namespace format
5065
} // end namespace clang
5066

5067
Product

Resources

Company