CoCalc -- CommentParser.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/AST/CommentParser.cpp
³⁵²⁶⁰ views
1
//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8

9
#include "clang/AST/CommentParser.h"
10
#include "clang/AST/CommentCommandTraits.h"
11
#include "clang/AST/CommentDiagnostic.h"
12
#include "clang/AST/CommentSema.h"
13
#include "clang/Basic/CharInfo.h"
14
#include "clang/Basic/SourceManager.h"
15
#include "llvm/Support/ErrorHandling.h"
16

17
namespace clang {
18

19
/// Returns true if every character of \p S is whitespace according to the
/// single-character isWhitespace() overload. An empty string yields true.
static inline bool isWhitespace(llvm::StringRef S) {
  for (const char C : S)
    if (!isWhitespace(C))
      return false;
  return true;
}
26

27
namespace comments {
28

29
/// Re-lexes a sequence of tok::text tokens.
30
class TextTokenRetokenizer {
31
  llvm::BumpPtrAllocator &Allocator;
32
  Parser &P;
33

34
  /// This flag is set when there are no more tokens we can fetch from lexer.
35
  bool NoMoreInterestingTokens;
36

37
  /// Token buffer: tokens we have processed and lookahead.
38
  SmallVector<Token, 16> Toks;
39

40
  /// A position in \c Toks.
41
  struct Position {
42
    const char *BufferStart;
43
    const char *BufferEnd;
44
    const char *BufferPtr;
45
    SourceLocation BufferStartLoc;
46
    unsigned CurToken;
47
  };
48

49
  /// Current position in Toks.
50
  Position Pos;
51

52
  bool isEnd() const {
53
    return Pos.CurToken >= Toks.size();
54
  }
55

56
  /// Sets up the buffer pointers to point to current token.
57
  void setupBuffer() {
58
    assert(!isEnd());
59
    const Token &Tok = Toks[Pos.CurToken];
60

61
    Pos.BufferStart = Tok.getText().begin();
62
    Pos.BufferEnd = Tok.getText().end();
63
    Pos.BufferPtr = Pos.BufferStart;
64
    Pos.BufferStartLoc = Tok.getLocation();
65
  }
66

67
  SourceLocation getSourceLocation() const {
68
    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69
    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70
  }
71

72
  char peek() const {
73
    assert(!isEnd());
74
    assert(Pos.BufferPtr != Pos.BufferEnd);
75
    return *Pos.BufferPtr;
76
  }
77

78
  void consumeChar() {
79
    assert(!isEnd());
80
    assert(Pos.BufferPtr != Pos.BufferEnd);
81
    Pos.BufferPtr++;
82
    if (Pos.BufferPtr == Pos.BufferEnd) {
83
      Pos.CurToken++;
84
      if (isEnd() && !addToken())
85
        return;
86

87
      assert(!isEnd());
88
      setupBuffer();
89
    }
90
  }
91

92
  /// Extract a template type
93
  bool lexTemplate(SmallString<32> &WordText) {
94
    unsigned BracketCount = 0;
95
    while (!isEnd()) {
96
      const char C = peek();
97
      WordText.push_back(C);
98
      consumeChar();
99
      switch (C) {
100
      case '<': {
101
        BracketCount++;
102
        break;
103
      }
104
      case '>': {
105
        BracketCount--;
106
        if (!BracketCount)
107
          return true;
108
        break;
109
      }
110
      default:
111
        break;
112
      }
113
    }
114
    return false;
115
  }
116

117
  /// Add a token.
118
  /// Returns true on success, false if there are no interesting tokens to
119
  /// fetch from lexer.
120
  bool addToken() {
121
    if (NoMoreInterestingTokens)
122
      return false;
123

124
    if (P.Tok.is(tok::newline)) {
125
      // If we see a single newline token between text tokens, skip it.
126
      Token Newline = P.Tok;
127
      P.consumeToken();
128
      if (P.Tok.isNot(tok::text)) {
129
        P.putBack(Newline);
130
        NoMoreInterestingTokens = true;
131
        return false;
132
      }
133
    }
134
    if (P.Tok.isNot(tok::text)) {
135
      NoMoreInterestingTokens = true;
136
      return false;
137
    }
138

139
    Toks.push_back(P.Tok);
140
    P.consumeToken();
141
    if (Toks.size() == 1)
142
      setupBuffer();
143
    return true;
144
  }
145

146
  void consumeWhitespace() {
147
    while (!isEnd()) {
148
      if (isWhitespace(peek()))
149
        consumeChar();
150
      else
151
        break;
152
    }
153
  }
154

155
  void formTokenWithChars(Token &Result,
156
                          SourceLocation Loc,
157
                          const char *TokBegin,
158
                          unsigned TokLength,
159
                          StringRef Text) {
160
    Result.setLocation(Loc);
161
    Result.setKind(tok::text);
162
    Result.setLength(TokLength);
163
#ifndef NDEBUG
164
    Result.TextPtr = "<UNSET>";
165
    Result.IntVal = 7;
166
#endif
167
    Result.setText(Text);
168
  }
169

170
public:
171
  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
172
      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
173
    Pos.CurToken = 0;
174
    addToken();
175
  }
176

177
  /// Extract a type argument
178
  bool lexType(Token &Tok) {
179
    if (isEnd())
180
      return false;
181

182
    // Save current position in case we need to rollback because the type is
183
    // empty.
184
    Position SavedPos = Pos;
185

186
    // Consume any leading whitespace.
187
    consumeWhitespace();
188
    SmallString<32> WordText;
189
    const char *WordBegin = Pos.BufferPtr;
190
    SourceLocation Loc = getSourceLocation();
191

192
    while (!isEnd()) {
193
      const char C = peek();
194
      // For non-whitespace characters we check if it's a template or otherwise
195
      // continue reading the text into a word.
196
      if (!isWhitespace(C)) {
197
        if (C == '<') {
198
          if (!lexTemplate(WordText))
199
            return false;
200
        } else {
201
          WordText.push_back(C);
202
          consumeChar();
203
        }
204
      } else {
205
        consumeChar();
206
        break;
207
      }
208
    }
209

210
    const unsigned Length = WordText.size();
211
    if (Length == 0) {
212
      Pos = SavedPos;
213
      return false;
214
    }
215

216
    char *TextPtr = Allocator.Allocate<char>(Length + 1);
217

218
    memcpy(TextPtr, WordText.c_str(), Length + 1);
219
    StringRef Text = StringRef(TextPtr, Length);
220

221
    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
222
    return true;
223
  }
224

225
  // Check if this line starts with @par or \par
226
  bool startsWithParCommand() {
227
    unsigned Offset = 1;
228

229
    // Skip all whitespace characters at the beginning.
230
    // This needs to backtrack because Pos has already advanced past the
231
    // actual \par or @par command by the time this function is called.
232
    while (isWhitespace(*(Pos.BufferPtr - Offset)))
233
      Offset++;
234

235
    // Once we've reached the whitespace, backtrack and check if the previous
236
    // four characters are \par or @par.
237
    llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
238
    return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
239
  }
240

241
  /// Extract a par command argument-header.
242
  bool lexParHeading(Token &Tok) {
243
    if (isEnd())
244
      return false;
245

246
    Position SavedPos = Pos;
247

248
    consumeWhitespace();
249
    SmallString<32> WordText;
250
    const char *WordBegin = Pos.BufferPtr;
251
    SourceLocation Loc = getSourceLocation();
252

253
    if (!startsWithParCommand())
254
      return false;
255

256
    // Read until the end of this token, which is effectively the end of the
257
    // line. This gets us the content of the par header, if there is one.
258
    while (!isEnd()) {
259
      WordText.push_back(peek());
260
      if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
261
        consumeChar();
262
        break;
263
      }
264
      consumeChar();
265
    }
266

267
    unsigned Length = WordText.size();
268
    if (Length == 0) {
269
      Pos = SavedPos;
270
      return false;
271
    }
272

273
    char *TextPtr = Allocator.Allocate<char>(Length + 1);
274

275
    memcpy(TextPtr, WordText.c_str(), Length + 1);
276
    StringRef Text = StringRef(TextPtr, Length);
277

278
    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
279
    return true;
280
  }
281

282
  /// Extract a word -- sequence of non-whitespace characters.
283
  bool lexWord(Token &Tok) {
284
    if (isEnd())
285
      return false;
286

287
    Position SavedPos = Pos;
288

289
    consumeWhitespace();
290
    SmallString<32> WordText;
291
    const char *WordBegin = Pos.BufferPtr;
292
    SourceLocation Loc = getSourceLocation();
293
    while (!isEnd()) {
294
      const char C = peek();
295
      if (!isWhitespace(C)) {
296
        WordText.push_back(C);
297
        consumeChar();
298
      } else
299
        break;
300
    }
301
    const unsigned Length = WordText.size();
302
    if (Length == 0) {
303
      Pos = SavedPos;
304
      return false;
305
    }
306

307
    char *TextPtr = Allocator.Allocate<char>(Length + 1);
308

309
    memcpy(TextPtr, WordText.c_str(), Length + 1);
310
    StringRef Text = StringRef(TextPtr, Length);
311

312
    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
313
    return true;
314
  }
315

316
  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
317
    if (isEnd())
318
      return false;
319

320
    Position SavedPos = Pos;
321

322
    consumeWhitespace();
323
    SmallString<32> WordText;
324
    const char *WordBegin = Pos.BufferPtr;
325
    SourceLocation Loc = getSourceLocation();
326
    bool Error = false;
327
    if (!isEnd()) {
328
      const char C = peek();
329
      if (C == OpenDelim) {
330
        WordText.push_back(C);
331
        consumeChar();
332
      } else
333
        Error = true;
334
    }
335
    char C = '\0';
336
    while (!Error && !isEnd()) {
337
      C = peek();
338
      WordText.push_back(C);
339
      consumeChar();
340
      if (C == CloseDelim)
341
        break;
342
    }
343
    if (!Error && C != CloseDelim)
344
      Error = true;
345

346
    if (Error) {
347
      Pos = SavedPos;
348
      return false;
349
    }
350

351
    const unsigned Length = WordText.size();
352
    char *TextPtr = Allocator.Allocate<char>(Length + 1);
353

354
    memcpy(TextPtr, WordText.c_str(), Length + 1);
355
    StringRef Text = StringRef(TextPtr, Length);
356

357
    formTokenWithChars(Tok, Loc, WordBegin,
358
                       Pos.BufferPtr - WordBegin, Text);
359
    return true;
360
  }
361

362
  /// Put back tokens that we didn't consume.
363
  void putBackLeftoverTokens() {
364
    if (isEnd())
365
      return;
366

367
    bool HavePartialTok = false;
368
    Token PartialTok;
369
    if (Pos.BufferPtr != Pos.BufferStart) {
370
      formTokenWithChars(PartialTok, getSourceLocation(),
371
                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
372
                         StringRef(Pos.BufferPtr,
373
                                   Pos.BufferEnd - Pos.BufferPtr));
374
      HavePartialTok = true;
375
      Pos.CurToken++;
376
    }
377

378
    P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
379
    Pos.CurToken = Toks.size();
380

381
    if (HavePartialTok)
382
      P.putBack(PartialTok);
383
  }
384
};
385

386
/// Builds a comment parser over lexer \p L that reports to \p S, and
/// immediately calls consumeToken() once (presumably to load the first
/// token into \c Tok -- consumeToken() is defined outside this file).
Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
               const CommandTraits &Traits)
    : L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
      Traits(Traits) {
  consumeToken();
}
393

394
void Parser::parseParamCommandArgs(ParamCommandComment *PC,
395
                                   TextTokenRetokenizer &Retokenizer) {
396
  Token Arg;
397
  // Check if argument looks like direction specification: [dir]
398
  // e.g., [in], [out], [in,out]
399
  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
400
    S.actOnParamCommandDirectionArg(PC,
401
                                    Arg.getLocation(),
402
                                    Arg.getEndLocation(),
403
                                    Arg.getText());
404

405
  if (Retokenizer.lexWord(Arg))
406
    S.actOnParamCommandParamNameArg(PC,
407
                                    Arg.getLocation(),
408
                                    Arg.getEndLocation(),
409
                                    Arg.getText());
410
}
411

412
void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
413
                                    TextTokenRetokenizer &Retokenizer) {
414
  Token Arg;
415
  if (Retokenizer.lexWord(Arg))
416
    S.actOnTParamCommandParamNameArg(TPC,
417
                                     Arg.getLocation(),
418
                                     Arg.getEndLocation(),
419
                                     Arg.getText());
420
}
421

422
/// Parses up to \p NumArgs whitespace-delimited word arguments.
///
/// \returns the arguments that were actually found; this may be fewer than
/// \p NumArgs.  Storage comes from the parser's allocator.
ArrayRef<Comment::Argument>
Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
  auto *Storage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token Word; Count < NumArgs && Retokenizer.lexWord(Word); ++Count) {
    const SourceRange Range(Word.getLocation(), Word.getEndLocation());
    Storage[Count] = Comment::Argument{Range, Word.getText()};
  }

  return llvm::ArrayRef(Storage, Count);
}
436

437
/// Parses up to \p NumArgs type arguments (as lexed by lexType(), which
/// understands balanced angle brackets) for a throws-style command.
///
/// \returns the arguments that were actually found; this may be fewer than
/// \p NumArgs.  Storage comes from the parser's allocator.
ArrayRef<Comment::Argument>
Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
                              unsigned NumArgs) {
  auto *Storage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token Type; Count < NumArgs && Retokenizer.lexType(Type); ++Count) {
    const SourceRange Range(Type.getLocation(), Type.getEndLocation());
    Storage[Count] = Comment::Argument{Range, Type.getText()};
  }

  return llvm::ArrayRef(Storage, Count);
}
453

454
/// Parses up to \p NumArgs paragraph headings (as lexed by lexParHeading())
/// for a par command.  \p NumArgs must be greater than zero.
///
/// \returns the arguments that were actually found; this may be fewer than
/// \p NumArgs.  Storage comes from the parser's allocator.
ArrayRef<Comment::Argument>
Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
                            unsigned NumArgs) {
  assert(NumArgs > 0);
  auto *Storage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token Heading; Count < NumArgs && Retokenizer.lexParHeading(Heading);
       ++Count) {
    const SourceRange Range(Heading.getLocation(), Heading.getEndLocation());
    Storage[Count] = Comment::Argument{Range, Heading.getText()};
  }

  return llvm::ArrayRef(Storage, Count);
}
471

472
/// Parses a block command (param, tparam, or any other block command)
/// together with its arguments and the paragraph that follows it.
///
/// The current token must be a backslash or at command.  If another block
/// command follows immediately (or after a single newline), the command gets
/// an empty paragraph because block commands cannot nest.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  CommandMarkerKind Marker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;

  // Exactly one of these three is set, depending on the command flavor.
  ParamCommandComment *ParamCmd = nullptr;
  TParamCommandComment *TParamCmd = nullptr;
  BlockCommandComment *PlainCmd = nullptr;
  if (Info->IsParamCommand)
    ParamCmd = S.actOnParamCommandStart(Tok.getLocation(),
                                        Tok.getEndLocation(),
                                        Tok.getCommandID(), Marker);
  else if (Info->IsTParamCommand)
    TParamCmd = S.actOnTParamCommandStart(Tok.getLocation(),
                                          Tok.getEndLocation(),
                                          Tok.getCommandID(), Marker);
  else
    PlainCmd = S.actOnBlockCommandStart(Tok.getLocation(),
                                        Tok.getEndLocation(),
                                        Tok.getCommandID(), Marker);
  consumeToken();

  // Attaches the paragraph to whichever command node was started above and
  // returns that node.
  auto Finish = [&](ParagraphComment *Body) -> BlockCommandComment * {
    if (ParamCmd) {
      S.actOnParamCommandFinish(ParamCmd, Body);
      return ParamCmd;
    }
    if (TParamCmd) {
      S.actOnTParamCommandFinish(TParamCmd, Body);
      return TParamCmd;
    }
    S.actOnBlockCommandFinish(PlainCmd, Body);
    return PlainCmd;
  };

  // Block command ahead.  We can't nest block commands, so pretend that this
  // command has an empty argument.
  if (isTokBlockCommand())
    return Finish(S.actOnParagraphComment(std::nullopt));

  if (ParamCmd || TParamCmd || Info->NumArgs > 0) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (ParamCmd) {
      parseParamCommandArgs(ParamCmd, Retokenizer);
    } else if (TParamCmd) {
      parseTParamCommandArgs(TParamCmd, Retokenizer);
    } else {
      // Pick the argument lexer that matches the command kind.
      ArrayRef<Comment::Argument> Args;
      if (Info->IsThrowsCommand)
        Args = parseThrowCommandArgs(Retokenizer, Info->NumArgs);
      else if (Info->IsParCommand)
        Args = parseParCommandArgs(Retokenizer, Info->NumArgs);
      else
        Args = parseCommandArgs(Retokenizer, Info->NumArgs);
      S.actOnBlockCommandArgs(PlainCmd, Args);
    }

    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead, we will attach an empty paragraph to
  // this command.  A single newline in between is looked through and then
  // put back.
  bool EmptyParagraph = isTokBlockCommand();
  if (!EmptyParagraph && Tok.is(tok::newline)) {
    const Token NewlineTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(NewlineTok);
  }

  // Since we have checked for a block command, the non-empty case should
  // have parsed a paragraph, hence the cast.
  ParagraphComment *Body =
      EmptyParagraph ? S.actOnParagraphComment(std::nullopt)
                     : cast<ParagraphComment>(parseParagraphOrBlockCommand());

  return Finish(Body);
}
569

570
/// Parses an inline command and up to \c Info->NumArgs word arguments.
///
/// The current token must be a backslash or at command.  A warning is
/// emitted when fewer arguments than expected are present.  Unconsumed text
/// is handed back to the parser before returning.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  const Token CommandTok = Tok;
  const CommandInfo *Info = Traits.getCommandInfo(CommandTok.getCommandID());
  consumeToken();

  const SourceLocation CommandBegin = CommandTok.getLocation();
  const SourceLocation CommandEnd = CommandTok.getEndLocation();

  TextTokenRetokenizer Retokenizer(Allocator, *this);
  ArrayRef<Comment::Argument> Args =
      parseCommandArgs(Retokenizer, Info->NumArgs);

  InlineCommandComment *IC = S.actOnInlineCommand(
      CommandBegin, CommandEnd, CommandTok.getCommandID(), Args);

  const bool MissingArgs = Args.size() < Info->NumArgs;
  if (MissingArgs) {
    // Point just past the command name, where the argument was expected.
    Diag(CommandEnd.getLocWithOffset(1),
         diag::warn_doc_inline_command_not_enough_arguments)
        << CommandTok.is(tok::at_command) << Info->Name << Args.size()
        << Info->NumArgs << SourceRange(CommandBegin, CommandEnd);
  }

  Retokenizer.putBackLeftoverTokens();
  return IC;
}
597

598
/// Parses an HTML start tag with its attributes.
///
/// The current token must be tok::html_start_tag.  Attributes are collected
/// until '>' or '/>' is seen.  Malformed input is diagnosed and the tag is
/// finished with a default-constructed location for the closing '>'.
HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(), Tok.getHTMLTagStartName());
  consumeToken();

  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;

  // Hands the attributes collected so far to Sema and closes the tag.
  auto FinishTag = [&](SourceLocation GreaterLoc, bool IsSelfClosing) {
    S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                              GreaterLoc, IsSelfClosing);
  };

  // Skips any run of '=' and quoted-string tokens.
  auto SkipEqualsAndStrings = [&] {
    while (Tok.is(tok::html_equals) || Tok.is(tok::html_quoted_string))
      consumeToken();
  };

  // Records an attribute that has a name but no value.
  auto AddValuelessAttr = [&](const Token &Ident) {
    Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
                                                   Ident.getHTMLIdent()));
  };

  for (;;) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      const Token Ident = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_equals)) {
        AddValuelessAttr(Ident);
        continue;
      }

      const Token Equals = Tok;
      consumeToken();
      if (Tok.isNot(tok::html_quoted_string)) {
        // '=' without a quoted value: warn, keep the bare name, resync.
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_quoted_string)
            << SourceRange(Equals.getLocation());
        AddValuelessAttr(Ident);
        SkipEqualsAndStrings();
        continue;
      }

      Attrs.push_back(HTMLStartTagComment::Attribute(
          Ident.getLocation(), Ident.getHTMLIdent(), Equals.getLocation(),
          SourceRange(Tok.getLocation(), Tok.getEndLocation()),
          Tok.getHTMLQuotedString()));
      consumeToken();
      continue;
    }

    case tok::html_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string: {
      // Stray '=' or string where an attribute name was expected.
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      SkipEqualsAndStrings();

      const bool StillInsideTag = Tok.is(tok::html_ident) ||
                                  Tok.is(tok::html_greater) ||
                                  Tok.is(tok::html_slash_greater);
      if (StillInsideTag)
        continue;

      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);
      return HST;
    }

    default: {
      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);

      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
          HST->getLocation(), &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine =
          SourceMgr.getPresumedLineNumber(Tok.getLocation(), &EndLineInvalid);

      // When the tag start and the offending token share a line (or a line
      // number is unavailable), a single warning carrying the tag range is
      // emitted; otherwise the tag start gets its own note.
      const bool SingleDiag =
          StartLineInvalid || EndLineInvalid || StartLine == EndLine;
      if (SingleDiag) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
            << HST->getSourceRange();
      } else {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
            << HST->getSourceRange();
      }
      return HST;
    }
    }
  }
}
698

699
/// Parses an HTML end tag.  The current token must be tok::html_end_tag; a
/// following '>' is consumed if present.
HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));
  const Token EndTagTok = Tok;
  consumeToken();

  // Remains default-constructed when the closing '>' is missing.
  SourceLocation GreaterLoc;
  if (Tok.is(tok::html_greater)) {
    GreaterLoc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(EndTagTok.getLocation(), GreaterLoc,
                           EndTagTok.getHTMLTagEndName());
}
713

714
/// Parses a paragraph of inline content, or -- if the very first thing seen
/// is a block command -- that block command instead.
///
/// The paragraph ends at EOF, at verbatim content, at a block command, or at
/// a blank line (two newlines, optionally separated by whitespace-only text).
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Inlines;

  // True when the current token is a newline or the end of the comment.
  auto AtNewlineOrEOF = [&] {
    return Tok.is(tok::newline) || Tok.is(tok::eof);
  };

  // Inside the switch, `continue` proceeds with the next token, while `break`
  // reaches the `break` after the switch and thereby ends the paragraph.
  for (;;) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::unknown_command:
      Inlines.push_back(S.actOnUnknownCommand(
          Tok.getLocation(), Tok.getEndLocation(),
          Tok.getUnknownCommandName()));
      consumeToken();
      continue;

    case tok::backslash_command:
    case tok::at_command: {
      const CommandInfo *Cmd = Traits.getCommandInfo(Tok.getCommandID());
      if (Cmd->IsBlockCommand) {
        if (Inlines.empty())
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }
      if (Cmd->IsVerbatimBlockEndCommand) {
        Diag(Tok.getLocation(), diag::warn_verbatim_block_end_without_start)
            << Tok.is(tok::at_command) << Cmd->Name
            << SourceRange(Tok.getLocation(), Tok.getEndLocation());
        consumeToken();
        continue;
      }
      if (Cmd->IsUnknownCommand) {
        Inlines.push_back(S.actOnUnknownCommand(
            Tok.getLocation(), Tok.getEndLocation(), Cmd->getID()));
        consumeToken();
        continue;
      }
      assert(Cmd->IsInlineCommand);
      Inlines.push_back(parseInlineCommand());
      continue;
    }

    case tok::newline: {
      consumeToken();
      if (AtNewlineOrEOF()) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }
      // Also allow [tok::newline, tok::text, tok::newline] if the middle
      // tok::text is just whitespace.
      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
        const Token BlankText = Tok;
        consumeToken();
        if (AtNewlineOrEOF()) {
          consumeToken();
          break;
        }
        // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
        putBack(BlankText);
      }
      if (!Inlines.empty())
        Inlines.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_start_tag:
      Inlines.push_back(parseHTMLStartTag());
      continue;

    case tok::html_end_tag:
      Inlines.push_back(parseHTMLEndTag());
      continue;

    case tok::text:
      Inlines.push_back(
          S.actOnText(Tok.getLocation(), Tok.getEndLocation(), Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
    case tok::html_slash_greater:
      llvm_unreachable("should not see this token");
    }
    break;
  }

  return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Inlines)));
}
814

815
/// Parses a verbatim block: the opening command, its lines, and the closing
/// command if there is one.  The current token must be
/// tok::verbatim_block_begin.
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(), Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  for (;;) {
    const bool HasText = Tok.is(tok::verbatim_block_line);
    if (!HasText && Tok.isNot(tok::newline))
      break;

    // A bare tok::newline stands for an empty line.
    const StringRef LineText =
        HasText ? Tok.getVerbatimBlockText() : StringRef("");
    Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(), LineText));
    consumeToken();

    // A text line also swallows the newline that terminates it.
    if (HasText && Tok.is(tok::newline))
      consumeToken();
  }

  if (Tok.isNot(tok::verbatim_block_end)) {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::ArrayRef(Lines)));
    return VB;
  }

  const CommandInfo *EndInfo = Traits.getCommandInfo(Tok.getVerbatimBlockID());
  S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), EndInfo->Name,
                             S.copyArray(llvm::ArrayRef(Lines)));
  consumeToken();
  return VB;
}
860

861
/// Parses a verbatim line command and its text.  The current token must be
/// tok::verbatim_line_name.  One token after the name is always consumed.
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  const Token NameTok = Tok;
  consumeToken();

  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.  In that
  // case the text is empty and is located at the end of the command name.
  const bool HasText = Tok.is(tok::verbatim_line_text);
  const SourceLocation TextBegin =
      HasText ? Tok.getLocation() : NameTok.getEndLocation();
  const StringRef Text = HasText ? Tok.getVerbatimLineText() : StringRef("");

  VerbatimLineComment *VL = S.actOnVerbatimLine(
      NameTok.getLocation(), NameTok.getVerbatimLineID(), TextBegin, Text);
  consumeToken();
  return VL;
}
886

887
/// Dispatches on the current token to the parser for the block-level
/// construct that starts there: a verbatim block, a verbatim line, or a
/// paragraph / block command.  Any other token kind is not expected here.
BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  // Everything that can begin inline content or a block command.
  case tok::text:
  case tok::unknown_command:
  case tok::backslash_command:
  case tok::at_command:
  case tok::html_start_tag:
  case tok::html_end_tag:
    return parseParagraphOrBlockCommand();

  // Listed explicitly (no default label) so every token kind is named.
  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
  case tok::html_slash_greater:
    llvm_unreachable("should not see this token");
  }
  llvm_unreachable("bogus token kind");
}
917

918
/// Parses the whole comment: a sequence of block-level items separated by
/// any number of newlines, up to tok::eof.
FullComment *Parser::parseFullComment() {
  auto SkipNewlines = [&] {
    while (Tok.is(tok::newline))
      consumeToken();
  };

  SmallVector<BlockContentComment *, 8> Blocks;
  // Newlines are skipped once up front and again after every block.
  for (SkipNewlines(); Tok.isNot(tok::eof); SkipNewlines())
    Blocks.push_back(parseBlockContent());

  return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
}
933

934
} // end namespace comments
935
} // end namespace clang
936

937
Product

Resources

Company