Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/AST/CommentParser.cpp
35260 views
1
//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang/AST/CommentParser.h"
10
#include "clang/AST/CommentCommandTraits.h"
11
#include "clang/AST/CommentDiagnostic.h"
12
#include "clang/AST/CommentSema.h"
13
#include "clang/Basic/CharInfo.h"
14
#include "clang/Basic/SourceManager.h"
15
#include "llvm/Support/ErrorHandling.h"
16
17
namespace clang {
18
19
/// Returns true if every character of \p S is whitespace according to the
/// single-character isWhitespace() overload. An empty string yields true.
static inline bool isWhitespace(llvm::StringRef S) {
  const char *Cur = S.begin();
  const char *const End = S.end();
  // Advance over the leading run of whitespace; the string qualifies only if
  // that run reaches the very end.
  while (Cur != End && isWhitespace(*Cur))
    ++Cur;
  return Cur == End;
}
26
27
namespace comments {
28
29
/// Re-lexes a sequence of tok::text tokens.
30
class TextTokenRetokenizer {
31
llvm::BumpPtrAllocator &Allocator;
32
Parser &P;
33
34
/// This flag is set when there are no more tokens we can fetch from lexer.
35
bool NoMoreInterestingTokens;
36
37
/// Token buffer: tokens we have processed and lookahead.
38
SmallVector<Token, 16> Toks;
39
40
/// A position in \c Toks.
41
struct Position {
42
const char *BufferStart;
43
const char *BufferEnd;
44
const char *BufferPtr;
45
SourceLocation BufferStartLoc;
46
unsigned CurToken;
47
};
48
49
/// Current position in Toks.
50
Position Pos;
51
52
bool isEnd() const {
53
return Pos.CurToken >= Toks.size();
54
}
55
56
/// Sets up the buffer pointers to point to current token.
57
void setupBuffer() {
58
assert(!isEnd());
59
const Token &Tok = Toks[Pos.CurToken];
60
61
Pos.BufferStart = Tok.getText().begin();
62
Pos.BufferEnd = Tok.getText().end();
63
Pos.BufferPtr = Pos.BufferStart;
64
Pos.BufferStartLoc = Tok.getLocation();
65
}
66
67
SourceLocation getSourceLocation() const {
68
const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69
return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70
}
71
72
char peek() const {
73
assert(!isEnd());
74
assert(Pos.BufferPtr != Pos.BufferEnd);
75
return *Pos.BufferPtr;
76
}
77
78
void consumeChar() {
79
assert(!isEnd());
80
assert(Pos.BufferPtr != Pos.BufferEnd);
81
Pos.BufferPtr++;
82
if (Pos.BufferPtr == Pos.BufferEnd) {
83
Pos.CurToken++;
84
if (isEnd() && !addToken())
85
return;
86
87
assert(!isEnd());
88
setupBuffer();
89
}
90
}
91
92
/// Extract a template type
93
bool lexTemplate(SmallString<32> &WordText) {
94
unsigned BracketCount = 0;
95
while (!isEnd()) {
96
const char C = peek();
97
WordText.push_back(C);
98
consumeChar();
99
switch (C) {
100
case '<': {
101
BracketCount++;
102
break;
103
}
104
case '>': {
105
BracketCount--;
106
if (!BracketCount)
107
return true;
108
break;
109
}
110
default:
111
break;
112
}
113
}
114
return false;
115
}
116
117
/// Add a token.
118
/// Returns true on success, false if there are no interesting tokens to
119
/// fetch from lexer.
120
bool addToken() {
121
if (NoMoreInterestingTokens)
122
return false;
123
124
if (P.Tok.is(tok::newline)) {
125
// If we see a single newline token between text tokens, skip it.
126
Token Newline = P.Tok;
127
P.consumeToken();
128
if (P.Tok.isNot(tok::text)) {
129
P.putBack(Newline);
130
NoMoreInterestingTokens = true;
131
return false;
132
}
133
}
134
if (P.Tok.isNot(tok::text)) {
135
NoMoreInterestingTokens = true;
136
return false;
137
}
138
139
Toks.push_back(P.Tok);
140
P.consumeToken();
141
if (Toks.size() == 1)
142
setupBuffer();
143
return true;
144
}
145
146
void consumeWhitespace() {
147
while (!isEnd()) {
148
if (isWhitespace(peek()))
149
consumeChar();
150
else
151
break;
152
}
153
}
154
155
void formTokenWithChars(Token &Result,
156
SourceLocation Loc,
157
const char *TokBegin,
158
unsigned TokLength,
159
StringRef Text) {
160
Result.setLocation(Loc);
161
Result.setKind(tok::text);
162
Result.setLength(TokLength);
163
#ifndef NDEBUG
164
Result.TextPtr = "<UNSET>";
165
Result.IntVal = 7;
166
#endif
167
Result.setText(Text);
168
}
169
170
public:
171
TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
172
Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
173
Pos.CurToken = 0;
174
addToken();
175
}
176
177
/// Extract a type argument
178
bool lexType(Token &Tok) {
179
if (isEnd())
180
return false;
181
182
// Save current position in case we need to rollback because the type is
183
// empty.
184
Position SavedPos = Pos;
185
186
// Consume any leading whitespace.
187
consumeWhitespace();
188
SmallString<32> WordText;
189
const char *WordBegin = Pos.BufferPtr;
190
SourceLocation Loc = getSourceLocation();
191
192
while (!isEnd()) {
193
const char C = peek();
194
// For non-whitespace characters we check if it's a template or otherwise
195
// continue reading the text into a word.
196
if (!isWhitespace(C)) {
197
if (C == '<') {
198
if (!lexTemplate(WordText))
199
return false;
200
} else {
201
WordText.push_back(C);
202
consumeChar();
203
}
204
} else {
205
consumeChar();
206
break;
207
}
208
}
209
210
const unsigned Length = WordText.size();
211
if (Length == 0) {
212
Pos = SavedPos;
213
return false;
214
}
215
216
char *TextPtr = Allocator.Allocate<char>(Length + 1);
217
218
memcpy(TextPtr, WordText.c_str(), Length + 1);
219
StringRef Text = StringRef(TextPtr, Length);
220
221
formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
222
return true;
223
}
224
225
// Check if this line starts with @par or \par
226
bool startsWithParCommand() {
227
unsigned Offset = 1;
228
229
// Skip all whitespace characters at the beginning.
230
// This needs to backtrack because Pos has already advanced past the
231
// actual \par or @par command by the time this function is called.
232
while (isWhitespace(*(Pos.BufferPtr - Offset)))
233
Offset++;
234
235
// Once we've reached the whitespace, backtrack and check if the previous
236
// four characters are \par or @par.
237
llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
238
return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
239
}
240
241
/// Extract a par command argument-header.
242
bool lexParHeading(Token &Tok) {
243
if (isEnd())
244
return false;
245
246
Position SavedPos = Pos;
247
248
consumeWhitespace();
249
SmallString<32> WordText;
250
const char *WordBegin = Pos.BufferPtr;
251
SourceLocation Loc = getSourceLocation();
252
253
if (!startsWithParCommand())
254
return false;
255
256
// Read until the end of this token, which is effectively the end of the
257
// line. This gets us the content of the par header, if there is one.
258
while (!isEnd()) {
259
WordText.push_back(peek());
260
if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
261
consumeChar();
262
break;
263
}
264
consumeChar();
265
}
266
267
unsigned Length = WordText.size();
268
if (Length == 0) {
269
Pos = SavedPos;
270
return false;
271
}
272
273
char *TextPtr = Allocator.Allocate<char>(Length + 1);
274
275
memcpy(TextPtr, WordText.c_str(), Length + 1);
276
StringRef Text = StringRef(TextPtr, Length);
277
278
formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
279
return true;
280
}
281
282
/// Extract a word -- sequence of non-whitespace characters.
283
bool lexWord(Token &Tok) {
284
if (isEnd())
285
return false;
286
287
Position SavedPos = Pos;
288
289
consumeWhitespace();
290
SmallString<32> WordText;
291
const char *WordBegin = Pos.BufferPtr;
292
SourceLocation Loc = getSourceLocation();
293
while (!isEnd()) {
294
const char C = peek();
295
if (!isWhitespace(C)) {
296
WordText.push_back(C);
297
consumeChar();
298
} else
299
break;
300
}
301
const unsigned Length = WordText.size();
302
if (Length == 0) {
303
Pos = SavedPos;
304
return false;
305
}
306
307
char *TextPtr = Allocator.Allocate<char>(Length + 1);
308
309
memcpy(TextPtr, WordText.c_str(), Length + 1);
310
StringRef Text = StringRef(TextPtr, Length);
311
312
formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
313
return true;
314
}
315
316
bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
317
if (isEnd())
318
return false;
319
320
Position SavedPos = Pos;
321
322
consumeWhitespace();
323
SmallString<32> WordText;
324
const char *WordBegin = Pos.BufferPtr;
325
SourceLocation Loc = getSourceLocation();
326
bool Error = false;
327
if (!isEnd()) {
328
const char C = peek();
329
if (C == OpenDelim) {
330
WordText.push_back(C);
331
consumeChar();
332
} else
333
Error = true;
334
}
335
char C = '\0';
336
while (!Error && !isEnd()) {
337
C = peek();
338
WordText.push_back(C);
339
consumeChar();
340
if (C == CloseDelim)
341
break;
342
}
343
if (!Error && C != CloseDelim)
344
Error = true;
345
346
if (Error) {
347
Pos = SavedPos;
348
return false;
349
}
350
351
const unsigned Length = WordText.size();
352
char *TextPtr = Allocator.Allocate<char>(Length + 1);
353
354
memcpy(TextPtr, WordText.c_str(), Length + 1);
355
StringRef Text = StringRef(TextPtr, Length);
356
357
formTokenWithChars(Tok, Loc, WordBegin,
358
Pos.BufferPtr - WordBegin, Text);
359
return true;
360
}
361
362
/// Put back tokens that we didn't consume.
363
void putBackLeftoverTokens() {
364
if (isEnd())
365
return;
366
367
bool HavePartialTok = false;
368
Token PartialTok;
369
if (Pos.BufferPtr != Pos.BufferStart) {
370
formTokenWithChars(PartialTok, getSourceLocation(),
371
Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
372
StringRef(Pos.BufferPtr,
373
Pos.BufferEnd - Pos.BufferPtr));
374
HavePartialTok = true;
375
Pos.CurToken++;
376
}
377
378
P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
379
Pos.CurToken = Toks.size();
380
381
if (HavePartialTok)
382
P.putBack(PartialTok);
383
}
384
};
385
386
/// Stores the collaborators the parser works with and then calls
/// consumeToken() once -- presumably to load the first token into Tok
/// (consumeToken() is declared outside this file).
Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
               const CommandTraits &Traits)
    : L(L),
      S(S),
      Allocator(Allocator),
      SourceMgr(SourceMgr),
      Diags(Diags),
      Traits(Traits) {
  consumeToken();
}
393
394
void Parser::parseParamCommandArgs(ParamCommandComment *PC,
395
TextTokenRetokenizer &Retokenizer) {
396
Token Arg;
397
// Check if argument looks like direction specification: [dir]
398
// e.g., [in], [out], [in,out]
399
if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
400
S.actOnParamCommandDirectionArg(PC,
401
Arg.getLocation(),
402
Arg.getEndLocation(),
403
Arg.getText());
404
405
if (Retokenizer.lexWord(Arg))
406
S.actOnParamCommandParamNameArg(PC,
407
Arg.getLocation(),
408
Arg.getEndLocation(),
409
Arg.getText());
410
}
411
412
void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
413
TextTokenRetokenizer &Retokenizer) {
414
Token Arg;
415
if (Retokenizer.lexWord(Arg))
416
S.actOnTParamCommandParamNameArg(TPC,
417
Arg.getLocation(),
418
Arg.getEndLocation(),
419
Arg.getText());
420
}
421
422
/// Lexes up to \p NumArgs whitespace-delimited words and returns them as
/// command arguments.  The returned array lives in storage obtained from
/// \c Allocator and may hold fewer than \p NumArgs entries if the text runs
/// out of words.
ArrayRef<Comment::Argument>
Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
  auto *Storage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token Word; Count < NumArgs && Retokenizer.lexWord(Word); ++Count) {
    Storage[Count] = Comment::Argument{
        SourceRange(Word.getLocation(), Word.getEndLocation()),
        Word.getText()};
  }

  return llvm::ArrayRef(Storage, Count);
}
436
437
/// Lexes up to \p NumArgs type arguments (see
/// TextTokenRetokenizer::lexType) and returns them as command arguments.
/// The returned array lives in storage obtained from \c Allocator and may
/// hold fewer than \p NumArgs entries.
ArrayRef<Comment::Argument>
Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
                              unsigned NumArgs) {
  auto *Storage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token Type; Count < NumArgs && Retokenizer.lexType(Type); ++Count) {
    Storage[Count] = Comment::Argument{
        SourceRange(Type.getLocation(), Type.getEndLocation()),
        Type.getText()};
  }

  return llvm::ArrayRef(Storage, Count);
}
453
454
/// Lexes up to \p NumArgs paragraph headings (see
/// TextTokenRetokenizer::lexParHeading) and returns them as command
/// arguments.  \p NumArgs must be non-zero.  The returned array lives in
/// storage obtained from \c Allocator and may hold fewer than \p NumArgs
/// entries.
ArrayRef<Comment::Argument>
Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
                            unsigned NumArgs) {
  assert(NumArgs > 0);
  auto *Storage = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned Count = 0;
  for (Token Heading; Count < NumArgs && Retokenizer.lexParHeading(Heading);
       ++Count) {
    Storage[Count] = Comment::Argument{
        SourceRange(Heading.getLocation(), Heading.getEndLocation()),
        Heading.getText()};
  }

  return llvm::ArrayRef(Storage, Count);
}
471
472
/// Parses a block command together with its arguments and the paragraph
/// attached to it.
///
/// Depending on the command's traits the result is a ParamCommandComment, a
/// TParamCommandComment or a plain BlockCommandComment.  Must be called with
/// the current token on the command itself.
BlockCommandComment *Parser::parseBlockCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

  // Exactly one of these three is set, according to the kind of command.
  ParamCommandComment *PC = nullptr;
  TParamCommandComment *TPC = nullptr;
  BlockCommandComment *BC = nullptr;

  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
  CommandMarkerKind CommandMarker =
      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;

  if (Info->IsParamCommand) {
    PC = S.actOnParamCommandStart(Tok.getLocation(), Tok.getEndLocation(),
                                  Tok.getCommandID(), CommandMarker);
  } else if (Info->IsTParamCommand) {
    TPC = S.actOnTParamCommandStart(Tok.getLocation(), Tok.getEndLocation(),
                                    Tok.getCommandID(), CommandMarker);
  } else {
    BC = S.actOnBlockCommandStart(Tok.getLocation(), Tok.getEndLocation(),
                                  Tok.getCommandID(), CommandMarker);
  }
  consumeToken();

  // Attaches the paragraph to whichever comment node was started above and
  // returns that node.
  auto FinishWith =
      [&](ParagraphComment *Paragraph) -> BlockCommandComment * {
    if (PC) {
      S.actOnParamCommandFinish(PC, Paragraph);
      return PC;
    }
    if (TPC) {
      S.actOnTParamCommandFinish(TPC, Paragraph);
      return TPC;
    }
    S.actOnBlockCommandFinish(BC, Paragraph);
    return BC;
  };

  // Block command ahead.  We can't nest block commands, so pretend that this
  // command has an empty argument.
  if (isTokBlockCommand())
    return FinishWith(S.actOnParagraphComment(std::nullopt));

  if (PC || TPC || Info->NumArgs > 0) {
    // In order to parse command arguments we need to retokenize a few
    // following text tokens.
    TextTokenRetokenizer Retokenizer(Allocator, *this);

    if (PC) {
      parseParamCommandArgs(PC, Retokenizer);
    } else if (TPC) {
      parseTParamCommandArgs(TPC, Retokenizer);
    } else {
      // Ordinary block command: pick the argument lexer by command flavour.
      ArrayRef<Comment::Argument> Args =
          Info->IsThrowsCommand
              ? parseThrowCommandArgs(Retokenizer, Info->NumArgs)
          : Info->IsParCommand
              ? parseParCommandArgs(Retokenizer, Info->NumArgs)
              : parseCommandArgs(Retokenizer, Info->NumArgs);
      S.actOnBlockCommandArgs(BC, Args);
    }

    Retokenizer.putBackLeftoverTokens();
  }

  // If there's a block command ahead -- directly or after a single newline --
  // we will attach an empty paragraph to this command.
  bool EmptyParagraph = isTokBlockCommand();
  if (!EmptyParagraph && Tok.is(tok::newline)) {
    // Peek past the newline, then restore it.
    Token PrevTok = Tok;
    consumeToken();
    EmptyParagraph = isTokBlockCommand();
    putBack(PrevTok);
  }

  // Since we have checked for a block command, the non-empty case should
  // have parsed a paragraph.
  ParagraphComment *Paragraph =
      EmptyParagraph
          ? S.actOnParagraphComment(std::nullopt)
          : cast<ParagraphComment>(parseParagraphOrBlockCommand());

  return FinishWith(Paragraph);
}
569
570
/// Parses an inline command and as many of its word arguments as are
/// available.  A warning is emitted when fewer arguments were found than the
/// command declares.  Must be called with the current token on the command.
InlineCommandComment *Parser::parseInlineCommand() {
  assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
  const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());

  // Keep a copy of the command token; the arguments are lexed after it has
  // been consumed.
  const Token CommandTok = Tok;
  consumeToken();

  TextTokenRetokenizer Retokenizer(Allocator, *this);
  ArrayRef<Comment::Argument> Args =
      parseCommandArgs(Retokenizer, Info->NumArgs);

  InlineCommandComment *IC =
      S.actOnInlineCommand(CommandTok.getLocation(),
                           CommandTok.getEndLocation(),
                           CommandTok.getCommandID(), Args);

  const bool TooFewArgs = Args.size() < Info->NumArgs;
  if (TooFewArgs) {
    // The diagnostic points one character past the end of the command.
    Diag(CommandTok.getEndLocation().getLocWithOffset(1),
         diag::warn_doc_inline_command_not_enough_arguments)
        << CommandTok.is(tok::at_command) << Info->Name << Args.size()
        << Info->NumArgs
        << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
  }

  Retokenizer.putBackLeftoverTokens();

  return IC;
}
597
598
/// Parses an HTML start tag and its attributes.
///
/// Attributes are collected until the tag is closed by '>' or '/>', or until
/// a token shows up that cannot belong to a start tag.  Malformed input is
/// diagnosed and the tag is finished with an invalid closing location.
HTMLStartTagComment *Parser::parseHTMLStartTag() {
  assert(Tok.is(tok::html_start_tag));
  HTMLStartTagComment *HST =
      S.actOnHTMLStartTagStart(Tok.getLocation(), Tok.getHTMLTagStartName());
  consumeToken();

  using Attribute = HTMLStartTagComment::Attribute;
  SmallVector<Attribute, 2> Attrs;

  // Hands the attributes gathered so far to Sema and closes the tag.
  auto FinishTag = [&](SourceLocation GreaterLoc, bool IsSelfClosing) {
    S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
                              GreaterLoc, IsSelfClosing);
  };

  // Error recovery: drop any run of '=' and quoted-string tokens.
  auto SkipEqualsAndQuotedStrings = [&] {
    while (Tok.is(tok::html_equals) || Tok.is(tok::html_quoted_string))
      consumeToken();
  };

  for (;;) {
    switch (Tok.getKind()) {
    case tok::html_ident: {
      const Token Ident = Tok;
      consumeToken();

      // Attribute without a value.
      if (Tok.isNot(tok::html_equals)) {
        Attrs.push_back(Attribute(Ident.getLocation(), Ident.getHTMLIdent()));
        continue;
      }

      const Token Equals = Tok;
      consumeToken();

      // Well-formed 'name = "value"'.
      if (Tok.is(tok::html_quoted_string)) {
        Attrs.push_back(Attribute(
            Ident.getLocation(), Ident.getHTMLIdent(), Equals.getLocation(),
            SourceRange(Tok.getLocation(), Tok.getEndLocation()),
            Tok.getHTMLQuotedString()));
        consumeToken();
        continue;
      }

      // '=' not followed by a quoted string: warn, keep the attribute name
      // only, and resynchronize.
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_quoted_string)
          << SourceRange(Equals.getLocation());
      Attrs.push_back(Attribute(Ident.getLocation(), Ident.getHTMLIdent()));
      SkipEqualsAndQuotedStrings();
      continue;
    }

    case tok::html_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ false);
      consumeToken();
      return HST;

    case tok::html_slash_greater:
      FinishTag(Tok.getLocation(), /* IsSelfClosing = */ true);
      consumeToken();
      return HST;

    case tok::html_equals:
    case tok::html_quoted_string:
      // A stray '=' or string where an attribute name or the end of the tag
      // was expected.
      Diag(Tok.getLocation(),
           diag::warn_doc_html_start_tag_expected_ident_or_greater);
      SkipEqualsAndQuotedStrings();
      if (Tok.is(tok::html_ident) || Tok.is(tok::html_greater) ||
          Tok.is(tok::html_slash_greater))
        continue;

      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);
      return HST;

    default: {
      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
      FinishTag(SourceLocation(), /* IsSelfClosing = */ false);

      bool StartLineInvalid;
      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
          HST->getLocation(), &StartLineInvalid);
      bool EndLineInvalid;
      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
          Tok.getLocation(), &EndLineInvalid);

      const bool OnSameLine =
          StartLineInvalid || EndLineInvalid || StartLine == EndLine;
      if (OnSameLine) {
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater)
            << HST->getSourceRange();
      } else {
        // The tag began on a different line; add a note pointing at it.
        Diag(Tok.getLocation(),
             diag::warn_doc_html_start_tag_expected_ident_or_greater);
        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
            << HST->getSourceRange();
      }
      return HST;
    }
    }
  }
}
698
699
/// Parses an HTML end tag.  The closing '>' is optional; when it is missing
/// the location passed to Sema for it is left default-constructed.
HTMLEndTagComment *Parser::parseHTMLEndTag() {
  assert(Tok.is(tok::html_end_tag));
  const Token EndTag = Tok;
  consumeToken();

  SourceLocation GreaterLoc;
  if (Tok.is(tok::html_greater)) {
    GreaterLoc = Tok.getLocation();
    consumeToken();
  }

  return S.actOnHTMLEndTag(EndTag.getLocation(), GreaterLoc,
                           EndTag.getHTMLTagEndName());
}
713
714
/// Parses either a paragraph of inline content or, when the very first thing
/// seen is a block command, that block command.
///
/// Inline content is accumulated until something ends the paragraph: block
/// content, a block command, end of input, or an empty line (two newlines,
/// optionally with whitespace-only text between them).
BlockContentComment *Parser::parseParagraphOrBlockCommand() {
  SmallVector<InlineContentComment *, 8> Content;

  // True when the current token terminates a line that turned out empty.
  auto AtNewlineOrEOF = [&] {
    return Tok.is(tok::newline) || Tok.is(tok::eof);
  };

  // Within the switch, 'continue' keeps collecting inline content while
  // 'break' falls through to the 'break' after the switch and thereby ends
  // the paragraph.
  for (;;) {
    switch (Tok.getKind()) {
    case tok::verbatim_block_begin:
    case tok::verbatim_line_name:
    case tok::eof:
      break; // Block content or EOF ahead, finish this paragraph.

    case tok::unknown_command:
      Content.push_back(S.actOnUnknownCommand(
          Tok.getLocation(), Tok.getEndLocation(),
          Tok.getUnknownCommandName()));
      consumeToken();
      continue;

    case tok::backslash_command:
    case tok::at_command: {
      const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());

      if (Info->IsBlockCommand) {
        // With nothing collected yet, the block command is the result.
        if (Content.empty())
          return parseBlockCommand();
        break; // Block command ahead, finish this paragraph.
      }

      if (Info->IsVerbatimBlockEndCommand) {
        // An end command with no matching start: warn and drop it.
        Diag(Tok.getLocation(), diag::warn_verbatim_block_end_without_start)
            << Tok.is(tok::at_command) << Info->Name
            << SourceRange(Tok.getLocation(), Tok.getEndLocation());
        consumeToken();
        continue;
      }

      if (Info->IsUnknownCommand) {
        Content.push_back(S.actOnUnknownCommand(
            Tok.getLocation(), Tok.getEndLocation(), Info->getID()));
        consumeToken();
        continue;
      }

      assert(Info->IsInlineCommand);
      Content.push_back(parseInlineCommand());
      continue;
    }

    case tok::newline: {
      consumeToken();
      if (AtNewlineOrEOF()) {
        consumeToken();
        break; // Two newlines -- end of paragraph.
      }

      // Also allow [tok::newline, tok::text, tok::newline] if the middle
      // tok::text is just whitespace.
      if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
        Token WhitespaceTok = Tok;
        consumeToken();
        if (AtNewlineOrEOF()) {
          consumeToken();
          break;
        }
        // We have [tok::newline, tok::text, non-newline].  Put back
        // tok::text.
        putBack(WhitespaceTok);
      }

      // An ordinary line break: record it on the preceding inline node.
      if (!Content.empty())
        Content.back()->addTrailingNewline();
      continue;
    }

    // Don't deal with HTML tag soup now.
    case tok::html_start_tag:
      Content.push_back(parseHTMLStartTag());
      continue;

    case tok::html_end_tag:
      Content.push_back(parseHTMLEndTag());
      continue;

    case tok::text:
      Content.push_back(S.actOnText(Tok.getLocation(), Tok.getEndLocation(),
                                    Tok.getText()));
      consumeToken();
      continue;

    case tok::verbatim_block_line:
    case tok::verbatim_block_end:
    case tok::verbatim_line_text:
    case tok::html_ident:
    case tok::html_equals:
    case tok::html_quoted_string:
    case tok::html_greater:
    case tok::html_slash_greater:
      llvm_unreachable("should not see this token");
    }
    break;
  }

  return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
}
814
815
/// Parses a verbatim block: the opening command, the lines of the block and
/// the closing command if there is one.  Must be called with the current
/// token on the opening command.
VerbatimBlockComment *Parser::parseVerbatimBlock() {
  assert(Tok.is(tok::verbatim_block_begin));

  VerbatimBlockComment *VB =
      S.actOnVerbatimBlockStart(Tok.getLocation(), Tok.getVerbatimBlockID());
  consumeToken();

  // Don't create an empty line if verbatim opening command is followed
  // by a newline.
  if (Tok.is(tok::newline))
    consumeToken();

  SmallVector<VerbatimBlockLineComment *, 8> Lines;
  while (Tok.is(tok::verbatim_block_line) || Tok.is(tok::newline)) {
    if (Tok.is(tok::newline)) {
      // Empty line, just a tok::newline.
      Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(), ""));
      consumeToken();
      continue;
    }

    // A line with text; the newline that terminates it is swallowed as well.
    Lines.push_back(S.actOnVerbatimBlockLine(Tok.getLocation(),
                                             Tok.getVerbatimBlockText()));
    consumeToken();
    if (Tok.is(tok::newline))
      consumeToken();
  }

  if (Tok.isNot(tok::verbatim_block_end)) {
    // Unterminated \\verbatim block
    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
                               S.copyArray(llvm::ArrayRef(Lines)));
    return VB;
  }

  const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
  S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
                             S.copyArray(llvm::ArrayRef(Lines)));
  consumeToken();
  return VB;
}
860
861
/// Parses a verbatim line command and the text that follows it on the same
/// line.  Must be called with the current token on the command name.
VerbatimLineComment *Parser::parseVerbatimLine() {
  assert(Tok.is(tok::verbatim_line_name));

  const Token NameTok = Tok;
  consumeToken();

  // Next token might not be a tok::verbatim_line_text if verbatim line
  // starting command comes just before a newline or comment end.  In that
  // case the text is empty and is located at the end of the command name.
  SourceLocation TextBegin = NameTok.getEndLocation();
  StringRef Text = "";
  if (Tok.is(tok::verbatim_line_text)) {
    TextBegin = Tok.getLocation();
    Text = Tok.getVerbatimLineText();
  }

  VerbatimLineComment *VL = S.actOnVerbatimLine(
      NameTok.getLocation(), NameTok.getVerbatimLineID(), TextBegin, Text);
  // The token after the name is consumed whether or not it carried text.
  consumeToken();
  return VL;
}
886
887
/// Dispatches on the current token to the parser for the piece of block
/// content that starts there.  Tokens that cannot start block content are
/// treated as unreachable.
BlockContentComment *Parser::parseBlockContent() {
  switch (Tok.getKind()) {
  // Verbatim constructs have dedicated parsers.
  case tok::verbatim_block_begin:
    return parseVerbatimBlock();

  case tok::verbatim_line_name:
    return parseVerbatimLine();

  // Everything that can open a paragraph or a block command.
  case tok::text:
  case tok::unknown_command:
  case tok::backslash_command:
  case tok::at_command:
  case tok::html_start_tag:
  case tok::html_end_tag:
    return parseParagraphOrBlockCommand();

  // None of these may appear at the start of block content.
  case tok::eof:
  case tok::newline:
  case tok::verbatim_block_line:
  case tok::verbatim_block_end:
  case tok::verbatim_line_text:
  case tok::html_ident:
  case tok::html_equals:
  case tok::html_quoted_string:
  case tok::html_greater:
  case tok::html_slash_greater:
    llvm_unreachable("should not see this token");
  }
  llvm_unreachable("bogus token kind");
}
917
918
/// Parses an entire comment: a sequence of block-content items separated by
/// any number of newlines, up to the end-of-file token.
FullComment *Parser::parseFullComment() {
  auto SkipNewlines = [this] {
    while (Tok.is(tok::newline))
      consumeToken();
  };

  // Skip newlines at the beginning of the comment.
  SkipNewlines();

  SmallVector<BlockContentComment *, 8> Blocks;
  while (Tok.isNot(tok::eof)) {
    Blocks.push_back(parseBlockContent());
    // Skip extra newlines after paragraph end.
    SkipNewlines();
  }

  return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
}
933
934
} // end namespace comments
935
} // end namespace clang
936
937