//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implement the Lexer for TableGen.
//
//===----------------------------------------------------------------------===//

#include "TGLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

using namespace llvm;

namespace {
// A list of supported preprocessing directives with their
// internal token kinds and names.
struct {
  tgtok::TokKind Kind;
  const char *Word;
} PreprocessorDirs[] = {
  { tgtok::Ifdef, "ifdef" },
  { tgtok::Ifndef, "ifndef" },
  { tgtok::Else, "else" },
  { tgtok::Endif, "endif" },
  { tgtok::Define, "define" }
};
} // end anonymous namespace
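
// In a .td file these directives appear at the start of a line, e.g.:
//     #ifdef ENABLE_EXTRA
//     def ExtraWidget;
//     #else
//     def BasicWidget;
//     #endif
// (Illustrative input only; see lexPreprocessor() below for how each
// directive is handled.)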

TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
  CurBuffer = SrcMgr.getMainFileID();
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
  CurPtr = CurBuf.begin();
  TokStart = nullptr;

  // Pretend that we enter the "top-level" include file.
  PrepIncludeStack.push_back(
      std::make_unique<std::vector<PreprocessorControlDesc>>());

  // Put all macros defined in the command line into the DefinedMacros set.
  for (const std::string &MacroName : Macros)
    DefinedMacros.insert(MacroName);
}
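
// The Macros list above typically comes from the -D options given to a
// *-tblgen tool (e.g. "llvm-tblgen -DENABLE_EXTRA Input.td"), so those names
// are visible to #ifdef/#ifndef checks from the very first token.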

SMLoc TGLexer::getLoc() const {
  return SMLoc::getFromPointer(TokStart);
}

SMRange TGLexer::getLocRange() const {
  return {getLoc(), SMLoc::getFromPointer(CurPtr)};
}

/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return tgtok::Error.
tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
  PrintError(Loc, Msg);
  return tgtok::Error;
}

tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
  return ReturnError(SMLoc::getFromPointer(Loc), Msg);
}

bool TGLexer::processEOF() {
  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
  if (ParentIncludeLoc != SMLoc()) {
    // If prepExitInclude() detects a problem with the preprocessing
    // control stack, it will return false. Pretend that we reached
    // the final EOF and stop lexing more tokens by returning false
    // to LexToken().
    if (!prepExitInclude(false))
      return false;

    CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
    CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
    CurPtr = ParentIncludeLoc.getPointer();
    // Make sure TokStart points into the parent file's buffer.
    // LexToken() assigns to it before calling getNextChar(),
    // so it is pointing into the included file now.
    TokStart = CurPtr;
    return true;
  }

  // Pretend that we exit the "top-level" include file.
  // Note that in case of an error (e.g. control stack imbalance)
  // the routine will issue a fatal error.
  prepExitInclude(true);
  return false;
}

int TGLexer::getNextChar() {
  char CurChar = *CurPtr++;
  switch (CurChar) {
  default:
    return (unsigned char)CurChar;

  case 0: {
    // A NUL character in the stream is either the end of the current buffer or
    // a spurious NUL in the file. Disambiguate that here.
    if (CurPtr - 1 == CurBuf.end()) {
      --CurPtr; // Arrange for another call to return EOF again.
      return EOF;
    }
    PrintError(getLoc(),
               "NUL character is invalid in source; treated as space");
    return ' ';
  }

  case '\n':
  case '\r':
    // Handle a newline by returning a single '\n'. Be careful about
    // 'dos style' files with \n\r in them: treat a \n\r or \r\n pair
    // as a single newline.
    if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
        *CurPtr != CurChar)
      ++CurPtr; // Eat the two char newline sequence.
    return '\n';
  }
}

int TGLexer::peekNextChar(int Index) const {
  return *(CurPtr + Index);
}

tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
  TokStart = CurPtr;
  // This always consumes at least one character.
  int CurChar = getNextChar();

  switch (CurChar) {
  default:
    // Handle letters: [a-zA-Z_]
    if (isalpha(CurChar) || CurChar == '_')
      return LexIdentifier();

    // Unknown character, emit an error.
    return ReturnError(TokStart, "Unexpected character");
  case EOF:
    // Lex next token, if we just left an include file.
    // Note that leaving an include file means that the next
    // symbol is located at the end of the 'include "..."'
    // construct, so LexToken() is called with default
    // false parameter.
    if (processEOF())
      return LexToken();

    // Return EOF denoting the end of lexing.
    return tgtok::Eof;

  case ':': return tgtok::colon;
  case ';': return tgtok::semi;
  case ',': return tgtok::comma;
  case '<': return tgtok::less;
  case '>': return tgtok::greater;
  case ']': return tgtok::r_square;
  case '{': return tgtok::l_brace;
  case '}': return tgtok::r_brace;
  case '(': return tgtok::l_paren;
  case ')': return tgtok::r_paren;
  case '=': return tgtok::equal;
  case '?': return tgtok::question;
  case '#':
    if (FileOrLineStart) {
      tgtok::TokKind Kind = prepIsDirective();
      if (Kind != tgtok::Error)
        return lexPreprocessor(Kind);
    }

    return tgtok::paste;

  // The period is a separate case so we can recognize the "..."
  // range punctuator.
  case '.':
    if (peekNextChar(0) == '.') {
      ++CurPtr; // Eat second dot.
      if (peekNextChar(0) == '.') {
        ++CurPtr; // Eat third dot.
        return tgtok::dotdotdot;
      }
      return ReturnError(TokStart, "Invalid '..' punctuation");
    }
    return tgtok::dot;

  case '\r':
    PrintFatalError("getNextChar() must never return '\r'");
    return tgtok::Error;

  case ' ':
  case '\t':
    // Ignore whitespace.
    return LexToken(FileOrLineStart);
  case '\n':
    // Ignore whitespace, and identify the new line.
    return LexToken(true);
  case '/':
    // If this is the start of a // comment, skip until the end of the line or
    // the end of the buffer.
    if (*CurPtr == '/')
      SkipBCPLComment();
    else if (*CurPtr == '*') {
      if (SkipCComment())
        return tgtok::Error;
    } else // Otherwise, this is an error.
      return ReturnError(TokStart, "Unexpected character");
    return LexToken(FileOrLineStart);
  case '-': case '+':
  case '0': case '1': case '2': case '3': case '4': case '5': case '6':
  case '7': case '8': case '9': {
    int NextChar = 0;
    if (isdigit(CurChar)) {
      // Allow identifiers to start with a number if it is followed by
      // an identifier. This can happen with paste operations like
      // foo#8i.
      int i = 0;
      do {
        NextChar = peekNextChar(i++);
      } while (isdigit(NextChar));

      if (NextChar == 'x' || NextChar == 'b') {
        // If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f], this is most
        // likely a number.
        int NextNextChar = peekNextChar(i);
        switch (NextNextChar) {
        default:
          break;
        case '0': case '1':
          if (NextChar == 'b')
            return LexNumber();
          [[fallthrough]];
        case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          if (NextChar == 'x')
            return LexNumber();
          break;
        }
      }
    }

    if (isalpha(NextChar) || NextChar == '_')
      return LexIdentifier();

    return LexNumber();
  }
  case '"': return LexString();
  case '$': return LexVarName();
  case '[': return LexBracket();
  case '!': return LexExclaim();
  }
}
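
// As an illustration of the dispatch above, the input line
//     def Foo : Bar<42>;
// is lexed as Def, Id("Foo"), colon, Id("Bar"), less, IntVal(42), greater,
// semi (whitespace and comments never become tokens).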

/// LexString - Lex "[^"]*"
tgtok::TokKind TGLexer::LexString() {
  const char *StrStart = CurPtr;

  CurStrVal = "";

  while (*CurPtr != '"') {
    // If we hit the end of the buffer, report an error.
    if (*CurPtr == 0 && CurPtr == CurBuf.end())
      return ReturnError(StrStart, "End of file in string literal");

    if (*CurPtr == '\n' || *CurPtr == '\r')
      return ReturnError(StrStart, "End of line in string literal");

    if (*CurPtr != '\\') {
      CurStrVal += *CurPtr++;
      continue;
    }

    ++CurPtr;

    switch (*CurPtr) {
    case '\\': case '\'': case '"':
      // These turn into their literal character.
      CurStrVal += *CurPtr++;
      break;
    case 't':
      CurStrVal += '\t';
      ++CurPtr;
      break;
    case 'n':
      CurStrVal += '\n';
      ++CurPtr;
      break;

    case '\n':
    case '\r':
      return ReturnError(CurPtr, "escaped newlines not supported in tblgen");

    // If we hit the end of the buffer, report an error.
    case '\0':
      if (CurPtr == CurBuf.end())
        return ReturnError(StrStart, "End of file in string literal");
      [[fallthrough]];
    default:
      return ReturnError(CurPtr, "invalid escape in string literal");
    }
  }

  ++CurPtr;
  return tgtok::StrVal;
}
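
// For example, the literal "a\tb" in a .td file produces a StrVal token whose
// value is the three characters 'a', TAB, 'b'; a raw newline inside the
// quotes is rejected above rather than silently spliced.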

tgtok::TokKind TGLexer::LexVarName() {
  if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
    return ReturnError(TokStart, "Invalid variable name");

  // Otherwise, we're ok, consume the rest of the characters.
  const char *VarNameStart = CurPtr++;

  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
    ++CurPtr;

  CurStrVal.assign(VarNameStart, CurPtr);
  return tgtok::VarName;
}

tgtok::TokKind TGLexer::LexIdentifier() {
  // The first letter is [a-zA-Z_].
  const char *IdentStart = TokStart;

  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
    ++CurPtr;

  // Check to see if this identifier is a reserved keyword.
  StringRef Str(IdentStart, CurPtr-IdentStart);

  tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
                            .Case("int", tgtok::Int)
                            .Case("bit", tgtok::Bit)
                            .Case("bits", tgtok::Bits)
                            .Case("string", tgtok::String)
                            .Case("list", tgtok::List)
                            .Case("code", tgtok::Code)
                            .Case("dag", tgtok::Dag)
                            .Case("class", tgtok::Class)
                            .Case("def", tgtok::Def)
                            .Case("true", tgtok::TrueVal)
                            .Case("false", tgtok::FalseVal)
                            .Case("foreach", tgtok::Foreach)
                            .Case("defm", tgtok::Defm)
                            .Case("defset", tgtok::Defset)
                            .Case("deftype", tgtok::Deftype)
                            .Case("multiclass", tgtok::MultiClass)
                            .Case("field", tgtok::Field)
                            .Case("let", tgtok::Let)
                            .Case("in", tgtok::In)
                            .Case("defvar", tgtok::Defvar)
                            .Case("include", tgtok::Include)
                            .Case("if", tgtok::If)
                            .Case("then", tgtok::Then)
                            .Case("else", tgtok::ElseKW)
                            .Case("assert", tgtok::Assert)
                            .Case("dump", tgtok::Dump)
                            .Default(tgtok::Id);

  // A couple of tokens require special processing.
  switch (Kind) {
  case tgtok::Include:
    if (LexInclude()) return tgtok::Error;
    return Lex();
  case tgtok::Id:
    CurStrVal.assign(Str.begin(), Str.end());
    break;
  default:
    break;
  }

  return Kind;
}

/// LexInclude - We just read the "include" token. Get the string token that
/// comes next and enter the include.
bool TGLexer::LexInclude() {
  // The token after the include must be a string.
  tgtok::TokKind Tok = LexToken();
  if (Tok == tgtok::Error) return true;
  if (Tok != tgtok::StrVal) {
    PrintError(getLoc(), "Expected filename after include");
    return true;
  }

  // Get the string.
  std::string Filename = CurStrVal;
  std::string IncludedFile;

  CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
                                    IncludedFile);
  if (!CurBuffer) {
    PrintError(getLoc(), "Could not find include file '" + Filename + "'");
    return true;
  }

  Dependencies.insert(IncludedFile);
  // Switch our pointers to lex from the start of the included file's buffer.
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
  CurPtr = CurBuf.begin();

  PrepIncludeStack.push_back(
      std::make_unique<std::vector<PreprocessorControlDesc>>());
  return false;
}
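
// Note that AddIncludeFile() searches the include directories registered on
// the SourceMgr (typically the -I options of the *-tblgen tools), and lexing
// resumes at the first character of the included buffer, so the caller simply
// keeps pulling tokens.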

/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
/// Or we may end up at the end of the buffer.
void TGLexer::SkipBCPLComment() {
  ++CurPtr; // skip the second slash.
  auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
  CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
}

/// SkipCComment - This skips C-style /**/ comments. The only difference from C
/// is that we allow nesting.
bool TGLexer::SkipCComment() {
  ++CurPtr; // skip the star.
  unsigned CommentDepth = 1;

  while (true) {
    int CurChar = getNextChar();
    switch (CurChar) {
    case EOF:
      PrintError(TokStart, "Unterminated comment!");
      return true;
    case '*':
      // End of the comment?
      if (CurPtr[0] != '/') break;

      ++CurPtr; // End the */.
      if (--CommentDepth == 0)
        return false;
      break;
    case '/':
      // Start of a nested comment?
      if (CurPtr[0] != '*') break;
      ++CurPtr;
      ++CommentDepth;
      break;
    }
  }
}
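
// Because nesting is tracked, an input such as
//     /* outer /* inner */ still a comment */
// is skipped as a single comment: CommentDepth only returns to zero at the
// second "*/".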

/// LexNumber - Lex:
///    [-+]?[0-9]+
///    0x[0-9a-fA-F]+
///    0b[01]+
tgtok::TokKind TGLexer::LexNumber() {
  unsigned Base = 0;
  const char *NumStart;

  // Check if it's a hex or a binary value.
  if (CurPtr[-1] == '0') {
    NumStart = CurPtr + 1;
    if (CurPtr[0] == 'x') {
      Base = 16;
      do
        ++CurPtr;
      while (isxdigit(CurPtr[0]));
    } else if (CurPtr[0] == 'b') {
      Base = 2;
      do
        ++CurPtr;
      while (CurPtr[0] == '0' || CurPtr[0] == '1');
    }
  }

  // For a hex or binary value, we always convert it to an unsigned value.
  bool IsMinus = false;

  // Check if it's a decimal value.
  if (Base == 0) {
    // Check for a sign without a digit.
    if (!isdigit(CurPtr[0])) {
      if (CurPtr[-1] == '-')
        return tgtok::minus;
      else if (CurPtr[-1] == '+')
        return tgtok::plus;
    }

    Base = 10;
    NumStart = TokStart;
    IsMinus = CurPtr[-1] == '-';

    while (isdigit(CurPtr[0]))
      ++CurPtr;
  }

  // Requires at least one digit.
  if (CurPtr == NumStart)
    return ReturnError(TokStart, "Invalid number");

  errno = 0;
  if (IsMinus)
    CurIntVal = strtoll(NumStart, nullptr, Base);
  else
    CurIntVal = strtoull(NumStart, nullptr, Base);

  if (errno == EINVAL)
    return ReturnError(TokStart, "Invalid number");
  if (errno == ERANGE)
    return ReturnError(TokStart, "Number out of range");

  return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal;
}
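
// Examples: "0x1F" and "-12" lex to IntVal tokens (31 and -12), while "0b0101"
// lexes to a BinaryIntVal token; the separate kind lets the parser recover how
// many binary digits were written, which matters for bits<n> initializers.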

/// LexBracket - We just read '['. If this is a code block, return it,
/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
tgtok::TokKind TGLexer::LexBracket() {
  if (CurPtr[0] != '{')
    return tgtok::l_square;
  ++CurPtr;
  const char *CodeStart = CurPtr;
  while (true) {
    int Char = getNextChar();
    if (Char == EOF) break;

    if (Char != '}') continue;

    Char = getNextChar();
    if (Char == EOF) break;
    if (Char == ']') {
      CurStrVal.assign(CodeStart, CurPtr-2);
      return tgtok::CodeFragment;
    }
  }

  return ReturnError(CodeStart - 2, "Unterminated code block");
}
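
// For example, the code block
//     [{ return N->hasOneUse(); }]
// comes back as a single CodeFragment token whose string value is the text
// between "[{" and "}]", whereas a plain '[' simply yields l_square.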

/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
tgtok::TokKind TGLexer::LexExclaim() {
  if (!isalpha(*CurPtr))
    return ReturnError(CurPtr - 1, "Invalid \"!operator\"");

  const char *Start = CurPtr++;
  while (isalpha(*CurPtr))
    ++CurPtr;

  // Check to see which operator this is.
  tgtok::TokKind Kind =
      StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
          .Case("eq", tgtok::XEq)
          .Case("ne", tgtok::XNe)
          .Case("le", tgtok::XLe)
          .Case("lt", tgtok::XLt)
          .Case("ge", tgtok::XGe)
          .Case("gt", tgtok::XGt)
          .Case("if", tgtok::XIf)
          .Case("cond", tgtok::XCond)
          .Case("isa", tgtok::XIsA)
          .Case("head", tgtok::XHead)
          .Case("tail", tgtok::XTail)
          .Case("size", tgtok::XSize)
          .Case("con", tgtok::XConcat)
          .Case("dag", tgtok::XDag)
          .Case("add", tgtok::XADD)
          .Case("sub", tgtok::XSUB)
          .Case("mul", tgtok::XMUL)
          .Case("div", tgtok::XDIV)
          .Case("not", tgtok::XNOT)
          .Case("logtwo", tgtok::XLOG2)
          .Case("and", tgtok::XAND)
          .Case("or", tgtok::XOR)
          .Case("xor", tgtok::XXOR)
          .Case("shl", tgtok::XSHL)
          .Case("sra", tgtok::XSRA)
          .Case("srl", tgtok::XSRL)
          .Case("cast", tgtok::XCast)
          .Case("empty", tgtok::XEmpty)
          .Case("subst", tgtok::XSubst)
          .Case("foldl", tgtok::XFoldl)
          .Case("foreach", tgtok::XForEach)
          .Case("filter", tgtok::XFilter)
          .Case("listconcat", tgtok::XListConcat)
          .Case("listsplat", tgtok::XListSplat)
          .Case("listremove", tgtok::XListRemove)
          .Case("range", tgtok::XRange)
          .Case("strconcat", tgtok::XStrConcat)
          .Case("interleave", tgtok::XInterleave)
          .Case("substr", tgtok::XSubstr)
          .Case("find", tgtok::XFind)
          .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
          .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
          .Case("getdagarg", tgtok::XGetDagArg)
          .Case("getdagname", tgtok::XGetDagName)
          .Case("setdagarg", tgtok::XSetDagArg)
          .Case("setdagname", tgtok::XSetDagName)
          .Case("exists", tgtok::XExists)
          .Case("tolower", tgtok::XToLower)
          .Case("toupper", tgtok::XToUpper)
          .Case("repr", tgtok::XRepr)
          .Default(tgtok::Error);

  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
}
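
// For example, "!add(X, 1)" is lexed as XADD, l_paren, Id("X"), comma,
// IntVal(1), r_paren, while an unrecognized name such as "!frobnicate" is
// reported as "Unknown operator".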

bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
  // Report an error if the preprocessor control stack for the current
  // file is not empty.
  if (!PrepIncludeStack.back()->empty()) {
    prepReportPreprocessorStackError();

    return false;
  }

  // Pop the preprocessing controls from the include stack.
  if (PrepIncludeStack.empty()) {
    PrintFatalError("Preprocessor include stack is empty");
  }

  PrepIncludeStack.pop_back();

  if (IncludeStackMustBeEmpty) {
    if (!PrepIncludeStack.empty())
      PrintFatalError("Preprocessor include stack is not empty");
  } else {
    if (PrepIncludeStack.empty())
      PrintFatalError("Preprocessor include stack is empty");
  }

  return true;
}

tgtok::TokKind TGLexer::prepIsDirective() const {
  for (const auto &PD : PreprocessorDirs) {
    int NextChar = *CurPtr;
    bool Match = true;
    unsigned I = 0;
    for (; I < strlen(PD.Word); ++I) {
      if (NextChar != PD.Word[I]) {
        Match = false;
        break;
      }

      NextChar = peekNextChar(I + 1);
    }

    // Check for whitespace after the directive. If there is no whitespace,
    // then we do not recognize it as a preprocessing directive.
    if (Match) {
      tgtok::TokKind Kind = PD.Kind;

      // New line and EOF may follow only #else/#endif. It will be reported
      // as an error for #ifdef/#define after the call to prepLexMacroName().
      if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
          NextChar == '\n' ||
          // It looks like TableGen does not support '\r' as the actual
          // carriage return, e.g. getNextChar() treats a single '\r'
          // as '\n'. So we do the same here.
          NextChar == '\r')
        return Kind;

      // Allow comments after some directives, e.g.:
      //     #else// OR #else/**/
      //     #endif// OR #endif/**/
      //
      // Note that we do allow comments after #ifdef/#define here, e.g.
      //     #ifdef/**/ AND #ifdef//
      //     #define/**/ AND #define//
      //
      // These cases will be reported as incorrect after calling
      // prepLexMacroName(). We could have supported C-style comments
      // after #ifdef/#define, but this would complicate the code
      // for little benefit.
      if (NextChar == '/') {
        NextChar = peekNextChar(I + 1);

        if (NextChar == '*' || NextChar == '/')
          return Kind;

        // Pretend that we do not recognize the directive.
      }
    }
  }

  return tgtok::Error;
}

bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
  TokStart = CurPtr;

  for (const auto &PD : PreprocessorDirs)
    if (PD.Kind == Kind) {
      // Advance CurPtr to the end of the preprocessing word.
      CurPtr += strlen(PD.Word);
      return true;
    }

  PrintFatalError("Unsupported preprocessing token in "
                  "prepEatPreprocessorDirective()");
  return false;
}

tgtok::TokKind TGLexer::lexPreprocessor(
    tgtok::TokKind Kind, bool ReturnNextLiveToken) {

  // We must be looking at a preprocessing directive. Eat it!
  if (!prepEatPreprocessorDirective(Kind))
    PrintFatalError("lexPreprocessor() called for unknown "
                    "preprocessor directive");

  if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
    StringRef MacroName = prepLexMacroName();
    StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
    if (MacroName.empty())
      return ReturnError(TokStart, "Expected macro name after " + IfTokName);

    bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;

    // Canonicalize ifndef's MacroIsDefined to its ifdef equivalent.
    if (Kind == tgtok::Ifndef)
      MacroIsDefined = !MacroIsDefined;

    // Regardless of whether we are processing tokens or not,
    // we put the #ifdef control on stack.
    // Note that MacroIsDefined has been canonicalized against ifdef.
    PrepIncludeStack.back()->push_back(
        {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)});

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr, "Only comments are supported after " +
                                     IfTokName + " NAME");

    // If we were not processing tokens before this #ifdef,
    // then just return back to the lines skipping code.
    if (!ReturnNextLiveToken)
      return Kind;

    // If we were processing tokens before this #ifdef,
    // and the macro is defined, then just return the next token.
    if (MacroIsDefined)
      return LexToken();

    // We were processing tokens before this #ifdef, and the macro
    // is not defined, so we have to start skipping the lines.
    // If the skipping is successful, it will return the token following
    // either #else or #endif corresponding to this #ifdef.
    if (prepSkipRegion(ReturnNextLiveToken))
      return LexToken();

    return tgtok::Error;
  } else if (Kind == tgtok::Else) {
    // Check if this #else is correct before calling prepSkipDirectiveEnd(),
    // which will move CurPtr away from the beginning of #else.
    if (PrepIncludeStack.back()->empty())
      return ReturnError(TokStart, "#else without #ifdef or #ifndef");

    PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();

    if (IfdefEntry.Kind != tgtok::Ifdef) {
      PrintError(TokStart, "double #else");
      return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
    }

    // Replace the corresponding #ifdef's control with its negation
    // on the control stack.
    PrepIncludeStack.back()->pop_back();
    PrepIncludeStack.back()->push_back(
        {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr, "Only comments are supported after #else");

    // If we were processing tokens before this #else,
    // we have to start skipping lines until the matching #endif.
    if (ReturnNextLiveToken) {
      if (prepSkipRegion(ReturnNextLiveToken))
        return LexToken();

      return tgtok::Error;
    }

    // Return to the lines skipping code.
    return Kind;
  } else if (Kind == tgtok::Endif) {
    // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
    // which will move CurPtr away from the beginning of #endif.
    if (PrepIncludeStack.back()->empty())
      return ReturnError(TokStart, "#endif without #ifdef");

    auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();

    if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
        IfdefOrElseEntry.Kind != tgtok::Else) {
      PrintFatalError("Invalid preprocessor control on the stack");
      return tgtok::Error;
    }

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr, "Only comments are supported after #endif");

    PrepIncludeStack.back()->pop_back();

    // If we were processing tokens before this #endif, then
    // we should continue it.
    if (ReturnNextLiveToken) {
      return LexToken();
    }

    // Return to the lines skipping code.
    return Kind;
  } else if (Kind == tgtok::Define) {
    StringRef MacroName = prepLexMacroName();
    if (MacroName.empty())
      return ReturnError(TokStart, "Expected macro name after #define");

    if (!DefinedMacros.insert(MacroName).second)
      PrintWarning(getLoc(),
                   "Duplicate definition of macro: " + Twine(MacroName));

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr,
                         "Only comments are supported after #define NAME");

    if (!ReturnNextLiveToken) {
      PrintFatalError("#define must be ignored during the lines skipping");
      return tgtok::Error;
    }

    return LexToken();
  }

  PrintFatalError("Preprocessing directive is not supported");
  return tgtok::Error;
}
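
// For example, with only OUTER defined on the command line, the input
//     #ifdef OUTER
//     #ifdef INNER
//     def A;
//     #else
//     def B;
//     #endif
//     #endif
// keeps only "def B;": the inner #ifdef pushes a disabled control and line
// skipping starts, the #else flips it back to enabled, and each #endif pops
// its entry from the control stack.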

bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
  if (!MustNeverBeFalse)
    PrintFatalError("Invalid recursion.");

  do {
    // Skip all symbols to the line end.
    while (*CurPtr != '\n')
      ++CurPtr;

    // Find the first non-whitespace symbol in the next line(s).
    if (!prepSkipLineBegin())
      return false;

    // If the first non-blank/comment symbol on the line is '#',
    // it may be the start of a preprocessing directive.
    //
    // If it is not '#' just go to the next line.
    if (*CurPtr == '#')
      ++CurPtr;
    else
      continue;

    tgtok::TokKind Kind = prepIsDirective();

    // If we did not find a preprocessing directive or it is #define,
    // then just skip to the next line. We do not have to do anything
    // for #define in the line-skipping mode.
    if (Kind == tgtok::Error || Kind == tgtok::Define)
      continue;

    tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);

    // If lexPreprocessor() encountered an error during lexing this
    // preprocessor idiom, then return false to the calling lexPreprocessor().
    // This will force tgtok::Error to be returned to the tokens processing.
    if (ProcessedKind == tgtok::Error)
      return false;

    if (Kind != ProcessedKind)
      PrintFatalError("prepIsDirective() and lexPreprocessor() "
                      "returned different token kinds");

    // If this preprocessing directive enables tokens processing,
    // then return to the lexPreprocessor() and get to the next token.
    // We can move from line-skipping mode to processing tokens only
    // due to #else or #endif.
    if (prepIsProcessingEnabled()) {
      if (Kind != tgtok::Else && Kind != tgtok::Endif) {
        PrintFatalError("Tokens processing was enabled by an unexpected "
                        "preprocessing directive");
        return false;
      }

      return true;
    }
  } while (CurPtr != CurBuf.end());

  // We have reached the end of the file, but never left the lines-skipping
  // mode. This means there is no matching #endif.
  prepReportPreprocessorStackError();
  return false;
}

StringRef TGLexer::prepLexMacroName() {
  // Skip whitespaces between the preprocessing directive and the macro name.
  while (*CurPtr == ' ' || *CurPtr == '\t')
    ++CurPtr;

  TokStart = CurPtr;
  // Macro names start with [a-zA-Z_].
  if (*CurPtr != '_' && !isalpha(*CurPtr))
    return "";

  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
    ++CurPtr;

  return StringRef(TokStart, CurPtr - TokStart);
}

bool TGLexer::prepSkipLineBegin() {
  while (CurPtr != CurBuf.end()) {
    switch (*CurPtr) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      break;

    case '/': {
      int NextChar = peekNextChar(1);
      if (NextChar == '*') {
        // Skip C-style comment.
        // Note that we do not care about skipping the C++-style comments.
        // If the line contains "//", it may not contain any processable
        // preprocessing directive. Just return CurPtr pointing to
        // the first '/' in this case. We also do not care about
        // incorrect symbols after the first '/' - we are in lines-skipping
        // mode, so incorrect code is allowed to some extent.

        // Set TokStart to the beginning of the comment to enable proper
        // diagnostic printing in case of error in SkipCComment().
        TokStart = CurPtr;

        // CurPtr must point to '*' before call to SkipCComment().
        ++CurPtr;
        if (SkipCComment())
          return false;
      } else {
        // CurPtr points to the non-whitespace '/'.
        return true;
      }

      // We must not increment CurPtr after the comment was lexed.
      continue;
    }

    default:
      return true;
    }

    ++CurPtr;
  }

  // We have reached the end of the file. Return to the lines skipping
  // code, and allow it to handle the EOF as needed.
  return true;
}

bool TGLexer::prepSkipDirectiveEnd() {
  while (CurPtr != CurBuf.end()) {
    switch (*CurPtr) {
    case ' ':
    case '\t':
      break;

    case '\n':
    case '\r':
      return true;

    case '/': {
      int NextChar = peekNextChar(1);
      if (NextChar == '/') {
        // Skip C++-style comment.
        // We may just return true now, but let's skip to the line/buffer end
        // to simplify the method specification.
        ++CurPtr;
        SkipBCPLComment();
      } else if (NextChar == '*') {
        // When we are skipping a C-style comment at the end of a preprocessing
        // directive, we can skip several lines. If any meaningful TD token
        // follows the end of the C-style comment on the same line, it will
        // be considered an invalid usage of a TD token.
        // For example, we want to forbid usages like this one:
        //     #define MACRO class Class {}
        // But with C-style comments we also disallow the following:
        //     #define MACRO /* This macro is used
        //                      to ... */ class Class {}
        // One can argue that this should be allowed, but it does not seem
        // to be worth the complication. Moreover, this matches
        // the C preprocessor behavior.

        // Set TokStart to the beginning of the comment to enable proper
        // diagnostic printing in case of error in SkipCComment().
        TokStart = CurPtr;
        ++CurPtr;
        if (SkipCComment())
          return false;
      } else {
        TokStart = CurPtr;
        PrintError(CurPtr, "Unexpected character");
        return false;
      }

      // We must not increment CurPtr after the comment was lexed.
      continue;
    }

    default:
      // Do not allow any non-whitespaces after the directive.
      TokStart = CurPtr;
      return false;
    }

    ++CurPtr;
  }

  return true;
}

bool TGLexer::prepIsProcessingEnabled() {
  for (const PreprocessorControlDesc &I :
       llvm::reverse(*PrepIncludeStack.back()))
    if (!I.IsDefined)
      return false;

  return true;
}

void TGLexer::prepReportPreprocessorStackError() {
  if (PrepIncludeStack.back()->empty())
    PrintFatalError("prepReportPreprocessorStackError() called with "
                    "empty control stack");

  auto &PrepControl = PrepIncludeStack.back()->back();
  PrintError(CurBuf.end(), "Reached EOF without matching #endif");
  PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");

  TokStart = CurPtr;
}