//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implement the Lexer for TableGen.
//
//===----------------------------------------------------------------------===//

#include "TGLexer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

using namespace llvm;

namespace {
// A list of supported preprocessing directives with their
// internal token kinds and names.
struct {
  tgtok::TokKind Kind;
  const char *Word;
} PreprocessorDirs[] = {
  { tgtok::Ifdef, "ifdef" },
  { tgtok::Ifndef, "ifndef" },
  { tgtok::Else, "else" },
  { tgtok::Endif, "endif" },
  { tgtok::Define, "define" }
};
} // end anonymous namespace
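
// In a .td file these directives appear at the start of a line, e.g.:
//     #ifdef ENABLE_EXTRA
//     def ExtraWidget;
//     #else
//     def BasicWidget;
//     #endif
// (Illustrative input only; see lexPreprocessor() below for how each
// directive is handled.)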

TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
  CurBuffer = SrcMgr.getMainFileID();
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
  CurPtr = CurBuf.begin();
  TokStart = nullptr;

  // Pretend that we enter the "top-level" include file.
  PrepIncludeStack.push_back(
      std::make_unique<std::vector<PreprocessorControlDesc>>());

  // Put all macros defined in the command line into the DefinedMacros set.
  for (const std::string &MacroName : Macros)
    DefinedMacros.insert(MacroName);
}
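
// The Macros list above typically comes from the -D options given to a
// *-tblgen tool (e.g. "llvm-tblgen -DENABLE_EXTRA Input.td"), so those names
// are visible to #ifdef/#ifndef checks from the very first token.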

SMLoc TGLexer::getLoc() const {
  return SMLoc::getFromPointer(TokStart);
}

SMRange TGLexer::getLocRange() const {
  return {getLoc(), SMLoc::getFromPointer(CurPtr)};
}

/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return tgtok::Error.
tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
  PrintError(Loc, Msg);
  return tgtok::Error;
}

tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
  return ReturnError(SMLoc::getFromPointer(Loc), Msg);
}

bool TGLexer::processEOF() {
  SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
  if (ParentIncludeLoc != SMLoc()) {
    // If prepExitInclude() detects a problem with the preprocessing
    // control stack, it will return false. Pretend that we reached
    // the final EOF and stop lexing more tokens by returning false
    // to LexToken().
    if (!prepExitInclude(false))
      return false;

    CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
    CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
    CurPtr = ParentIncludeLoc.getPointer();
    // Make sure TokStart points into the parent file's buffer.
    // LexToken() assigns to it before calling getNextChar(),
    // so it is pointing into the included file now.
    TokStart = CurPtr;
    return true;
  }

  // Pretend that we exit the "top-level" include file.
  // Note that in case of an error (e.g. control stack imbalance)
  // the routine will issue a fatal error.
  prepExitInclude(true);
  return false;
}

int TGLexer::getNextChar() {
  char CurChar = *CurPtr++;
  switch (CurChar) {
  default:
    return (unsigned char)CurChar;

  case 0: {
    // A NUL character in the stream is either the end of the current buffer or
    // a spurious NUL in the file. Disambiguate that here.
    if (CurPtr - 1 == CurBuf.end()) {
      --CurPtr; // Arrange for another call to return EOF again.
      return EOF;
    }
    PrintError(getLoc(),
               "NUL character is invalid in source; treated as space");
    return ' ';
  }

  case '\n':
  case '\r':
    // Handle a newline by returning a single '\n'. Be careful about
    // 'dos style' files with \n\r in them: treat a \n\r or \r\n pair
    // as a single newline.
    if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
        *CurPtr != CurChar)
      ++CurPtr; // Eat the two char newline sequence.
    return '\n';
  }
}

int TGLexer::peekNextChar(int Index) const {
  return *(CurPtr + Index);
}

tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
  TokStart = CurPtr;
  // This always consumes at least one character.
  int CurChar = getNextChar();

  switch (CurChar) {
  default:
    // Handle letters: [a-zA-Z_]
    if (isalpha(CurChar) || CurChar == '_')
      return LexIdentifier();

    // Unknown character, emit an error.
    return ReturnError(TokStart, "Unexpected character");
  case EOF:
    // Lex next token, if we just left an include file.
    // Note that leaving an include file means that the next
    // symbol is located at the end of the 'include "..."'
    // construct, so LexToken() is called with default
    // false parameter.
    if (processEOF())
      return LexToken();

    // Return EOF denoting the end of lexing.
    return tgtok::Eof;

  case ':': return tgtok::colon;
  case ';': return tgtok::semi;
  case ',': return tgtok::comma;
  case '<': return tgtok::less;
  case '>': return tgtok::greater;
  case ']': return tgtok::r_square;
  case '{': return tgtok::l_brace;
  case '}': return tgtok::r_brace;
  case '(': return tgtok::l_paren;
  case ')': return tgtok::r_paren;
  case '=': return tgtok::equal;
  case '?': return tgtok::question;
  case '#':
    if (FileOrLineStart) {
      tgtok::TokKind Kind = prepIsDirective();
      if (Kind != tgtok::Error)
        return lexPreprocessor(Kind);
    }

    return tgtok::paste;

  // The period is a separate case so we can recognize the "..."
  // range punctuator.
  case '.':
    if (peekNextChar(0) == '.') {
      ++CurPtr; // Eat second dot.
      if (peekNextChar(0) == '.') {
        ++CurPtr; // Eat third dot.
        return tgtok::dotdotdot;
      }
      return ReturnError(TokStart, "Invalid '..' punctuation");
    }
    return tgtok::dot;

  case '\r':
    PrintFatalError("getNextChar() must never return '\r'");
    return tgtok::Error;

  case ' ':
  case '\t':
    // Ignore whitespace.
    return LexToken(FileOrLineStart);
  case '\n':
    // Ignore whitespace, and identify the new line.
    return LexToken(true);
  case '/':
    // If this is the start of a // comment, skip until the end of the line or
    // the end of the buffer.
    if (*CurPtr == '/')
      SkipBCPLComment();
    else if (*CurPtr == '*') {
      if (SkipCComment())
        return tgtok::Error;
    } else // Otherwise, this is an error.
      return ReturnError(TokStart, "Unexpected character");
    return LexToken(FileOrLineStart);
  case '-': case '+':
  case '0': case '1': case '2': case '3': case '4': case '5': case '6':
  case '7': case '8': case '9': {
    int NextChar = 0;
    if (isdigit(CurChar)) {
      // Allow identifiers to start with a number if it is followed by
      // an identifier. This can happen with paste operations like
      // foo#8i.
      int i = 0;
      do {
        NextChar = peekNextChar(i++);
      } while (isdigit(NextChar));

      if (NextChar == 'x' || NextChar == 'b') {
        // If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f], this is most
        // likely a number.
        int NextNextChar = peekNextChar(i);
        switch (NextNextChar) {
        default:
          break;
        case '0': case '1':
          if (NextChar == 'b')
            return LexNumber();
          [[fallthrough]];
        case '2': case '3': case '4': case '5':
        case '6': case '7': case '8': case '9':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          if (NextChar == 'x')
            return LexNumber();
          break;
        }
      }
    }

    if (isalpha(NextChar) || NextChar == '_')
      return LexIdentifier();

    return LexNumber();
  }
  case '"': return LexString();
  case '$': return LexVarName();
  case '[': return LexBracket();
  case '!': return LexExclaim();
  }
}
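
// As an illustration of the dispatch above, the input line
//     def Foo : Bar<42>;
// is lexed as Def, Id("Foo"), colon, Id("Bar"), less, IntVal(42), greater,
// semi (whitespace and comments never become tokens).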

/// LexString - Lex "[^"]*"
tgtok::TokKind TGLexer::LexString() {
  const char *StrStart = CurPtr;

  CurStrVal = "";

  while (*CurPtr != '"') {
    // If we hit the end of the buffer, report an error.
    if (*CurPtr == 0 && CurPtr == CurBuf.end())
      return ReturnError(StrStart, "End of file in string literal");

    if (*CurPtr == '\n' || *CurPtr == '\r')
      return ReturnError(StrStart, "End of line in string literal");

    if (*CurPtr != '\\') {
      CurStrVal += *CurPtr++;
      continue;
    }

    ++CurPtr;

    switch (*CurPtr) {
    case '\\': case '\'': case '"':
      // These turn into their literal character.
      CurStrVal += *CurPtr++;
      break;
    case 't':
      CurStrVal += '\t';
      ++CurPtr;
      break;
    case 'n':
      CurStrVal += '\n';
      ++CurPtr;
      break;

    case '\n':
    case '\r':
      return ReturnError(CurPtr, "escaped newlines not supported in tblgen");

    // If we hit the end of the buffer, report an error.
    case '\0':
      if (CurPtr == CurBuf.end())
        return ReturnError(StrStart, "End of file in string literal");
      [[fallthrough]];
    default:
      return ReturnError(CurPtr, "invalid escape in string literal");
    }
  }

  ++CurPtr;
  return tgtok::StrVal;
}
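
// For example, the literal "a\tb" in a .td file produces a StrVal token whose
// value is the three characters 'a', TAB, 'b'; a raw newline inside the
// quotes is rejected above rather than silently spliced.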

tgtok::TokKind TGLexer::LexVarName() {
  if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
    return ReturnError(TokStart, "Invalid variable name");

  // Otherwise, we're ok, consume the rest of the characters.
  const char *VarNameStart = CurPtr++;

  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
    ++CurPtr;

  CurStrVal.assign(VarNameStart, CurPtr);
  return tgtok::VarName;
}

tgtok::TokKind TGLexer::LexIdentifier() {
  // The first letter is [a-zA-Z_].
  const char *IdentStart = TokStart;

  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
    ++CurPtr;

  // Check to see if this identifier is a reserved keyword.
  StringRef Str(IdentStart, CurPtr-IdentStart);

  tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
                            .Case("int", tgtok::Int)
                            .Case("bit", tgtok::Bit)
                            .Case("bits", tgtok::Bits)
                            .Case("string", tgtok::String)
                            .Case("list", tgtok::List)
                            .Case("code", tgtok::Code)
                            .Case("dag", tgtok::Dag)
                            .Case("class", tgtok::Class)
                            .Case("def", tgtok::Def)
                            .Case("true", tgtok::TrueVal)
                            .Case("false", tgtok::FalseVal)
                            .Case("foreach", tgtok::Foreach)
                            .Case("defm", tgtok::Defm)
                            .Case("defset", tgtok::Defset)
                            .Case("deftype", tgtok::Deftype)
                            .Case("multiclass", tgtok::MultiClass)
                            .Case("field", tgtok::Field)
                            .Case("let", tgtok::Let)
                            .Case("in", tgtok::In)
                            .Case("defvar", tgtok::Defvar)
                            .Case("include", tgtok::Include)
                            .Case("if", tgtok::If)
                            .Case("then", tgtok::Then)
                            .Case("else", tgtok::ElseKW)
                            .Case("assert", tgtok::Assert)
                            .Case("dump", tgtok::Dump)
                            .Default(tgtok::Id);

  // A couple of tokens require special processing.
  switch (Kind) {
  case tgtok::Include:
    if (LexInclude()) return tgtok::Error;
    return Lex();
  case tgtok::Id:
    CurStrVal.assign(Str.begin(), Str.end());
    break;
  default:
    break;
  }

  return Kind;
}

/// LexInclude - We just read the "include" token. Get the string token that
/// comes next and enter the include.
bool TGLexer::LexInclude() {
  // The token after the include must be a string.
  tgtok::TokKind Tok = LexToken();
  if (Tok == tgtok::Error) return true;
  if (Tok != tgtok::StrVal) {
    PrintError(getLoc(), "Expected filename after include");
    return true;
  }

  // Get the string.
  std::string Filename = CurStrVal;
  std::string IncludedFile;

  CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
                                    IncludedFile);
  if (!CurBuffer) {
    PrintError(getLoc(), "Could not find include file '" + Filename + "'");
    return true;
  }

  Dependencies.insert(IncludedFile);
  // Switch our pointers to lex from the start of the included file's buffer.
  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
  CurPtr = CurBuf.begin();

  PrepIncludeStack.push_back(
      std::make_unique<std::vector<PreprocessorControlDesc>>());
  return false;
}
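
// Note that AddIncludeFile() searches the include directories registered on
// the SourceMgr (typically the -I options of the *-tblgen tools), and lexing
// resumes at the first character of the included buffer, so the caller simply
// keeps pulling tokens.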

/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.
/// Or we may end up at the end of the buffer.
void TGLexer::SkipBCPLComment() {
  ++CurPtr; // skip the second slash.
  auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
  CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
}

/// SkipCComment - This skips C-style /**/ comments. The only difference from C
/// is that we allow nesting.
bool TGLexer::SkipCComment() {
  ++CurPtr; // skip the star.
  unsigned CommentDepth = 1;

  while (true) {
    int CurChar = getNextChar();
    switch (CurChar) {
    case EOF:
      PrintError(TokStart, "Unterminated comment!");
      return true;
    case '*':
      // End of the comment?
      if (CurPtr[0] != '/') break;

      ++CurPtr; // End the */.
      if (--CommentDepth == 0)
        return false;
      break;
    case '/':
      // Start of a nested comment?
      if (CurPtr[0] != '*') break;
      ++CurPtr;
      ++CommentDepth;
      break;
    }
  }
}
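
// Because nesting is tracked, an input such as
//     /* outer /* inner */ still a comment */
// is skipped as a single comment: CommentDepth only returns to zero at the
// second "*/".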

/// LexNumber - Lex:
///    [-+]?[0-9]+
///    0x[0-9a-fA-F]+
///    0b[01]+
tgtok::TokKind TGLexer::LexNumber() {
  unsigned Base = 0;
  const char *NumStart;

  // Check if it's a hex or a binary value.
  if (CurPtr[-1] == '0') {
    NumStart = CurPtr + 1;
    if (CurPtr[0] == 'x') {
      Base = 16;
      do
        ++CurPtr;
      while (isxdigit(CurPtr[0]));
    } else if (CurPtr[0] == 'b') {
      Base = 2;
      do
        ++CurPtr;
      while (CurPtr[0] == '0' || CurPtr[0] == '1');
    }
  }

  // For a hex or binary value, we always convert it to an unsigned value.
  bool IsMinus = false;

  // Check if it's a decimal value.
  if (Base == 0) {
    // Check for a sign without a digit.
    if (!isdigit(CurPtr[0])) {
      if (CurPtr[-1] == '-')
        return tgtok::minus;
      else if (CurPtr[-1] == '+')
        return tgtok::plus;
    }

    Base = 10;
    NumStart = TokStart;
    IsMinus = CurPtr[-1] == '-';

    while (isdigit(CurPtr[0]))
      ++CurPtr;
  }

  // Requires at least one digit.
  if (CurPtr == NumStart)
    return ReturnError(TokStart, "Invalid number");

  errno = 0;
  if (IsMinus)
    CurIntVal = strtoll(NumStart, nullptr, Base);
  else
    CurIntVal = strtoull(NumStart, nullptr, Base);

  if (errno == EINVAL)
    return ReturnError(TokStart, "Invalid number");
  if (errno == ERANGE)
    return ReturnError(TokStart, "Number out of range");

  return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal;
}
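
// Examples: "0x1F" and "-12" lex to IntVal tokens (31 and -12), while "0b0101"
// lexes to a BinaryIntVal token; the separate kind lets the parser recover how
// many binary digits were written, which matters for bits<n> initializers.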

/// LexBracket - We just read '['. If this is a code block, return it,
/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
tgtok::TokKind TGLexer::LexBracket() {
  if (CurPtr[0] != '{')
    return tgtok::l_square;
  ++CurPtr;
  const char *CodeStart = CurPtr;
  while (true) {
    int Char = getNextChar();
    if (Char == EOF) break;

    if (Char != '}') continue;

    Char = getNextChar();
    if (Char == EOF) break;
    if (Char == ']') {
      CurStrVal.assign(CodeStart, CurPtr-2);
      return tgtok::CodeFragment;
    }
  }

  return ReturnError(CodeStart - 2, "Unterminated code block");
}
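
// For example, the code block
//     [{ return N->hasOneUse(); }]
// comes back as a single CodeFragment token whose string value is the text
// between "[{" and "}]", whereas a plain '[' simply yields l_square.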

/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
tgtok::TokKind TGLexer::LexExclaim() {
  if (!isalpha(*CurPtr))
    return ReturnError(CurPtr - 1, "Invalid \"!operator\"");

  const char *Start = CurPtr++;
  while (isalpha(*CurPtr))
    ++CurPtr;

  // Check to see which operator this is.
  tgtok::TokKind Kind =
      StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
          .Case("eq", tgtok::XEq)
          .Case("ne", tgtok::XNe)
          .Case("le", tgtok::XLe)
          .Case("lt", tgtok::XLt)
          .Case("ge", tgtok::XGe)
          .Case("gt", tgtok::XGt)
          .Case("if", tgtok::XIf)
          .Case("cond", tgtok::XCond)
          .Case("isa", tgtok::XIsA)
          .Case("head", tgtok::XHead)
          .Case("tail", tgtok::XTail)
          .Case("size", tgtok::XSize)
          .Case("con", tgtok::XConcat)
          .Case("dag", tgtok::XDag)
          .Case("add", tgtok::XADD)
          .Case("sub", tgtok::XSUB)
          .Case("mul", tgtok::XMUL)
          .Case("div", tgtok::XDIV)
          .Case("not", tgtok::XNOT)
          .Case("logtwo", tgtok::XLOG2)
          .Case("and", tgtok::XAND)
          .Case("or", tgtok::XOR)
          .Case("xor", tgtok::XXOR)
          .Case("shl", tgtok::XSHL)
          .Case("sra", tgtok::XSRA)
          .Case("srl", tgtok::XSRL)
          .Case("cast", tgtok::XCast)
          .Case("empty", tgtok::XEmpty)
          .Case("subst", tgtok::XSubst)
          .Case("foldl", tgtok::XFoldl)
          .Case("foreach", tgtok::XForEach)
          .Case("filter", tgtok::XFilter)
          .Case("listconcat", tgtok::XListConcat)
          .Case("listsplat", tgtok::XListSplat)
          .Case("listremove", tgtok::XListRemove)
          .Case("range", tgtok::XRange)
          .Case("strconcat", tgtok::XStrConcat)
          .Case("interleave", tgtok::XInterleave)
          .Case("substr", tgtok::XSubstr)
          .Case("find", tgtok::XFind)
          .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
          .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
          .Case("getdagarg", tgtok::XGetDagArg)
          .Case("getdagname", tgtok::XGetDagName)
          .Case("setdagarg", tgtok::XSetDagArg)
          .Case("setdagname", tgtok::XSetDagName)
          .Case("exists", tgtok::XExists)
          .Case("tolower", tgtok::XToLower)
          .Case("toupper", tgtok::XToUpper)
          .Case("repr", tgtok::XRepr)
          .Default(tgtok::Error);

  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
}
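
// For example, "!add(X, 1)" is lexed as XADD, l_paren, Id("X"), comma,
// IntVal(1), r_paren, while an unrecognized name such as "!frobnicate" is
// reported as "Unknown operator".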

bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
  // Report an error if the preprocessor control stack for the current
  // file is not empty.
  if (!PrepIncludeStack.back()->empty()) {
    prepReportPreprocessorStackError();

    return false;
  }

  // Pop the preprocessing controls from the include stack.
  if (PrepIncludeStack.empty()) {
    PrintFatalError("Preprocessor include stack is empty");
  }

  PrepIncludeStack.pop_back();

  if (IncludeStackMustBeEmpty) {
    if (!PrepIncludeStack.empty())
      PrintFatalError("Preprocessor include stack is not empty");
  } else {
    if (PrepIncludeStack.empty())
      PrintFatalError("Preprocessor include stack is empty");
  }

  return true;
}

tgtok::TokKind TGLexer::prepIsDirective() const {
  for (const auto &PD : PreprocessorDirs) {
    int NextChar = *CurPtr;
    bool Match = true;
    unsigned I = 0;
    for (; I < strlen(PD.Word); ++I) {
      if (NextChar != PD.Word[I]) {
        Match = false;
        break;
      }

      NextChar = peekNextChar(I + 1);
    }

    // Check for whitespace after the directive. If there is no whitespace,
    // then we do not recognize it as a preprocessing directive.
    if (Match) {
      tgtok::TokKind Kind = PD.Kind;

      // New line and EOF may follow only #else/#endif. It will be reported
      // as an error for #ifdef/#define after the call to prepLexMacroName().
      if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
          NextChar == '\n' ||
          // It looks like TableGen does not support '\r' as the actual
          // carriage return, e.g. getNextChar() treats a single '\r'
          // as '\n'. So we do the same here.
          NextChar == '\r')
        return Kind;

      // Allow comments after some directives, e.g.:
      //     #else// OR #else/**/
      //     #endif// OR #endif/**/
      //
      // Note that we do allow comments after #ifdef/#define here, e.g.
      //     #ifdef/**/ AND #ifdef//
      //     #define/**/ AND #define//
      //
      // These cases will be reported as incorrect after calling
      // prepLexMacroName(). We could have supported C-style comments
      // after #ifdef/#define, but this would complicate the code
      // for little benefit.
      if (NextChar == '/') {
        NextChar = peekNextChar(I + 1);

        if (NextChar == '*' || NextChar == '/')
          return Kind;

        // Pretend that we do not recognize the directive.
      }
    }
  }

  return tgtok::Error;
}

bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
  TokStart = CurPtr;

  for (const auto &PD : PreprocessorDirs)
    if (PD.Kind == Kind) {
      // Advance CurPtr to the end of the preprocessing word.
      CurPtr += strlen(PD.Word);
      return true;
    }

  PrintFatalError("Unsupported preprocessing token in "
                  "prepEatPreprocessorDirective()");
  return false;
}

tgtok::TokKind TGLexer::lexPreprocessor(
    tgtok::TokKind Kind, bool ReturnNextLiveToken) {

  // We must be looking at a preprocessing directive. Eat it!
  if (!prepEatPreprocessorDirective(Kind))
    PrintFatalError("lexPreprocessor() called for unknown "
                    "preprocessor directive");

  if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {
    StringRef MacroName = prepLexMacroName();
    StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
    if (MacroName.empty())
      return ReturnError(TokStart, "Expected macro name after " + IfTokName);

    bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;

    // Canonicalize ifndef's MacroIsDefined to its ifdef equivalent.
    if (Kind == tgtok::Ifndef)
      MacroIsDefined = !MacroIsDefined;

    // Regardless of whether we are processing tokens or not,
    // we put the #ifdef control on stack.
    // Note that MacroIsDefined has been canonicalized against ifdef.
    PrepIncludeStack.back()->push_back(
        {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)});

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr, "Only comments are supported after " +
                                     IfTokName + " NAME");

    // If we were not processing tokens before this #ifdef,
    // then just return back to the lines skipping code.
    if (!ReturnNextLiveToken)
      return Kind;

    // If we were processing tokens before this #ifdef,
    // and the macro is defined, then just return the next token.
    if (MacroIsDefined)
      return LexToken();

    // We were processing tokens before this #ifdef, and the macro
    // is not defined, so we have to start skipping the lines.
    // If the skipping is successful, it will return the token following
    // either #else or #endif corresponding to this #ifdef.
    if (prepSkipRegion(ReturnNextLiveToken))
      return LexToken();

    return tgtok::Error;
  } else if (Kind == tgtok::Else) {
    // Check if this #else is correct before calling prepSkipDirectiveEnd(),
    // which will move CurPtr away from the beginning of #else.
    if (PrepIncludeStack.back()->empty())
      return ReturnError(TokStart, "#else without #ifdef or #ifndef");

    PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();

    if (IfdefEntry.Kind != tgtok::Ifdef) {
      PrintError(TokStart, "double #else");
      return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
    }

    // Replace the corresponding #ifdef's control with its negation
    // on the control stack.
    PrepIncludeStack.back()->pop_back();
    PrepIncludeStack.back()->push_back(
        {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr, "Only comments are supported after #else");

    // If we were processing tokens before this #else,
    // we have to start skipping lines until the matching #endif.
    if (ReturnNextLiveToken) {
      if (prepSkipRegion(ReturnNextLiveToken))
        return LexToken();

      return tgtok::Error;
    }

    // Return to the lines skipping code.
    return Kind;
  } else if (Kind == tgtok::Endif) {
    // Check if this #endif is correct before calling prepSkipDirectiveEnd(),
    // which will move CurPtr away from the beginning of #endif.
    if (PrepIncludeStack.back()->empty())
      return ReturnError(TokStart, "#endif without #ifdef");

    auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();

    if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
        IfdefOrElseEntry.Kind != tgtok::Else) {
      PrintFatalError("Invalid preprocessor control on the stack");
      return tgtok::Error;
    }

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr, "Only comments are supported after #endif");

    PrepIncludeStack.back()->pop_back();

    // If we were processing tokens before this #endif, then
    // we should continue it.
    if (ReturnNextLiveToken) {
      return LexToken();
    }

    // Return to the lines skipping code.
    return Kind;
  } else if (Kind == tgtok::Define) {
    StringRef MacroName = prepLexMacroName();
    if (MacroName.empty())
      return ReturnError(TokStart, "Expected macro name after #define");

    if (!DefinedMacros.insert(MacroName).second)
      PrintWarning(getLoc(),
                   "Duplicate definition of macro: " + Twine(MacroName));

    if (!prepSkipDirectiveEnd())
      return ReturnError(CurPtr,
                         "Only comments are supported after #define NAME");

    if (!ReturnNextLiveToken) {
      PrintFatalError("#define must be ignored during the lines skipping");
      return tgtok::Error;
    }

    return LexToken();
  }

  PrintFatalError("Preprocessing directive is not supported");
  return tgtok::Error;
}
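
// For example, with only OUTER defined on the command line, the input
//     #ifdef OUTER
//     #ifdef INNER
//     def A;
//     #else
//     def B;
//     #endif
//     #endif
// keeps only "def B;": the inner #ifdef pushes a disabled control and line
// skipping starts, the #else flips it back to enabled, and each #endif pops
// its entry from the control stack.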

bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
  if (!MustNeverBeFalse)
    PrintFatalError("Invalid recursion.");

  do {
    // Skip all symbols to the line end.
    while (*CurPtr != '\n')
      ++CurPtr;

    // Find the first non-whitespace symbol in the next line(s).
    if (!prepSkipLineBegin())
      return false;

    // If the first non-blank/comment symbol on the line is '#',
    // it may be the start of a preprocessing directive.
    //
    // If it is not '#' just go to the next line.
    if (*CurPtr == '#')
      ++CurPtr;
    else
      continue;

    tgtok::TokKind Kind = prepIsDirective();

    // If we did not find a preprocessing directive or it is #define,
    // then just skip to the next line. We do not have to do anything
    // for #define in the line-skipping mode.
    if (Kind == tgtok::Error || Kind == tgtok::Define)
      continue;

    tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);

    // If lexPreprocessor() encountered an error during lexing this
    // preprocessor idiom, then return false to the calling lexPreprocessor().
    // This will force tgtok::Error to be returned to the tokens processing.
    if (ProcessedKind == tgtok::Error)
      return false;

    if (Kind != ProcessedKind)
      PrintFatalError("prepIsDirective() and lexPreprocessor() "
                      "returned different token kinds");

    // If this preprocessing directive enables tokens processing,
    // then return to the lexPreprocessor() and get to the next token.
    // We can move from line-skipping mode to processing tokens only
    // due to #else or #endif.
    if (prepIsProcessingEnabled()) {
      if (Kind != tgtok::Else && Kind != tgtok::Endif) {
        PrintFatalError("Tokens processing was enabled by an unexpected "
                        "preprocessing directive");
        return false;
      }

      return true;
    }
  } while (CurPtr != CurBuf.end());

  // We have reached the end of the file, but never left the lines-skipping
  // mode. This means there is no matching #endif.
  prepReportPreprocessorStackError();
  return false;
}

StringRef TGLexer::prepLexMacroName() {
  // Skip whitespaces between the preprocessing directive and the macro name.
  while (*CurPtr == ' ' || *CurPtr == '\t')
    ++CurPtr;

  TokStart = CurPtr;
  // Macro names start with [a-zA-Z_].
  if (*CurPtr != '_' && !isalpha(*CurPtr))
    return "";

  // Match the rest of the identifier regex: [0-9a-zA-Z_]*
  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
    ++CurPtr;

  return StringRef(TokStart, CurPtr - TokStart);
}

bool TGLexer::prepSkipLineBegin() {
  while (CurPtr != CurBuf.end()) {
    switch (*CurPtr) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      break;

    case '/': {
      int NextChar = peekNextChar(1);
      if (NextChar == '*') {
        // Skip C-style comment.
        // Note that we do not care about skipping the C++-style comments.
        // If the line contains "//", it may not contain any processable
        // preprocessing directive. Just return CurPtr pointing to
        // the first '/' in this case. We also do not care about
        // incorrect symbols after the first '/' - we are in lines-skipping
        // mode, so incorrect code is allowed to some extent.

        // Set TokStart to the beginning of the comment to enable proper
        // diagnostic printing in case of error in SkipCComment().
        TokStart = CurPtr;

        // CurPtr must point to '*' before call to SkipCComment().
        ++CurPtr;
        if (SkipCComment())
          return false;
      } else {
        // CurPtr points to the non-whitespace '/'.
        return true;
      }

      // We must not increment CurPtr after the comment was lexed.
      continue;
    }

    default:
      return true;
    }

    ++CurPtr;
  }

  // We have reached the end of the file. Return to the lines skipping
  // code, and allow it to handle the EOF as needed.
  return true;
}

bool TGLexer::prepSkipDirectiveEnd() {
  while (CurPtr != CurBuf.end()) {
    switch (*CurPtr) {
    case ' ':
    case '\t':
      break;

    case '\n':
    case '\r':
      return true;

    case '/': {
      int NextChar = peekNextChar(1);
      if (NextChar == '/') {
        // Skip C++-style comment.
        // We may just return true now, but let's skip to the line/buffer end
        // to simplify the method specification.
        ++CurPtr;
        SkipBCPLComment();
      } else if (NextChar == '*') {
        // When we are skipping a C-style comment at the end of a preprocessing
        // directive, we can skip several lines. If any meaningful TD token
        // follows the end of the C-style comment on the same line, it will
        // be considered an invalid usage of a TD token.
        // For example, we want to forbid usages like this one:
        //     #define MACRO class Class {}
        // But with C-style comments we also disallow the following:
        //     #define MACRO /* This macro is used
        //                      to ... */ class Class {}
        // One can argue that this should be allowed, but it does not seem
        // to be worth the complication. Moreover, this matches
        // the C preprocessor behavior.

        // Set TokStart to the beginning of the comment to enable proper
        // diagnostic printing in case of error in SkipCComment().
        TokStart = CurPtr;
        ++CurPtr;
        if (SkipCComment())
          return false;
      } else {
        TokStart = CurPtr;
        PrintError(CurPtr, "Unexpected character");
        return false;
      }

      // We must not increment CurPtr after the comment was lexed.
      continue;
    }

    default:
      // Do not allow any non-whitespaces after the directive.
      TokStart = CurPtr;
      return false;
    }

    ++CurPtr;
  }

  return true;
}

bool TGLexer::prepIsProcessingEnabled() {
  for (const PreprocessorControlDesc &I :
       llvm::reverse(*PrepIncludeStack.back()))
    if (!I.IsDefined)
      return false;

  return true;
}

void TGLexer::prepReportPreprocessorStackError() {
  if (PrepIncludeStack.back()->empty())
    PrintFatalError("prepReportPreprocessorStackError() called with "
                    "empty control stack");

  auto &PrepControl = PrepIncludeStack.back()->back();
  PrintError(CurBuf.end(), "Reached EOF without matching #endif");
  PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");

  TokStart = CurPtr;
}