Path: blob/main/contrib/llvm-project/llvm/lib/TableGen/TGLexer.cpp
35233 views
//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Implement the Lexer for TableGen.9//10//===----------------------------------------------------------------------===//1112#include "TGLexer.h"13#include "llvm/ADT/ArrayRef.h"14#include "llvm/ADT/StringSwitch.h"15#include "llvm/ADT/Twine.h"16#include "llvm/Config/config.h" // for strtoull()/strtoll() define17#include "llvm/Support/Compiler.h"18#include "llvm/Support/MemoryBuffer.h"19#include "llvm/Support/SourceMgr.h"20#include "llvm/TableGen/Error.h"21#include <algorithm>22#include <cctype>23#include <cerrno>24#include <cstdint>25#include <cstdio>26#include <cstdlib>27#include <cstring>2829using namespace llvm;3031namespace {32// A list of supported preprocessing directives with their33// internal token kinds and names.34struct {35tgtok::TokKind Kind;36const char *Word;37} PreprocessorDirs[] = {38{ tgtok::Ifdef, "ifdef" },39{ tgtok::Ifndef, "ifndef" },40{ tgtok::Else, "else" },41{ tgtok::Endif, "endif" },42{ tgtok::Define, "define" }43};44} // end anonymous namespace4546TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {47CurBuffer = SrcMgr.getMainFileID();48CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();49CurPtr = CurBuf.begin();50TokStart = nullptr;5152// Pretend that we enter the "top-level" include file.53PrepIncludeStack.push_back(54std::make_unique<std::vector<PreprocessorControlDesc>>());5556// Put all macros defined in the command line into the DefinedMacros set.57for (const std::string &MacroName : Macros)58DefinedMacros.insert(MacroName);59}6061SMLoc TGLexer::getLoc() const {62return SMLoc::getFromPointer(TokStart);63}6465SMRange TGLexer::getLocRange() const {66return {getLoc(), SMLoc::getFromPointer(CurPtr)};67}6869/// ReturnError - Set the error to the specified string at the specified70/// location. This is defined to always return tgtok::Error.71tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {72PrintError(Loc, Msg);73return tgtok::Error;74}7576tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {77return ReturnError(SMLoc::getFromPointer(Loc), Msg);78}7980bool TGLexer::processEOF() {81SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);82if (ParentIncludeLoc != SMLoc()) {83// If prepExitInclude() detects a problem with the preprocessing84// control stack, it will return false. Pretend that we reached85// the final EOF and stop lexing more tokens by returning false86// to LexToken().87if (!prepExitInclude(false))88return false;8990CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);91CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();92CurPtr = ParentIncludeLoc.getPointer();93// Make sure TokStart points into the parent file's buffer.94// LexToken() assigns to it before calling getNextChar(),95// so it is pointing into the included file now.96TokStart = CurPtr;97return true;98}99100// Pretend that we exit the "top-level" include file.101// Note that in case of an error (e.g. control stack imbalance)102// the routine will issue a fatal error.103prepExitInclude(true);104return false;105}106107int TGLexer::getNextChar() {108char CurChar = *CurPtr++;109switch (CurChar) {110default:111return (unsigned char)CurChar;112113case 0: {114// A NUL character in the stream is either the end of the current buffer or115// a spurious NUL in the file. Disambiguate that here.116if (CurPtr - 1 == CurBuf.end()) {117--CurPtr; // Arrange for another call to return EOF again.118return EOF;119}120PrintError(getLoc(),121"NUL character is invalid in source; treated as space");122return ' ';123}124125case '\n':126case '\r':127// Handle the newline character by ignoring it and incrementing the line128// count. However, be careful about 'dos style' files with \n\r in them.129// Only treat a \n\r or \r\n as a single line.130if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&131*CurPtr != CurChar)132++CurPtr; // Eat the two char newline sequence.133return '\n';134}135}136137int TGLexer::peekNextChar(int Index) const {138return *(CurPtr + Index);139}140141tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {142TokStart = CurPtr;143// This always consumes at least one character.144int CurChar = getNextChar();145146switch (CurChar) {147default:148// Handle letters: [a-zA-Z_]149if (isalpha(CurChar) || CurChar == '_')150return LexIdentifier();151152// Unknown character, emit an error.153return ReturnError(TokStart, "Unexpected character");154case EOF:155// Lex next token, if we just left an include file.156// Note that leaving an include file means that the next157// symbol is located at the end of the 'include "..."'158// construct, so LexToken() is called with default159// false parameter.160if (processEOF())161return LexToken();162163// Return EOF denoting the end of lexing.164return tgtok::Eof;165166case ':': return tgtok::colon;167case ';': return tgtok::semi;168case ',': return tgtok::comma;169case '<': return tgtok::less;170case '>': return tgtok::greater;171case ']': return tgtok::r_square;172case '{': return tgtok::l_brace;173case '}': return tgtok::r_brace;174case '(': return tgtok::l_paren;175case ')': return tgtok::r_paren;176case '=': return tgtok::equal;177case '?': return tgtok::question;178case '#':179if (FileOrLineStart) {180tgtok::TokKind Kind = prepIsDirective();181if (Kind != tgtok::Error)182return lexPreprocessor(Kind);183}184185return tgtok::paste;186187// The period is a separate case so we can recognize the "..."188// range punctuator.189case '.':190if (peekNextChar(0) == '.') {191++CurPtr; // Eat second dot.192if (peekNextChar(0) == '.') {193++CurPtr; // Eat third dot.194return tgtok::dotdotdot;195}196return ReturnError(TokStart, "Invalid '..' punctuation");197}198return tgtok::dot;199200case '\r':201PrintFatalError("getNextChar() must never return '\r'");202return tgtok::Error;203204case ' ':205case '\t':206// Ignore whitespace.207return LexToken(FileOrLineStart);208case '\n':209// Ignore whitespace, and identify the new line.210return LexToken(true);211case '/':212// If this is the start of a // comment, skip until the end of the line or213// the end of the buffer.214if (*CurPtr == '/')215SkipBCPLComment();216else if (*CurPtr == '*') {217if (SkipCComment())218return tgtok::Error;219} else // Otherwise, this is an error.220return ReturnError(TokStart, "Unexpected character");221return LexToken(FileOrLineStart);222case '-': case '+':223case '0': case '1': case '2': case '3': case '4': case '5': case '6':224case '7': case '8': case '9': {225int NextChar = 0;226if (isdigit(CurChar)) {227// Allow identifiers to start with a number if it is followed by228// an identifier. This can happen with paste operations like229// foo#8i.230int i = 0;231do {232NextChar = peekNextChar(i++);233} while (isdigit(NextChar));234235if (NextChar == 'x' || NextChar == 'b') {236// If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most237// likely a number.238int NextNextChar = peekNextChar(i);239switch (NextNextChar) {240default:241break;242case '0': case '1':243if (NextChar == 'b')244return LexNumber();245[[fallthrough]];246case '2': case '3': case '4': case '5':247case '6': case '7': case '8': case '9':248case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':249case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':250if (NextChar == 'x')251return LexNumber();252break;253}254}255}256257if (isalpha(NextChar) || NextChar == '_')258return LexIdentifier();259260return LexNumber();261}262case '"': return LexString();263case '$': return LexVarName();264case '[': return LexBracket();265case '!': return LexExclaim();266}267}268269/// LexString - Lex "[^"]*"270tgtok::TokKind TGLexer::LexString() {271const char *StrStart = CurPtr;272273CurStrVal = "";274275while (*CurPtr != '"') {276// If we hit the end of the buffer, report an error.277if (*CurPtr == 0 && CurPtr == CurBuf.end())278return ReturnError(StrStart, "End of file in string literal");279280if (*CurPtr == '\n' || *CurPtr == '\r')281return ReturnError(StrStart, "End of line in string literal");282283if (*CurPtr != '\\') {284CurStrVal += *CurPtr++;285continue;286}287288++CurPtr;289290switch (*CurPtr) {291case '\\': case '\'': case '"':292// These turn into their literal character.293CurStrVal += *CurPtr++;294break;295case 't':296CurStrVal += '\t';297++CurPtr;298break;299case 'n':300CurStrVal += '\n';301++CurPtr;302break;303304case '\n':305case '\r':306return ReturnError(CurPtr, "escaped newlines not supported in tblgen");307308// If we hit the end of the buffer, report an error.309case '\0':310if (CurPtr == CurBuf.end())311return ReturnError(StrStart, "End of file in string literal");312[[fallthrough]];313default:314return ReturnError(CurPtr, "invalid escape in string literal");315}316}317318++CurPtr;319return tgtok::StrVal;320}321322tgtok::TokKind TGLexer::LexVarName() {323if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')324return ReturnError(TokStart, "Invalid variable name");325326// Otherwise, we're ok, consume the rest of the characters.327const char *VarNameStart = CurPtr++;328329while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')330++CurPtr;331332CurStrVal.assign(VarNameStart, CurPtr);333return tgtok::VarName;334}335336tgtok::TokKind TGLexer::LexIdentifier() {337// The first letter is [a-zA-Z_].338const char *IdentStart = TokStart;339340// Match the rest of the identifier regex: [0-9a-zA-Z_]*341while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')342++CurPtr;343344// Check to see if this identifier is a reserved keyword.345StringRef Str(IdentStart, CurPtr-IdentStart);346347tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)348.Case("int", tgtok::Int)349.Case("bit", tgtok::Bit)350.Case("bits", tgtok::Bits)351.Case("string", tgtok::String)352.Case("list", tgtok::List)353.Case("code", tgtok::Code)354.Case("dag", tgtok::Dag)355.Case("class", tgtok::Class)356.Case("def", tgtok::Def)357.Case("true", tgtok::TrueVal)358.Case("false", tgtok::FalseVal)359.Case("foreach", tgtok::Foreach)360.Case("defm", tgtok::Defm)361.Case("defset", tgtok::Defset)362.Case("deftype", tgtok::Deftype)363.Case("multiclass", tgtok::MultiClass)364.Case("field", tgtok::Field)365.Case("let", tgtok::Let)366.Case("in", tgtok::In)367.Case("defvar", tgtok::Defvar)368.Case("include", tgtok::Include)369.Case("if", tgtok::If)370.Case("then", tgtok::Then)371.Case("else", tgtok::ElseKW)372.Case("assert", tgtok::Assert)373.Case("dump", tgtok::Dump)374.Default(tgtok::Id);375376// A couple of tokens require special processing.377switch (Kind) {378case tgtok::Include:379if (LexInclude()) return tgtok::Error;380return Lex();381case tgtok::Id:382CurStrVal.assign(Str.begin(), Str.end());383break;384default:385break;386}387388return Kind;389}390391/// LexInclude - We just read the "include" token. Get the string token that392/// comes next and enter the include.393bool TGLexer::LexInclude() {394// The token after the include must be a string.395tgtok::TokKind Tok = LexToken();396if (Tok == tgtok::Error) return true;397if (Tok != tgtok::StrVal) {398PrintError(getLoc(), "Expected filename after include");399return true;400}401402// Get the string.403std::string Filename = CurStrVal;404std::string IncludedFile;405406CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),407IncludedFile);408if (!CurBuffer) {409PrintError(getLoc(), "Could not find include file '" + Filename + "'");410return true;411}412413Dependencies.insert(IncludedFile);414// Save the line number and lex buffer of the includer.415CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();416CurPtr = CurBuf.begin();417418PrepIncludeStack.push_back(419std::make_unique<std::vector<PreprocessorControlDesc>>());420return false;421}422423/// SkipBCPLComment - Skip over the comment by finding the next CR or LF.424/// Or we may end up at the end of the buffer.425void TGLexer::SkipBCPLComment() {426++CurPtr; // skip the second slash.427auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());428CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;429}430431/// SkipCComment - This skips C-style /**/ comments. The only difference from C432/// is that we allow nesting.433bool TGLexer::SkipCComment() {434++CurPtr; // skip the star.435unsigned CommentDepth = 1;436437while (true) {438int CurChar = getNextChar();439switch (CurChar) {440case EOF:441PrintError(TokStart, "Unterminated comment!");442return true;443case '*':444// End of the comment?445if (CurPtr[0] != '/') break;446447++CurPtr; // End the */.448if (--CommentDepth == 0)449return false;450break;451case '/':452// Start of a nested comment?453if (CurPtr[0] != '*') break;454++CurPtr;455++CommentDepth;456break;457}458}459}460461/// LexNumber - Lex:462/// [-+]?[0-9]+463/// 0x[0-9a-fA-F]+464/// 0b[01]+465tgtok::TokKind TGLexer::LexNumber() {466unsigned Base = 0;467const char *NumStart;468469// Check if it's a hex or a binary value.470if (CurPtr[-1] == '0') {471NumStart = CurPtr + 1;472if (CurPtr[0] == 'x') {473Base = 16;474do475++CurPtr;476while (isxdigit(CurPtr[0]));477} else if (CurPtr[0] == 'b') {478Base = 2;479do480++CurPtr;481while (CurPtr[0] == '0' || CurPtr[0] == '1');482}483}484485// For a hex or binary value, we always convert it to an unsigned value.486bool IsMinus = false;487488// Check if it's a decimal value.489if (Base == 0) {490// Check for a sign without a digit.491if (!isdigit(CurPtr[0])) {492if (CurPtr[-1] == '-')493return tgtok::minus;494else if (CurPtr[-1] == '+')495return tgtok::plus;496}497498Base = 10;499NumStart = TokStart;500IsMinus = CurPtr[-1] == '-';501502while (isdigit(CurPtr[0]))503++CurPtr;504}505506// Requires at least one digit.507if (CurPtr == NumStart)508return ReturnError(TokStart, "Invalid number");509510errno = 0;511if (IsMinus)512CurIntVal = strtoll(NumStart, nullptr, Base);513else514CurIntVal = strtoull(NumStart, nullptr, Base);515516if (errno == EINVAL)517return ReturnError(TokStart, "Invalid number");518if (errno == ERANGE)519return ReturnError(TokStart, "Number out of range");520521return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal;522}523524/// LexBracket - We just read '['. If this is a code block, return it,525/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'526tgtok::TokKind TGLexer::LexBracket() {527if (CurPtr[0] != '{')528return tgtok::l_square;529++CurPtr;530const char *CodeStart = CurPtr;531while (true) {532int Char = getNextChar();533if (Char == EOF) break;534535if (Char != '}') continue;536537Char = getNextChar();538if (Char == EOF) break;539if (Char == ']') {540CurStrVal.assign(CodeStart, CurPtr-2);541return tgtok::CodeFragment;542}543}544545return ReturnError(CodeStart - 2, "Unterminated code block");546}547548/// LexExclaim - Lex '!' and '![a-zA-Z]+'.549tgtok::TokKind TGLexer::LexExclaim() {550if (!isalpha(*CurPtr))551return ReturnError(CurPtr - 1, "Invalid \"!operator\"");552553const char *Start = CurPtr++;554while (isalpha(*CurPtr))555++CurPtr;556557// Check to see which operator this is.558tgtok::TokKind Kind =559StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))560.Case("eq", tgtok::XEq)561.Case("ne", tgtok::XNe)562.Case("le", tgtok::XLe)563.Case("lt", tgtok::XLt)564.Case("ge", tgtok::XGe)565.Case("gt", tgtok::XGt)566.Case("if", tgtok::XIf)567.Case("cond", tgtok::XCond)568.Case("isa", tgtok::XIsA)569.Case("head", tgtok::XHead)570.Case("tail", tgtok::XTail)571.Case("size", tgtok::XSize)572.Case("con", tgtok::XConcat)573.Case("dag", tgtok::XDag)574.Case("add", tgtok::XADD)575.Case("sub", tgtok::XSUB)576.Case("mul", tgtok::XMUL)577.Case("div", tgtok::XDIV)578.Case("not", tgtok::XNOT)579.Case("logtwo", tgtok::XLOG2)580.Case("and", tgtok::XAND)581.Case("or", tgtok::XOR)582.Case("xor", tgtok::XXOR)583.Case("shl", tgtok::XSHL)584.Case("sra", tgtok::XSRA)585.Case("srl", tgtok::XSRL)586.Case("cast", tgtok::XCast)587.Case("empty", tgtok::XEmpty)588.Case("subst", tgtok::XSubst)589.Case("foldl", tgtok::XFoldl)590.Case("foreach", tgtok::XForEach)591.Case("filter", tgtok::XFilter)592.Case("listconcat", tgtok::XListConcat)593.Case("listsplat", tgtok::XListSplat)594.Case("listremove", tgtok::XListRemove)595.Case("range", tgtok::XRange)596.Case("strconcat", tgtok::XStrConcat)597.Case("interleave", tgtok::XInterleave)598.Case("substr", tgtok::XSubstr)599.Case("find", tgtok::XFind)600.Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.601.Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.602.Case("getdagarg", tgtok::XGetDagArg)603.Case("getdagname", tgtok::XGetDagName)604.Case("setdagarg", tgtok::XSetDagArg)605.Case("setdagname", tgtok::XSetDagName)606.Case("exists", tgtok::XExists)607.Case("tolower", tgtok::XToLower)608.Case("toupper", tgtok::XToUpper)609.Case("repr", tgtok::XRepr)610.Default(tgtok::Error);611612return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");613}614615bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {616// Report an error, if preprocessor control stack for the current617// file is not empty.618if (!PrepIncludeStack.back()->empty()) {619prepReportPreprocessorStackError();620621return false;622}623624// Pop the preprocessing controls from the include stack.625if (PrepIncludeStack.empty()) {626PrintFatalError("Preprocessor include stack is empty");627}628629PrepIncludeStack.pop_back();630631if (IncludeStackMustBeEmpty) {632if (!PrepIncludeStack.empty())633PrintFatalError("Preprocessor include stack is not empty");634} else {635if (PrepIncludeStack.empty())636PrintFatalError("Preprocessor include stack is empty");637}638639return true;640}641642tgtok::TokKind TGLexer::prepIsDirective() const {643for (const auto &PD : PreprocessorDirs) {644int NextChar = *CurPtr;645bool Match = true;646unsigned I = 0;647for (; I < strlen(PD.Word); ++I) {648if (NextChar != PD.Word[I]) {649Match = false;650break;651}652653NextChar = peekNextChar(I + 1);654}655656// Check for whitespace after the directive. If there is no whitespace,657// then we do not recognize it as a preprocessing directive.658if (Match) {659tgtok::TokKind Kind = PD.Kind;660661// New line and EOF may follow only #else/#endif. It will be reported662// as an error for #ifdef/#define after the call to prepLexMacroName().663if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||664NextChar == '\n' ||665// It looks like TableGen does not support '\r' as the actual666// carriage return, e.g. getNextChar() treats a single '\r'667// as '\n'. So we do the same here.668NextChar == '\r')669return Kind;670671// Allow comments after some directives, e.g.:672// #else// OR #else/**/673// #endif// OR #endif/**/674//675// Note that we do allow comments after #ifdef/#define here, e.g.676// #ifdef/**/ AND #ifdef//677// #define/**/ AND #define//678//679// These cases will be reported as incorrect after calling680// prepLexMacroName(). We could have supported C-style comments681// after #ifdef/#define, but this would complicate the code682// for little benefit.683if (NextChar == '/') {684NextChar = peekNextChar(I + 1);685686if (NextChar == '*' || NextChar == '/')687return Kind;688689// Pretend that we do not recognize the directive.690}691}692}693694return tgtok::Error;695}696697bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {698TokStart = CurPtr;699700for (const auto &PD : PreprocessorDirs)701if (PD.Kind == Kind) {702// Advance CurPtr to the end of the preprocessing word.703CurPtr += strlen(PD.Word);704return true;705}706707PrintFatalError("Unsupported preprocessing token in "708"prepEatPreprocessorDirective()");709return false;710}711712tgtok::TokKind TGLexer::lexPreprocessor(713tgtok::TokKind Kind, bool ReturnNextLiveToken) {714715// We must be looking at a preprocessing directive. Eat it!716if (!prepEatPreprocessorDirective(Kind))717PrintFatalError("lexPreprocessor() called for unknown "718"preprocessor directive");719720if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) {721StringRef MacroName = prepLexMacroName();722StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";723if (MacroName.empty())724return ReturnError(TokStart, "Expected macro name after " + IfTokName);725726bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;727728// Canonicalize ifndef's MacroIsDefined to its ifdef equivalent.729if (Kind == tgtok::Ifndef)730MacroIsDefined = !MacroIsDefined;731732// Regardless of whether we are processing tokens or not,733// we put the #ifdef control on stack.734// Note that MacroIsDefined has been canonicalized against ifdef.735PrepIncludeStack.back()->push_back(736{tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)});737738if (!prepSkipDirectiveEnd())739return ReturnError(CurPtr, "Only comments are supported after " +740IfTokName + " NAME");741742// If we were not processing tokens before this #ifdef,743// then just return back to the lines skipping code.744if (!ReturnNextLiveToken)745return Kind;746747// If we were processing tokens before this #ifdef,748// and the macro is defined, then just return the next token.749if (MacroIsDefined)750return LexToken();751752// We were processing tokens before this #ifdef, and the macro753// is not defined, so we have to start skipping the lines.754// If the skipping is successful, it will return the token following755// either #else or #endif corresponding to this #ifdef.756if (prepSkipRegion(ReturnNextLiveToken))757return LexToken();758759return tgtok::Error;760} else if (Kind == tgtok::Else) {761// Check if this #else is correct before calling prepSkipDirectiveEnd(),762// which will move CurPtr away from the beginning of #else.763if (PrepIncludeStack.back()->empty())764return ReturnError(TokStart, "#else without #ifdef or #ifndef");765766PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();767768if (IfdefEntry.Kind != tgtok::Ifdef) {769PrintError(TokStart, "double #else");770return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");771}772773// Replace the corresponding #ifdef's control with its negation774// on the control stack.775PrepIncludeStack.back()->pop_back();776PrepIncludeStack.back()->push_back(777{Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});778779if (!prepSkipDirectiveEnd())780return ReturnError(CurPtr, "Only comments are supported after #else");781782// If we were processing tokens before this #else,783// we have to start skipping lines until the matching #endif.784if (ReturnNextLiveToken) {785if (prepSkipRegion(ReturnNextLiveToken))786return LexToken();787788return tgtok::Error;789}790791// Return to the lines skipping code.792return Kind;793} else if (Kind == tgtok::Endif) {794// Check if this #endif is correct before calling prepSkipDirectiveEnd(),795// which will move CurPtr away from the beginning of #endif.796if (PrepIncludeStack.back()->empty())797return ReturnError(TokStart, "#endif without #ifdef");798799auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();800801if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&802IfdefOrElseEntry.Kind != tgtok::Else) {803PrintFatalError("Invalid preprocessor control on the stack");804return tgtok::Error;805}806807if (!prepSkipDirectiveEnd())808return ReturnError(CurPtr, "Only comments are supported after #endif");809810PrepIncludeStack.back()->pop_back();811812// If we were processing tokens before this #endif, then813// we should continue it.814if (ReturnNextLiveToken) {815return LexToken();816}817818// Return to the lines skipping code.819return Kind;820} else if (Kind == tgtok::Define) {821StringRef MacroName = prepLexMacroName();822if (MacroName.empty())823return ReturnError(TokStart, "Expected macro name after #define");824825if (!DefinedMacros.insert(MacroName).second)826PrintWarning(getLoc(),827"Duplicate definition of macro: " + Twine(MacroName));828829if (!prepSkipDirectiveEnd())830return ReturnError(CurPtr,831"Only comments are supported after #define NAME");832833if (!ReturnNextLiveToken) {834PrintFatalError("#define must be ignored during the lines skipping");835return tgtok::Error;836}837838return LexToken();839}840841PrintFatalError("Preprocessing directive is not supported");842return tgtok::Error;843}844845bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {846if (!MustNeverBeFalse)847PrintFatalError("Invalid recursion.");848849do {850// Skip all symbols to the line end.851while (*CurPtr != '\n')852++CurPtr;853854// Find the first non-whitespace symbol in the next line(s).855if (!prepSkipLineBegin())856return false;857858// If the first non-blank/comment symbol on the line is '#',859// it may be a start of preprocessing directive.860//861// If it is not '#' just go to the next line.862if (*CurPtr == '#')863++CurPtr;864else865continue;866867tgtok::TokKind Kind = prepIsDirective();868869// If we did not find a preprocessing directive or it is #define,870// then just skip to the next line. We do not have to do anything871// for #define in the line-skipping mode.872if (Kind == tgtok::Error || Kind == tgtok::Define)873continue;874875tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);876877// If lexPreprocessor() encountered an error during lexing this878// preprocessor idiom, then return false to the calling lexPreprocessor().879// This will force tgtok::Error to be returned to the tokens processing.880if (ProcessedKind == tgtok::Error)881return false;882883if (Kind != ProcessedKind)884PrintFatalError("prepIsDirective() and lexPreprocessor() "885"returned different token kinds");886887// If this preprocessing directive enables tokens processing,888// then return to the lexPreprocessor() and get to the next token.889// We can move from line-skipping mode to processing tokens only890// due to #else or #endif.891if (prepIsProcessingEnabled()) {892if (Kind != tgtok::Else && Kind != tgtok::Endif) {893PrintFatalError("Tokens processing was enabled by an unexpected "894"preprocessing directive");895return false;896}897898return true;899}900} while (CurPtr != CurBuf.end());901902// We have reached the end of the file, but never left the lines-skipping903// mode. This means there is no matching #endif.904prepReportPreprocessorStackError();905return false;906}907908StringRef TGLexer::prepLexMacroName() {909// Skip whitespaces between the preprocessing directive and the macro name.910while (*CurPtr == ' ' || *CurPtr == '\t')911++CurPtr;912913TokStart = CurPtr;914// Macro names start with [a-zA-Z_].915if (*CurPtr != '_' && !isalpha(*CurPtr))916return "";917918// Match the rest of the identifier regex: [0-9a-zA-Z_]*919while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')920++CurPtr;921922return StringRef(TokStart, CurPtr - TokStart);923}924925bool TGLexer::prepSkipLineBegin() {926while (CurPtr != CurBuf.end()) {927switch (*CurPtr) {928case ' ':929case '\t':930case '\n':931case '\r':932break;933934case '/': {935int NextChar = peekNextChar(1);936if (NextChar == '*') {937// Skip C-style comment.938// Note that we do not care about skipping the C++-style comments.939// If the line contains "//", it may not contain any processable940// preprocessing directive. Just return CurPtr pointing to941// the first '/' in this case. We also do not care about942// incorrect symbols after the first '/' - we are in lines-skipping943// mode, so incorrect code is allowed to some extent.944945// Set TokStart to the beginning of the comment to enable proper946// diagnostic printing in case of error in SkipCComment().947TokStart = CurPtr;948949// CurPtr must point to '*' before call to SkipCComment().950++CurPtr;951if (SkipCComment())952return false;953} else {954// CurPtr points to the non-whitespace '/'.955return true;956}957958// We must not increment CurPtr after the comment was lexed.959continue;960}961962default:963return true;964}965966++CurPtr;967}968969// We have reached the end of the file. Return to the lines skipping970// code, and allow it to handle the EOF as needed.971return true;972}973974bool TGLexer::prepSkipDirectiveEnd() {975while (CurPtr != CurBuf.end()) {976switch (*CurPtr) {977case ' ':978case '\t':979break;980981case '\n':982case '\r':983return true;984985case '/': {986int NextChar = peekNextChar(1);987if (NextChar == '/') {988// Skip C++-style comment.989// We may just return true now, but let's skip to the line/buffer end990// to simplify the method specification.991++CurPtr;992SkipBCPLComment();993} else if (NextChar == '*') {994// When we are skipping C-style comment at the end of a preprocessing995// directive, we can skip several lines. If any meaningful TD token996// follows the end of the C-style comment on the same line, it will997// be considered as an invalid usage of TD token.998// For example, we want to forbid usages like this one:999// #define MACRO class Class {}1000// But with C-style comments we also disallow the following:1001// #define MACRO /* This macro is used1002// to ... */ class Class {}1003// One can argue that this should be allowed, but it does not seem1004// to be worth of the complication. Moreover, this matches1005// the C preprocessor behavior.10061007// Set TokStart to the beginning of the comment to enable proper1008// diagnostic printer in case of error in SkipCComment().1009TokStart = CurPtr;1010++CurPtr;1011if (SkipCComment())1012return false;1013} else {1014TokStart = CurPtr;1015PrintError(CurPtr, "Unexpected character");1016return false;1017}10181019// We must not increment CurPtr after the comment was lexed.1020continue;1021}10221023default:1024// Do not allow any non-whitespaces after the directive.1025TokStart = CurPtr;1026return false;1027}10281029++CurPtr;1030}10311032return true;1033}10341035bool TGLexer::prepIsProcessingEnabled() {1036for (const PreprocessorControlDesc &I :1037llvm::reverse(*PrepIncludeStack.back()))1038if (!I.IsDefined)1039return false;10401041return true;1042}10431044void TGLexer::prepReportPreprocessorStackError() {1045if (PrepIncludeStack.back()->empty())1046PrintFatalError("prepReportPreprocessorStackError() called with "1047"empty control stack");10481049auto &PrepControl = PrepIncludeStack.back()->back();1050PrintError(CurBuf.end(), "Reached EOF without matching #endif");1051PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");10521053TokStart = CurPtr;1054}105510561057