Path: blob/main/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h
35233 views
//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This class represents the Lexer for tablegen files.9//10//===----------------------------------------------------------------------===//1112#ifndef LLVM_LIB_TABLEGEN_TGLEXER_H13#define LLVM_LIB_TABLEGEN_TGLEXER_H1415#include "llvm/ADT/StringRef.h"16#include "llvm/ADT/StringSet.h"17#include "llvm/Support/DataTypes.h"18#include "llvm/Support/SMLoc.h"19#include <cassert>20#include <memory>21#include <set>22#include <string>23#include <vector>2425namespace llvm {26template <typename T> class ArrayRef;27class SourceMgr;28class Twine;2930namespace tgtok {31enum TokKind {32// Markers33Eof,34Error,3536// Tokens with no info.37minus, // -38plus, // +39l_square, // [40r_square, // ]41l_brace, // {42r_brace, // }43l_paren, // (44r_paren, // )45less, // <46greater, // >47colon, // :48semi, // ;49comma, // ,50dot, // .51equal, // =52question, // ?53paste, // #54dotdotdot, // ...5556// Boolean literals.57TrueVal,58FalseVal,5960// Integer value.61IntVal,6263// Binary constant. Note that these are sized according to the number of64// bits given.65BinaryIntVal,6667// Preprocessing tokens for internal usage by the lexer.68// They are never returned as a result of Lex().69Ifdef,70Ifndef,71Else,72Endif,73Define,7475// Reserved keywords. ('ElseKW' is named to distinguish it from the76// existing 'Else' that means the preprocessor #else.)77Bit,78Bits,79Code,80Dag,81ElseKW,82FalseKW,83Field,84In,85Include,86Int,87List,88String,89Then,90TrueKW,9192// Object start tokens.93OBJECT_START_FIRST,94Assert = OBJECT_START_FIRST,95Class,96Def,97Defm,98Defset,99Deftype,100Defvar,101Dump,102Foreach,103If,104Let,105MultiClass,106OBJECT_START_LAST = MultiClass,107108// Bang operators.109BANG_OPERATOR_FIRST,110XConcat = BANG_OPERATOR_FIRST,111XADD,112XSUB,113XMUL,114XDIV,115XNOT,116XLOG2,117XAND,118XOR,119XXOR,120XSRA,121XSRL,122XSHL,123XListConcat,124XListSplat,125XStrConcat,126XInterleave,127XSubstr,128XFind,129XCast,130XSubst,131XForEach,132XFilter,133XFoldl,134XHead,135XTail,136XSize,137XEmpty,138XIf,139XCond,140XEq,141XIsA,142XDag,143XNe,144XLe,145XLt,146XGe,147XGt,148XSetDagOp,149XGetDagOp,150XExists,151XListRemove,152XToLower,153XToUpper,154XRange,155XGetDagArg,156XGetDagName,157XSetDagArg,158XSetDagName,159XRepr,160BANG_OPERATOR_LAST = XRepr,161162// String valued tokens.163STRING_VALUE_FIRST,164Id = STRING_VALUE_FIRST,165StrVal,166VarName,167CodeFragment,168STRING_VALUE_LAST = CodeFragment,169};170171/// isBangOperator - Return true if this is a bang operator.172static inline bool isBangOperator(tgtok::TokKind Kind) {173return tgtok::BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST;174}175176/// isObjectStart - Return true if this is a valid first token for a statement.177static inline bool isObjectStart(tgtok::TokKind Kind) {178return tgtok::OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST;179}180181/// isStringValue - Return true if this is a string value.182static inline bool isStringValue(tgtok::TokKind Kind) {183return tgtok::STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST;184}185} // namespace tgtok186187/// TGLexer - TableGen Lexer class.188class TGLexer {189SourceMgr &SrcMgr;190191const char *CurPtr = nullptr;192StringRef CurBuf;193194// Information about the current token.195const char *TokStart = nullptr;196tgtok::TokKind CurCode = tgtok::TokKind::Eof;197std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment198int64_t CurIntVal = 0; // This is valid for IntVal.199200/// CurBuffer - This is the current buffer index we're lexing from as managed201/// by the SourceMgr object.202unsigned CurBuffer = 0;203204public:205typedef std::set<std::string> DependenciesSetTy;206207private:208/// Dependencies - This is the list of all included files.209DependenciesSetTy Dependencies;210211public:212TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);213214tgtok::TokKind Lex() {215return CurCode = LexToken(CurPtr == CurBuf.begin());216}217218const DependenciesSetTy &getDependencies() const {219return Dependencies;220}221222tgtok::TokKind getCode() const { return CurCode; }223224const std::string &getCurStrVal() const {225assert(tgtok::isStringValue(CurCode) &&226"This token doesn't have a string value");227return CurStrVal;228}229int64_t getCurIntVal() const {230assert(CurCode == tgtok::IntVal && "This token isn't an integer");231return CurIntVal;232}233std::pair<int64_t, unsigned> getCurBinaryIntVal() const {234assert(CurCode == tgtok::BinaryIntVal &&235"This token isn't a binary integer");236return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);237}238239SMLoc getLoc() const;240SMRange getLocRange() const;241242private:243/// LexToken - Read the next token and return its code.244tgtok::TokKind LexToken(bool FileOrLineStart = false);245246tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);247tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);248249int getNextChar();250int peekNextChar(int Index) const;251void SkipBCPLComment();252bool SkipCComment();253tgtok::TokKind LexIdentifier();254bool LexInclude();255tgtok::TokKind LexString();256tgtok::TokKind LexVarName();257tgtok::TokKind LexNumber();258tgtok::TokKind LexBracket();259tgtok::TokKind LexExclaim();260261// Process EOF encountered in LexToken().262// If EOF is met in an include file, then the method will update263// CurPtr, CurBuf and preprocessing include stack, and return true.264// If EOF is met in the top-level file, then the method will265// update and check the preprocessing include stack, and return false.266bool processEOF();267268// *** Structures and methods for preprocessing support ***269270// A set of macro names that are defined either via command line or271// by using:272// #define NAME273StringSet<> DefinedMacros;274275// Each of #ifdef and #else directives has a descriptor associated276// with it.277//278// An ordered list of preprocessing controls defined by #ifdef/#else279// directives that are in effect currently is called preprocessing280// control stack. It is represented as a vector of PreprocessorControlDesc's.281//282// The control stack is updated according to the following rules:283//284// For each #ifdef we add an element to the control stack.285// For each #else we replace the top element with a descriptor286// with an inverted IsDefined value.287// For each #endif we pop the top element from the control stack.288//289// When CurPtr reaches the current buffer's end, the control stack290// must be empty, i.e. #ifdef and the corresponding #endif291// must be located in the same file.292struct PreprocessorControlDesc {293// Either tgtok::Ifdef or tgtok::Else.294tgtok::TokKind Kind;295296// True, if the condition for this directive is true, false - otherwise.297// Examples:298// #ifdef NAME : true, if NAME is defined, false - otherwise.299// ...300// #else : false, if NAME is defined, true - otherwise.301bool IsDefined;302303// Pointer into CurBuf to the beginning of the preprocessing directive304// word, e.g.:305// #ifdef NAME306// ^ - SrcPos307SMLoc SrcPos;308};309310// We want to disallow code like this:311// file1.td:312// #define NAME313// #ifdef NAME314// include "file2.td"315// EOF316// file2.td:317// #endif318// EOF319//320// To do this, we clear the preprocessing control stack on entry321// to each of the included file. PrepIncludeStack is used to store322// preprocessing control stacks for the current file and all its323// parent files. The back() element is the preprocessing control324// stack for the current file.325std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>326PrepIncludeStack;327328// Validate that the current preprocessing control stack is empty,329// since we are about to exit a file, and pop the include stack.330//331// If IncludeStackMustBeEmpty is true, the include stack must be empty332// after the popping, otherwise, the include stack must not be empty333// after the popping. Basically, the include stack must be empty334// only if we exit the "top-level" file (i.e. finish lexing).335//336// The method returns false, if the current preprocessing control stack337// is not empty (e.g. there is an unterminated #ifdef/#else),338// true - otherwise.339bool prepExitInclude(bool IncludeStackMustBeEmpty);340341// Look ahead for a preprocessing directive starting from CurPtr. The caller342// must only call this method, if *(CurPtr - 1) is '#'. If the method matches343// a preprocessing directive word followed by a whitespace, then it returns344// one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.345//346// CurPtr is not adjusted by this method.347tgtok::TokKind prepIsDirective() const;348349// Given a preprocessing token kind, adjusts CurPtr to the end350// of the preprocessing directive word. Returns true, unless351// an unsupported token kind is passed in.352//353// We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()354// to avoid adjusting CurPtr before we are sure that '#' is followed355// by a preprocessing directive. If it is not, then we fall back to356// tgtok::paste interpretation of '#'.357bool prepEatPreprocessorDirective(tgtok::TokKind Kind);358359// The main "exit" point from the token parsing to preprocessor.360//361// The method is called for CurPtr, when prepIsDirective() returns362// true. The first parameter matches the result of prepIsDirective(),363// denoting the actual preprocessor directive to be processed.364//365// If the preprocessing directive disables the tokens processing, e.g.:366// #ifdef NAME // NAME is undefined367// then lexPreprocessor() enters the lines-skipping mode.368// In this mode, it does not parse any tokens, because the code under369// the #ifdef may not even be a correct tablegen code. The preprocessor370// looks for lines containing other preprocessing directives, which371// may be prepended with whitespaces and C-style comments. If the line372// does not contain a preprocessing directive, it is skipped completely.373// Otherwise, the preprocessing directive is processed by recursively374// calling lexPreprocessor(). The processing of the encountered375// preprocessing directives includes updating preprocessing control stack376// and adding new macros into DefinedMacros set.377//378// The second parameter controls whether lexPreprocessor() is called from379// LexToken() (true) or recursively from lexPreprocessor() (false).380//381// If ReturnNextLiveToken is true, the method returns the next382// LEX token following the current directive or following the end383// of the disabled preprocessing region corresponding to this directive.384// If ReturnNextLiveToken is false, the method returns the first parameter,385// unless there were errors encountered in the disabled preprocessing386// region - in this case, it returns tgtok::Error.387tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,388bool ReturnNextLiveToken = true);389390// Worker method for lexPreprocessor() to skip lines after some391// preprocessing directive up to the buffer end or to the directive392// that re-enables token processing. The method returns true393// upon processing the next directive that re-enables tokens394// processing. False is returned if an error was encountered.395//396// Note that prepSkipRegion() calls lexPreprocessor() to process397// encountered preprocessing directives. In this case, the second398// parameter to lexPreprocessor() is set to false. Being passed399// false ReturnNextLiveToken, lexPreprocessor() must never call400// prepSkipRegion(). We assert this by passing ReturnNextLiveToken401// to prepSkipRegion() and checking that it is never set to false.402bool prepSkipRegion(bool MustNeverBeFalse);403404// Lex name of the macro after either #ifdef or #define. We could have used405// LexIdentifier(), but it has special handling of "include" word, which406// could result in awkward diagnostic errors. Consider:407// ----408// #ifdef include409// class ...410// ----411// LexIdentifier() will engage LexInclude(), which will complain about412// missing file with name "class". Instead, prepLexMacroName() will treat413// "include" as a normal macro name.414//415// On entry, CurPtr points to the end of a preprocessing directive word.416// The method allows for whitespaces between the preprocessing directive417// and the macro name. The allowed whitespaces are ' ' and '\t'.418//419// If the first non-whitespace symbol after the preprocessing directive420// is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then421// the method updates TokStart to the position of the first non-whitespace422// symbol, sets CurPtr to the position of the macro name's last symbol,423// and returns a string reference to the macro name. Otherwise,424// TokStart is set to the first non-whitespace symbol after the preprocessing425// directive, and the method returns an empty string reference.426//427// In all cases, TokStart may be used to point to the word following428// the preprocessing directive.429StringRef prepLexMacroName();430431// Skip any whitespaces starting from CurPtr. The method is used432// only in the lines-skipping mode to find the first non-whitespace433// symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'434// and '\r'. The method skips C-style comments as well, because435// it is used to find the beginning of the preprocessing directive.436// If we do not handle C-style comments the following code would437// result in incorrect detection of a preprocessing directive:438// /*439// #ifdef NAME440// */441// As long as we skip C-style comments, the following code is correctly442// recognized as a preprocessing directive:443// /* first line comment444// second line comment */ #ifdef NAME445//446// The method returns true upon reaching the first non-whitespace symbol447// or EOF, CurPtr is set to point to this symbol. The method returns false,448// if an error occurred during skipping of a C-style comment.449bool prepSkipLineBegin();450451// Skip any whitespaces or comments after a preprocessing directive.452// The method returns true upon reaching either end of the line453// or end of the file. If there is a multiline C-style comment454// after the preprocessing directive, the method skips455// the comment, so the final CurPtr may point to one of the next lines.456// The method returns false, if an error occurred during skipping457// C- or C++-style comment, or a non-whitespace symbol appears458// after the preprocessing directive.459//460// The method maybe called both during lines-skipping and tokens461// processing. It actually verifies that only whitespaces or/and462// comments follow a preprocessing directive.463//464// After the execution of this mehod, CurPtr points either to new line465// symbol, buffer end or non-whitespace symbol following the preprocesing466// directive.467bool prepSkipDirectiveEnd();468469// Return true, if the current preprocessor control stack is such that470// we should allow lexer to process the next token, false - otherwise.471//472// In particular, the method returns true, if all the #ifdef/#else473// controls on the stack have their IsDefined member set to true.474bool prepIsProcessingEnabled();475476// Report an error, if we reach EOF with non-empty preprocessing control477// stack. This means there is no matching #endif for the previous478// #ifdef/#else.479void prepReportPreprocessorStackError();480};481482} // end namespace llvm483484#endif485486487