Path: blob/main/contrib/llvm-project/clang/lib/Lex/DependencyDirectivesScanner.cpp
35233 views
//===- DependencyDirectivesScanner.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the interface for scanning header and source files to get the
/// minimum necessary preprocessor directives for evaluating includes. It
/// reduces the source down to #define, #include, #import, @import, and any
/// conditional preprocessor logic that contains one of those.
///
//===----------------------------------------------------------------------===//

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Pragma.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
#include <optional>

using namespace clang;
using namespace clang::dependency_directives_scan;
using namespace llvm;

namespace {

/// A directive kind paired with the number of raw tokens that belong to it;
/// the tokens themselves live contiguously in the Scanner's \p Tokens vector.
struct DirectiveWithTokens {
  DirectiveKind Kind;
  unsigned NumTokens;

  DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
      : Kind(Kind), NumTokens(NumTokens) {}
};

/// Does an efficient "scan" of the sources to detect the presence of
/// preprocessor (or module import) directives and collects the raw lexed tokens
/// for those directives so that the \p Lexer can "replay" them when the file is
/// included.
///
/// Note that the behavior of the raw lexer is affected by the language mode,
/// while at this point we want to do a scan and collect tokens once,
/// irrespective of the language mode that the file will get included in. To
/// compensate for that the \p Lexer, while "replaying", will adjust a token
/// where appropriate, when it could affect the preprocessor's state.
/// For example in a directive like
///
/// \code
///   #if __has_cpp_attribute(clang::fallthrough)
/// \endcode
///
/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
/// while in C++ mode.
struct Scanner {
  Scanner(StringRef Input,
          SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
          DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
      : Input(Input), Tokens(Tokens), Diags(Diags),
        InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
        TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
                 Input.end()) {}

  /// Language options used for the single language-agnostic scan pass.
  static LangOptions getLangOptsForDepScanning() {
    LangOptions LangOpts;
    // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
    LangOpts.ObjC = true;
    LangOpts.LineComment = true;
    LangOpts.RawStringLiterals = true;
    // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
    return LangOpts;
  }

  /// Lex the provided source and emit the directive tokens.
  ///
  /// \returns True on error.
  bool scan(SmallVectorImpl<Directive> &Directives);

private:
  /// Lexes next token and advances \p First and the \p Lexer.
  [[nodiscard]] dependency_directives_scan::Token &
  lexToken(const char *&First, const char *const End);

  /// Lexes the filename of an include-like directive using the lexer's
  /// header-name mode and advances \p First and the \p Lexer.
  [[nodiscard]] dependency_directives_scan::Token &
  lexIncludeFilename(const char *&First, const char *const End);

  void skipLine(const char *&First, const char *const End);
  void skipDirective(StringRef Name, const char *&First, const char *const End);

  /// Returns the spelling of a string literal or identifier after performing
  /// any processing needed to handle \c clang::Token::NeedsCleaning.
  StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok);

  /// Lexes next token and if it is identifier returns its string, otherwise
  /// it skips the current line and returns \p std::nullopt.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  [[nodiscard]] std::optional<StringRef>
  tryLexIdentifierOrSkipLine(const char *&First, const char *const End);

  /// Used when it is certain that next token is an identifier.
  [[nodiscard]] StringRef lexIdentifier(const char *&First,
                                        const char *const End);

  /// Lexes next token and returns true iff it is an identifier that matches \p
  /// Id, otherwise it skips the current line and returns false.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id,
                                                const char *&First,
                                                const char *const End);

  /// Lexes next token and returns true iff it matches the kind \p K.
  /// Otherwise it skips the current line and returns false.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  [[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
                                           const char *const End);

  /// Lexes next token and if it is string literal, returns its string.
  /// Otherwise, it skips the current line and returns \p std::nullopt.
  ///
  /// In any case (whatever the token kind) \p First and the \p Lexer will
  /// advance beyond the token.
  [[nodiscard]] std::optional<StringRef>
  tryLexStringLiteralOrSkipLine(const char *&First, const char *const End);

  // Top-level driver and per-directive lexers. Each returns true on a fatal
  // scanning error (which aborts the whole scan).
  [[nodiscard]] bool scanImpl(const char *First, const char *const End);
  [[nodiscard]] bool lexPPLine(const char *&First, const char *const End);
  [[nodiscard]] bool lexAt(const char *&First, const char *const End);
  [[nodiscard]] bool lexModule(const char *&First, const char *const End);
  [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First,
                               const char *const End);
  [[nodiscard]] bool lexPragma(const char *&First, const char *const End);
  [[nodiscard]] bool lex_Pragma(const char *&First, const char *const End);
  [[nodiscard]] bool lexEndif(const char *&First, const char *const End);
  [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First,
                                const char *const End);
  [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind,
                                            const char *&First,
                                            const char *const End);
  void lexPPDirectiveBody(const char *&First, const char *const End);

  // Commits the tokens of the current directive: appends CurDirToks to Tokens
  // and records the directive kind along with its token count.
  DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
    Tokens.append(CurDirToks);
    DirsWithToks.emplace_back(Kind, CurDirToks.size());
    CurDirToks.clear();
    return DirsWithToks.back();
  }
  // Removes the most recently committed directive and its tokens.
  void popDirective() {
    Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
  }
  DirectiveKind topDirective() const {
    return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
  }

  unsigned getOffsetAt(const char *CurPtr) const {
    return CurPtr - Input.data();
  }

  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
  /// true at the end.
  bool reportError(const char *CurPtr, unsigned Err);

  // Owns the storage for identifier/string spellings that needed cleaning
  // (see cleanStringIfNeeded); the map's keys outlive the returned StringRefs.
  StringMap<char> SplitIds;
  StringRef Input;
  SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
  DiagnosticsEngine *Diags;
  SourceLocation InputSourceLoc;

  const char *LastTokenPtr = nullptr;
  /// Keeps track of the tokens for the currently lexed directive. Once a
  /// directive is fully lexed and "committed" then the tokens get appended to
  /// \p Tokens and \p CurDirToks is cleared for the next directive.
  SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
  /// The directives that were lexed along with the number of tokens that each
  /// directive contains. The tokens of all the directives are kept in \p Tokens
  /// vector, in the same order as the directives order in \p DirsWithToks.
  SmallVector<DirectiveWithTokens, 64> DirsWithToks;
  LangOptions LangOpts;
  Lexer TheLexer;
};

} // end anonymous namespace

bool Scanner::reportError(const char *CurPtr, unsigned Err) {
  if (!Diags)
    return true;
  assert(CurPtr >= Input.data() && "invalid buffer ptr");
  Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err);
  return true;
}

// Advances \p First past horizontal whitespace (spaces/tabs) without crossing
// a newline.
static void skipOverSpaces(const char *&First, const char *const End) {
  while (First != End && isHorizontalWhitespace(*First))
    ++First;
}

// Given \p Current pointing at a '"', determines (by scanning backwards, never
// before \p First) whether it begins a raw string literal, i.e. is preceded by
// 'R' with an optional u/u8/U/L encoding prefix that is not glued to a longer
// identifier.
[[nodiscard]] static bool isRawStringLiteral(const char *First,
                                             const char *Current) {
  assert(First <= Current);

  // Check if we can even back up.
  if (*Current != '"' || First == Current)
    return false;

  // Check for an "R".
  --Current;
  if (*Current != 'R')
    return false;
  if (First == Current || !isAsciiIdentifierContinue(*--Current))
    return true;

  // Check for a prefix of "u", "U", or "L".
  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
    return First == Current || !isAsciiIdentifierContinue(*--Current);

  // Check for a prefix of "u8".
  if (*Current != '8' || First == Current || *Current-- != 'u')
    return false;
  return First == Current || !isAsciiIdentifierContinue(*--Current);
}

// Skips a raw string literal R"delim(...)delim". On entry \p First points at
// the opening '"' (the preceding character is 'R'); on exit it points just
// past the closing '"', or at \p End if the literal is unterminated.
static void skipRawString(const char *&First, const char *const End) {
  assert(First[0] == '"');
  assert(First[-1] == 'R');

  const char *Last = ++First;
  while (Last != End && *Last != '(')
    ++Last;
  if (Last == End) {
    First = Last; // Hit the end... just give up.
    return;
  }

  StringRef Terminator(First, Last - First);
  for (;;) {
    // Move First to just past the next ")".
    First = Last;
    while (First != End && *First != ')')
      ++First;
    if (First == End)
      return;
    ++First;

    // Look ahead for the terminator sequence.
    Last = First;
    while (Last != End && size_t(Last - First) < Terminator.size() &&
           Terminator[Last - First] == *Last)
      ++Last;

    // Check if we hit it (or the end of the file).
    if (Last == End) {
      First = Last;
      return;
    }
    if (size_t(Last - First) < Terminator.size())
      continue;
    if (*Last != '"')
      continue;
    First = Last + 1;
    return;
  }
}

// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
static unsigned isEOL(const char *First, const char *const End) {
  if (First == End)
    return 0;
  if (End - First > 1 && isVerticalWhitespace(First[0]) &&
      isVerticalWhitespace(First[1]) && First[0] != First[1])
    return 2;
  return !!isVerticalWhitespace(First[0]);
}

// Skips a quoted string/character literal or an angle-bracketed header name.
// On entry \p First points at the opening quote/'<'; on exit it points just
// past the terminator, or at the newline/End if the literal is unterminated.
static void skipString(const char *&First, const char *const End) {
  assert(*First == '\'' || *First == '"' || *First == '<');
  const char Terminator = *First == '<' ? '>' : *First;
  for (++First; First != End && *First != Terminator; ++First) {
    // String and character literals don't extend past the end of the line.
    if (isVerticalWhitespace(*First))
      return;
    if (*First != '\\')
      continue;
    // Skip past backslash to the next character. This ensures that the
    // character right after it is skipped as well, which matters if it's
    // the terminator.
    if (++First == End)
      return;
    if (!isWhitespace(*First))
      continue;
    // Whitespace after the backslash might indicate a line continuation.
    const char *FirstAfterBackslashPastSpace = First;
    skipOverSpaces(FirstAfterBackslashPastSpace, End);
    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
      // Advance the character pointer to the next line for the next
      // iteration.
      First = FirstAfterBackslashPastSpace + NLSize - 1;
    }
  }
  if (First != End)
    ++First; // Finish off the string.
}

// Returns the length of the skipped newline
static unsigned skipNewline(const char *&First, const char *End) {
  if (First == End)
    return 0;
  assert(isVerticalWhitespace(*First));
  unsigned Len = isEOL(First, End);
  assert(Len && "expected newline");
  First += Len;
  return Len;
}

// True if the newline that was just consumed (of length \p EOLLen, with
// \p First now past it) was preceded by a backslash line-continuation.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  return *(First - (int)EOLLen - 1) == '\\';
}

// Advances \p First to the next newline without interpreting strings or
// comments, but honoring backslash line-continuations (a backslash immediately
// before the newline keeps scanning on the following line).
static void skipToNewlineRaw(const char *&First, const char *const End) {
  for (;;) {
    if (First == End)
      return;

    unsigned Len = isEOL(First, End);
    if (Len)
      return;

    do {
      if (++First == End)
        return;
      Len = isEOL(First, End);
    } while (!Len);

    if (First[-1] != '\\')
      return;

    First += Len;
    // Keep skipping lines...
  }
}

// Skips a "//..." comment up to (not including) the newline.
static void skipLineComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '/');
  First += 2;
  skipToNewlineRaw(First, End);
}

// Skips a "/*...*/" comment; leaves \p First at End if unterminated.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  if (End - First < 4) {
    First = End;
    return;
  }
  for (First += 3; First != End; ++First)
    if (First[-1] == '*' && First[0] == '/') {
      ++First;
      return;
    }
}

/// \returns True if the current single quotation mark character is a C++14
/// digit separator.
static bool isQuoteCppDigitSeparator(const char *const Start,
                                     const char *const Cur,
                                     const char *const End) {
  assert(*Cur == '\'' && "expected quotation character");
  // skipLine called in places where we don't expect a valid number
  // body before `start` on the same line, so always return false at the start.
  if (Start == Cur)
    return false;
  // The previous character must be a valid PP number character.
  // Make sure that the L, u, U, u8 prefixes don't get marked as a
  // separator though.
  char Prev = *(Cur - 1);
  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
    return false;
  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
    return false;
  if (!isPreprocessingNumberBody(Prev))
    return false;
  // The next character should be a valid identifier body character.
  return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
}

// Skips the rest of the current logical line (including line-continuations),
// stepping over strings and comments so their contents are not misread as
// newlines or comment markers. Tracks LastTokenPtr for the EOF marker.
void Scanner::skipLine(const char *&First, const char *const End) {
  for (;;) {
    assert(First <= End);
    if (First == End)
      return;

    if (isVerticalWhitespace(*First)) {
      skipNewline(First, End);
      return;
    }
    const char *Start = First;
    while (First != End && !isVerticalWhitespace(*First)) {
      // Iterate over strings correctly to avoid comments and newlines.
      if (*First == '"' ||
          (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
        LastTokenPtr = First;
        if (isRawStringLiteral(Start, First))
          skipRawString(First, End);
        else
          skipString(First, End);
        continue;
      }

      // Iterate over comments correctly.
      if (*First != '/' || End - First < 2) {
        LastTokenPtr = First;
        ++First;
        continue;
      }

      if (First[1] == '/') {
        // "//...".
        skipLineComment(First, End);
        continue;
      }

      if (First[1] != '*') {
        LastTokenPtr = First;
        ++First;
        continue;
      }

      // "/*...*/".
      skipBlockComment(First, End);
    }
    if (First == End)
      return;

    // Skip over the newline.
    unsigned Len = skipNewline(First, End);
    if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
      break;
  }
}

// Skips a directive we don't record. #warning/#error message text is skipped
// raw because it need not contain well-formed strings or comments.
void Scanner::skipDirective(StringRef Name, const char *&First,
                            const char *const End) {
  if (llvm::StringSwitch<bool>(Name)
          .Case("warning", true)
          .Case("error", true)
          .Default(false))
    // Do not process quotes or comments.
    skipToNewlineRaw(First, End);
  else
    skipLine(First, End);
}

// Skips horizontal whitespace, escaped newlines, and comments. A line comment
// ends the run (the caller is then at the newline).
static void skipWhitespace(const char *&First, const char *const End) {
  for (;;) {
    assert(First <= End);
    skipOverSpaces(First, End);

    if (End - First < 2)
      return;

    if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
      skipNewline(++First, End);
      continue;
    }

    // Check for a non-comment character.
    if (First[0] != '/')
      return;

    // "// ...".
    if (First[1] == '/') {
      skipLineComment(First, End);
      return;
    }

    // Cannot be a comment.
    if (First[1] != '*')
      return;

    // "/*...*/".
    skipBlockComment(First, End);
  }
}

// Lexes the remainder of a module/import directive up to the terminating ';',
// commits it as \p Kind, and requires only whitespace to follow on the line.
bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
                                     const char *const End) {
  const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
  for (;;) {
    const dependency_directives_scan::Token &Tok = lexToken(First, End);
    if (Tok.is(tok::eof))
      return reportError(
          DirectiveLoc,
          diag::err_dep_source_scanner_missing_semi_after_at_import);
    if (Tok.is(tok::semi))
      break;
  }
  pushDirective(Kind);
  skipWhitespace(First, End);
  if (First == End)
    return false;
  if (!isVerticalWhitespace(*First))
    return reportError(
        DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
  skipNewline(First, End);
  return false;
}

dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
                                                     const char *const End) {
  clang::Token Tok;
  TheLexer.LexFromRawLexer(Tok);
  First = Input.data() + TheLexer.getCurrentBufferOffset();
  assert(First <= End);

  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
                          Tok.getFlags());
  return CurDirToks.back();
}

dependency_directives_scan::Token &
Scanner::lexIncludeFilename(const char *&First, const char *const End) {
  clang::Token Tok;
  TheLexer.LexIncludeFilename(Tok);
  First = Input.data() + TheLexer.getCurrentBufferOffset();
  assert(First <= End);

  unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
  CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
                          Tok.getFlags());
  return CurDirToks.back();
}

// Lexes (and records) tokens until the end of the current directive.
void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
  while (true) {
    const dependency_directives_scan::Token &Tok = lexToken(First, End);
    if (Tok.is(tok::eod) || Tok.is(tok::eof))
      break;
  }
}

StringRef
Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) {
  bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
  if (LLVM_LIKELY(!NeedsCleaning))
    return Input.slice(Tok.Offset, Tok.getEnd());

  SmallString<64> Spelling;
  Spelling.resize(Tok.Length);

  // FIXME: C++11 raw string literals need special handling (see getSpellingSlow
  // in the Lexer). Currently we cannot see them due to our LangOpts.

  unsigned SpellingLength = 0;
  const char *BufPtr = Input.begin() + Tok.Offset;
  const char *AfterIdent = Input.begin() + Tok.getEnd();
  while (BufPtr < AfterIdent) {
    auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts);
    Spelling[SpellingLength++] = Char;
    BufPtr += Size;
  }

  // Intern the cleaned spelling in SplitIds so the returned StringRef stays
  // valid for the lifetime of the Scanner.
  return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
      .first->first();
}

std::optional<StringRef>
Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
  const dependency_directives_scan::Token &Tok = lexToken(First, End);
  if (Tok.isNot(tok::raw_identifier)) {
    if (!Tok.is(tok::eod))
      skipLine(First, End);
    return std::nullopt;
  }

  return cleanStringIfNeeded(Tok);
}

StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
  std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
  assert(Id && "expected identifier token");
  return *Id;
}

bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
                                         const char *const End) {
  if (std::optional<StringRef> FoundId =
          tryLexIdentifierOrSkipLine(First, End)) {
    if (*FoundId == Id)
      return true;
    skipLine(First, End);
  }
  return false;
}

bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
                                    const char *const End) {
  const dependency_directives_scan::Token &Tok = lexToken(First, End);
  if (Tok.is(K))
    return true;
  skipLine(First, End);
  return false;
}

std::optional<StringRef>
Scanner::tryLexStringLiteralOrSkipLine(const char *&First,
                                       const char *const End) {
  const dependency_directives_scan::Token &Tok = lexToken(First, End);
  if (!tok::isStringLiteral(Tok.Kind)) {
    if (!Tok.is(tok::eod))
      skipLine(First, End);
    return std::nullopt;
  }

  return cleanStringIfNeeded(Tok);
}

bool Scanner::lexAt(const char *&First, const char *const End) {
  // Handle "@import".

  // Lex '@'.
  const dependency_directives_scan::Token &AtTok = lexToken(First, End);
  assert(AtTok.is(tok::at));
  (void)AtTok;

  if (!isNextIdentifierOrSkipLine("import", First, End))
    return false;
  return lexModuleDirectiveBody(decl_at_import, First, End);
}

// Handles lines starting with 'i'/'e'/'m': potential C++20 module or import
// declarations (with optional leading "export"). Anything else is skipped.
bool Scanner::lexModule(const char *&First, const char *const End) {
  StringRef Id = lexIdentifier(First, End);
  bool Export = false;
  if (Id == "export") {
    Export = true;
    std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
    if (!NextId)
      return false;
    Id = *NextId;
  }

  if (Id != "module" && Id != "import") {
    skipLine(First, End);
    return false;
  }

  skipWhitespace(First, End);

  // Ignore this as a module directive if the next character can't be part of
  // an import.

  switch (*First) {
  case ':': {
    // `module :` is never the start of a valid module declaration.
    if (Id == "module") {
      skipLine(First, End);
      return false;
    }
    // `import:(type)name` is a valid ObjC method decl, so check one more token.
    (void)lexToken(First, End);
    if (!tryLexIdentifierOrSkipLine(First, End))
      return false;
    break;
  }
  case '<':
  case '"':
    break;
  default:
    if (!isAsciiIdentifierContinue(*First)) {
      skipLine(First, End);
      return false;
    }
  }

  // Rewind the raw lexer to replay the directive body from just past the
  // keyword(s) we consumed while probing.
  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false);

  DirectiveKind Kind;
  if (Id == "module")
    Kind = Export ? cxx_export_module_decl : cxx_module_decl;
  else
    Kind = Export ? cxx_export_import_decl : cxx_import_decl;

  return lexModuleDirectiveBody(Kind, First, End);
}

// Handles _Pragma("...") by unescaping the string and scanning it with a
// nested Scanner, then recording the resulting directive kind (if any).
bool Scanner::lex_Pragma(const char *&First, const char *const End) {
  if (!isNextTokenOrSkipLine(tok::l_paren, First, End))
    return false;

  std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End);

  if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End))
    return false;

  SmallString<64> Buffer(*Str);
  prepare_PragmaString(Buffer);

  // Use a new scanner instance since the tokens will be inside the allocated
  // string. We should already have captured all the relevant tokens in the
  // current scanner.
  SmallVector<dependency_directives_scan::Token> DiscardTokens;
  const char *Begin = Buffer.c_str();
  Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
                        InputSourceLoc};

  PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
  if (PragmaScanner.lexPragma(Begin, Buffer.end()))
    return true;

  DirectiveKind K = PragmaScanner.topDirective();
  if (K == pp_none) {
    skipLine(First, End);
    return false;
  }

  assert(Begin == Buffer.end());
  pushDirective(K);
  return false;
}

// Handles "#pragma ..." — records once/push_macro/pop_macro/include_alias,
// "#pragma clang system_header", and "#pragma clang module import".
bool Scanner::lexPragma(const char *&First, const char *const End) {
  std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;

  StringRef Id = *FoundId;
  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                  .Case("once", pp_pragma_once)
                  .Case("push_macro", pp_pragma_push_macro)
                  .Case("pop_macro", pp_pragma_pop_macro)
                  .Case("include_alias", pp_pragma_include_alias)
                  .Default(pp_none);
  if (Kind != pp_none) {
    lexPPDirectiveBody(First, End);
    pushDirective(Kind);
    return false;
  }

  if (Id != "clang") {
    skipLine(First, End);
    return false;
  }

  FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;
  Id = *FoundId;

  // #pragma clang system_header
  if (Id == "system_header") {
    lexPPDirectiveBody(First, End);
    pushDirective(pp_pragma_system_header);
    return false;
  }

  if (Id != "module") {
    skipLine(First, End);
    return false;
  }

  // #pragma clang module.
  if (!isNextIdentifierOrSkipLine("import", First, End))
    return false;

  // #pragma clang module import.
  lexPPDirectiveBody(First, End);
  pushDirective(pp_pragma_import);
  return false;
}

bool Scanner::lexEndif(const char *&First, const char *const End) {
  // Strip out "#else" if it's empty.
  if (topDirective() == pp_else)
    popDirective();

  // If "#ifdef" is empty, strip it and skip the "#endif".
  //
  // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
  // we can skip empty `#if` and `#elif` blocks as well after scanning for a
  // literal __has_include in the condition. Even without that rule we could
  // drop the tokens if we scan for identifiers in the condition and find none.
  if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) {
    popDirective();
    skipLine(First, End);
    return false;
  }

  return lexDefault(pp_endif, First, End);
}

// Records a directive with no special handling: lex its body, commit it.
bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
                         const char *const End) {
  lexPPDirectiveBody(First, End);
  pushDirective(Kind);
  return false;
}

// First characters that could begin a line of interest: '#' (directive),
// '@' (@import), 'i'/'e'/'m' (import/export/module), '_' (_Pragma).
static bool isStartOfRelevantLine(char First) {
  switch (First) {
  case '#':
  case '@':
  case 'i':
  case 'e':
  case 'm':
  case '_':
    return true;
  }
  return false;
}

// Scans a single logical line, dispatching to the directive-specific lexers.
// Returns true on a fatal error that aborts the whole scan.
bool Scanner::lexPPLine(const char *&First, const char *const End) {
  assert(First != End);

  skipWhitespace(First, End);
  assert(First <= End);
  if (First == End)
    return false;

  if (!isStartOfRelevantLine(*First)) {
    skipLine(First, End);
    assert(First <= End);
    return false;
  }

  LastTokenPtr = First;

  TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);

  auto ScEx1 = make_scope_exit([&]() {
    /// Clear Scanner's CurDirToks before returning, in case we didn't push a
    /// new directive.
    CurDirToks.clear();
  });

  // Handle "@import".
  if (*First == '@')
    return lexAt(First, End);

  if (*First == 'i' || *First == 'e' || *First == 'm')
    return lexModule(First, End);

  if (*First == '_') {
    if (isNextIdentifierOrSkipLine("_Pragma", First, End))
      return lex_Pragma(First, End);
    return false;
  }

  // Handle preprocessing directives.

  TheLexer.setParsingPreprocessorDirective(true);
  auto ScEx2 = make_scope_exit(
      [&]() { TheLexer.setParsingPreprocessorDirective(false); });

  // Lex '#'.
  const dependency_directives_scan::Token &HashTok = lexToken(First, End);
  if (HashTok.is(tok::hashhash)) {
    // A \p tok::hashhash at this location is passed by the preprocessor to the
    // parser to interpret, like any other token. So for dependency scanning
    // skip it like a normal token not affecting the preprocessor.
    skipLine(First, End);
    assert(First <= End);
    return false;
  }
  assert(HashTok.is(tok::hash));
  (void)HashTok;

  std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
  if (!FoundId)
    return false;

  StringRef Id = *FoundId;

  if (Id == "pragma")
    return lexPragma(First, End);

  auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
                  .Case("include", pp_include)
                  .Case("__include_macros", pp___include_macros)
                  .Case("define", pp_define)
                  .Case("undef", pp_undef)
                  .Case("import", pp_import)
                  .Case("include_next", pp_include_next)
                  .Case("if", pp_if)
                  .Case("ifdef", pp_ifdef)
                  .Case("ifndef", pp_ifndef)
                  .Case("elif", pp_elif)
                  .Case("elifdef", pp_elifdef)
                  .Case("elifndef", pp_elifndef)
                  .Case("else", pp_else)
                  .Case("endif", pp_endif)
                  .Default(pp_none);
  if (Kind == pp_none) {
    skipDirective(Id, First, End);
    return false;
  }

  if (Kind == pp_endif)
    return lexEndif(First, End);

  switch (Kind) {
  case pp_include:
  case pp___include_macros:
  case pp_include_next:
  case pp_import:
    // Ignore missing filenames in include or import directives.
    // NOTE(review): this returns true (aborting the scan) on a missing
    // filename rather than silently skipping — confirm that "Ignore" here
    // means "emit no diagnostic", not "continue scanning".
    if (lexIncludeFilename(First, End).is(tok::eod)) {
      skipDirective(Id, First, End);
      return true;
    }
    break;
  default:
    break;
  }

  // Everything else.
  return lexDefault(Kind, First, End);
}

// Skips a leading UTF-8 byte order mark (EF BB BF), if present.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
      First[2] == '\xbf')
    First += 3;
}

// Drives lexPPLine over the whole buffer; true on error.
bool Scanner::scanImpl(const char *First, const char *const End) {
  skipUTF8ByteOrderMark(First, End);
  while (First != End)
    if (lexPPLine(First, End))
      return true;
  return false;
}

bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
  bool Error = scanImpl(Input.begin(), Input.end());

  if (!Error) {
    // Add an EOF on success.
    if (LastTokenPtr &&
        (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
      pushDirective(tokens_present_before_eof);
    pushDirective(pp_eof);
  }

  // Slice the flat Tokens vector back into per-directive ArrayRefs, in the
  // order the directives were committed.
  ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
  for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
    assert(RemainingTokens.size() >= DirWithToks.NumTokens);
    Directives.emplace_back(DirWithToks.Kind,
                            RemainingTokens.take_front(DirWithToks.NumTokens));
    RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
  }
  assert(RemainingTokens.empty());

  return Error;
}

bool clang::scanSourceForDependencyDirectives(
    StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
    SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
    SourceLocation InputSourceLoc) {
  return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
}

void clang::printDependencyDirectivesAsSource(
    StringRef Source,
    ArrayRef<dependency_directives_scan::Directive> Directives,
    llvm::raw_ostream &OS) {
  // Add a space separator where it is convenient for testing purposes.
  auto needsSpaceSeparator =
      [](tok::TokenKind Prev,
         const dependency_directives_scan::Token &Tok) -> bool {
    if (Prev == Tok.Kind)
      return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
                          tok::r_square);
    if (Prev == tok::raw_identifier &&
        Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
                    tok::char_constant, tok::header_name))
      return true;
    if (Prev == tok::r_paren &&
        Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
                    tok::char_constant, tok::unknown))
      return true;
    if (Prev == tok::comma &&
        Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
      return true;
    return false;
  };

  for (const dependency_directives_scan::Directive &Directive : Directives) {
    if (Directive.Kind == tokens_present_before_eof)
      OS << "<TokBeforeEOF>";
    std::optional<tok::TokenKind> PrevTokenKind;
    for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
      if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
        OS << ' ';
      PrevTokenKind = Tok.Kind;
      OS << Source.slice(Tok.Offset, Tok.getEnd());
    }
  }
}