// Path: blob/main/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp
// 35233 views
//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// Implements # directive processing for the Preprocessor.10///11//===----------------------------------------------------------------------===//1213#include "clang/Basic/CharInfo.h"14#include "clang/Basic/DirectoryEntry.h"15#include "clang/Basic/FileManager.h"16#include "clang/Basic/IdentifierTable.h"17#include "clang/Basic/LangOptions.h"18#include "clang/Basic/Module.h"19#include "clang/Basic/SourceLocation.h"20#include "clang/Basic/SourceManager.h"21#include "clang/Basic/TargetInfo.h"22#include "clang/Basic/TokenKinds.h"23#include "clang/Lex/CodeCompletionHandler.h"24#include "clang/Lex/HeaderSearch.h"25#include "clang/Lex/HeaderSearchOptions.h"26#include "clang/Lex/LexDiagnostic.h"27#include "clang/Lex/LiteralSupport.h"28#include "clang/Lex/MacroInfo.h"29#include "clang/Lex/ModuleLoader.h"30#include "clang/Lex/ModuleMap.h"31#include "clang/Lex/PPCallbacks.h"32#include "clang/Lex/Pragma.h"33#include "clang/Lex/Preprocessor.h"34#include "clang/Lex/PreprocessorOptions.h"35#include "clang/Lex/Token.h"36#include "clang/Lex/VariadicMacroSupport.h"37#include "llvm/ADT/ArrayRef.h"38#include "llvm/ADT/STLExtras.h"39#include "llvm/ADT/ScopeExit.h"40#include "llvm/ADT/SmallString.h"41#include "llvm/ADT/SmallVector.h"42#include "llvm/ADT/StringExtras.h"43#include "llvm/ADT/StringRef.h"44#include "llvm/ADT/StringSwitch.h"45#include "llvm/Support/AlignOf.h"46#include "llvm/Support/ErrorHandling.h"47#include "llvm/Support/Path.h"48#include "llvm/Support/SaveAndRestore.h"49#include <algorithm>50#include <cassert>51#include <cstring>52#include <new>53#include <optional>54#include <string>55#include 
<utility>5657using namespace clang;5859//===----------------------------------------------------------------------===//60// Utility Methods for Preprocessor Directive Handling.61//===----------------------------------------------------------------------===//6263MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {64static_assert(std::is_trivially_destructible_v<MacroInfo>, "");65return new (BP) MacroInfo(L);66}6768DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,69SourceLocation Loc) {70return new (BP) DefMacroDirective(MI, Loc);71}7273UndefMacroDirective *74Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {75return new (BP) UndefMacroDirective(UndefLoc);76}7778VisibilityMacroDirective *79Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,80bool isPublic) {81return new (BP) VisibilityMacroDirective(Loc, isPublic);82}8384/// Read and discard all tokens remaining on the current line until85/// the tok::eod token is found.86SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {87SourceRange Res;8889LexUnexpandedToken(Tmp);90Res.setBegin(Tmp.getLocation());91while (Tmp.isNot(tok::eod)) {92assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");93LexUnexpandedToken(Tmp);94}95Res.setEnd(Tmp.getLocation());96return Res;97}9899/// Enumerates possible cases of #define/#undef a reserved identifier.100enum MacroDiag {101MD_NoWarn, //> Not a reserved identifier102MD_KeywordDef, //> Macro hides keyword, enabled by default103MD_ReservedMacro //> #define of #undef reserved id, disabled by default104};105106/// Enumerates possible %select values for the pp_err_elif_after_else and107/// pp_err_elif_without_if diagnostics.108enum PPElifDiag {109PED_Elif,110PED_Elifdef,111PED_Elifndef112};113114static bool isFeatureTestMacro(StringRef MacroName) {115// list from:116// * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html117// * 
https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160118// * man 7 feature_test_macros119// The list must be sorted for correct binary search.120static constexpr StringRef ReservedMacro[] = {121"_ATFILE_SOURCE",122"_BSD_SOURCE",123"_CRT_NONSTDC_NO_WARNINGS",124"_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",125"_CRT_SECURE_NO_WARNINGS",126"_FILE_OFFSET_BITS",127"_FORTIFY_SOURCE",128"_GLIBCXX_ASSERTIONS",129"_GLIBCXX_CONCEPT_CHECKS",130"_GLIBCXX_DEBUG",131"_GLIBCXX_DEBUG_PEDANTIC",132"_GLIBCXX_PARALLEL",133"_GLIBCXX_PARALLEL_ASSERTIONS",134"_GLIBCXX_SANITIZE_VECTOR",135"_GLIBCXX_USE_CXX11_ABI",136"_GLIBCXX_USE_DEPRECATED",137"_GNU_SOURCE",138"_ISOC11_SOURCE",139"_ISOC95_SOURCE",140"_ISOC99_SOURCE",141"_LARGEFILE64_SOURCE",142"_POSIX_C_SOURCE",143"_REENTRANT",144"_SVID_SOURCE",145"_THREAD_SAFE",146"_XOPEN_SOURCE",147"_XOPEN_SOURCE_EXTENDED",148"__STDCPP_WANT_MATH_SPEC_FUNCS__",149"__STDC_FORMAT_MACROS",150};151return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),152MacroName);153}154155static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,156const MacroInfo *MI,157const StringRef MacroName) {158// If this is a macro with special handling (like __LINE__) then it's language159// defined.160if (MI->isBuiltinMacro())161return true;162// Builtin macros are defined in the builtin file163if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))164return false;165// C defines macros starting with __STDC, and C++ defines macros starting with166// __STDCPP167if (MacroName.starts_with("__STDC"))168return true;169// C++ defines the __cplusplus macro170if (MacroName == "__cplusplus")171return true;172// C++ defines various feature-test macros starting with __cpp173if (MacroName.starts_with("__cpp"))174return true;175// Anything else isn't language-defined176return false;177}178179static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {180const LangOptions &Lang = 
PP.getLangOpts();181StringRef Text = II->getName();182if (isReservedInAllContexts(II->isReserved(Lang)))183return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;184if (II->isKeyword(Lang))185return MD_KeywordDef;186if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))187return MD_KeywordDef;188return MD_NoWarn;189}190191static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {192const LangOptions &Lang = PP.getLangOpts();193// Do not warn on keyword undef. It is generally harmless and widely used.194if (isReservedInAllContexts(II->isReserved(Lang)))195return MD_ReservedMacro;196return MD_NoWarn;197}198199// Return true if we want to issue a diagnostic by default if we200// encounter this name in a #include with the wrong case. For now,201// this includes the standard C and C++ headers, Posix headers,202// and Boost headers. Improper case for these #includes is a203// potential portability issue.204static bool warnByDefaultOnWrongCase(StringRef Include) {205// If the first component of the path is "boost", treat this like a standard header206// for the purposes of diagnostics.207if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))208return true;209210// "condition_variable" is the longest standard header name at 18 characters.211// If the include file name is longer than that, it can't be a standard header.212static const size_t MaxStdHeaderNameLen = 18u;213if (Include.size() > MaxStdHeaderNameLen)214return false;215216// Lowercase and normalize the search string.217SmallString<32> LowerInclude{Include};218for (char &Ch : LowerInclude) {219// In the ASCII range?220if (static_cast<unsigned char>(Ch) > 0x7f)221return false; // Can't be a standard header222// ASCII lowercase:223if (Ch >= 'A' && Ch <= 'Z')224Ch += 'a' - 'A';225// Normalize path separators for comparison purposes.226else if (::llvm::sys::path::is_separator(Ch))227Ch = '/';228}229230// The standard C/C++ and Posix headers231return 
llvm::StringSwitch<bool>(LowerInclude)232// C library headers233.Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)234.Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)235.Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)236.Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)237.Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)238.Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)239.Cases("wchar.h", "wctype.h", true)240241// C++ headers for C library facilities242.Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)243.Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)244.Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)245.Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)246.Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)247.Case("cwctype", true)248249// C++ library headers250.Cases("algorithm", "fstream", "list", "regex", "thread", true)251.Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)252.Cases("atomic", "future", "map", "set", "type_traits", true)253.Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)254.Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)255.Cases("codecvt", "ios", "new", "stack", "unordered_map", true)256.Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)257.Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)258.Cases("deque", "istream", "queue", "string", "valarray", true)259.Cases("exception", "iterator", "random", "strstream", "vector", true)260.Cases("forward_list", "limits", "ratio", "system_error", true)261262// POSIX headers (which aren't also C headers)263.Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)264.Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)265.Cases("grp.h", "iconv.h", 
"langinfo.h", "libgen.h", "monetary.h", true)266.Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)267.Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)268.Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)269.Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)270.Cases("sys/resource.h", "sys/select.h", "sys/sem.h", "sys/shm.h", "sys/socket.h", true)271.Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)272.Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)273.Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)274.Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)275.Default(false);276}277278/// Find a similar string in `Candidates`.279///280/// \param LHS a string for a similar string in `Candidates`281///282/// \param Candidates the candidates to find a similar string.283///284/// \returns a similar string if exists. If no similar string exists,285/// returns std::nullopt.286static std::optional<StringRef>287findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {288// We need to check if `Candidates` has the exact case-insensitive string289// because the Levenshtein distance match does not care about it.290for (StringRef C : Candidates) {291if (LHS.equals_insensitive(C)) {292return C;293}294}295296// Keep going with the Levenshtein distance match.297// If the LHS size is less than 3, use the LHS size minus 1 and if not,298// use the LHS size divided by 3.299size_t Length = LHS.size();300size_t MaxDist = Length < 3 ? 
Length - 1 : Length / 3;301302std::optional<std::pair<StringRef, size_t>> SimilarStr;303for (StringRef C : Candidates) {304size_t CurDist = LHS.edit_distance(C, true);305if (CurDist <= MaxDist) {306if (!SimilarStr) {307// The first similar string found.308SimilarStr = {C, CurDist};309} else if (CurDist < SimilarStr->second) {310// More similar string found.311SimilarStr = {C, CurDist};312}313}314}315316if (SimilarStr) {317return SimilarStr->first;318} else {319return std::nullopt;320}321}322323bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,324bool *ShadowFlag) {325// Missing macro name?326if (MacroNameTok.is(tok::eod))327return Diag(MacroNameTok, diag::err_pp_missing_macro_name);328329IdentifierInfo *II = MacroNameTok.getIdentifierInfo();330if (!II)331return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);332333if (II->isCPlusPlusOperatorKeyword()) {334// C++ 2.5p2: Alternative tokens behave the same as its primary token335// except for their spellings.336Diag(MacroNameTok, getLangOpts().MicrosoftExt337? 
diag::ext_pp_operator_used_as_macro_name338: diag::err_pp_operator_used_as_macro_name)339<< II << MacroNameTok.getKind();340// Allow #defining |and| and friends for Microsoft compatibility or341// recovery when legacy C headers are included in C++.342}343344if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {345// Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.346return Diag(MacroNameTok, diag::err_defined_macro_name);347}348349// If defining/undefining reserved identifier or a keyword, we need to issue350// a warning.351SourceLocation MacroNameLoc = MacroNameTok.getLocation();352if (ShadowFlag)353*ShadowFlag = false;354if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&355(SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {356MacroDiag D = MD_NoWarn;357if (isDefineUndef == MU_Define) {358D = shouldWarnOnMacroDef(*this, II);359}360else if (isDefineUndef == MU_Undef)361D = shouldWarnOnMacroUndef(*this, II);362if (D == MD_KeywordDef) {363// We do not want to warn on some patterns widely used in configuration364// scripts. 
This requires analyzing next tokens, so do not issue warnings365// now, only inform caller.366if (ShadowFlag)367*ShadowFlag = true;368}369if (D == MD_ReservedMacro)370Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);371}372373// Okay, we got a good identifier.374return false;375}376377/// Lex and validate a macro name, which occurs after a378/// \#define or \#undef.379///380/// This sets the token kind to eod and discards the rest of the macro line if381/// the macro name is invalid.382///383/// \param MacroNameTok Token that is expected to be a macro name.384/// \param isDefineUndef Context in which macro is used.385/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.386void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,387bool *ShadowFlag) {388// Read the token, don't allow macro expansion on it.389LexUnexpandedToken(MacroNameTok);390391if (MacroNameTok.is(tok::code_completion)) {392if (CodeComplete)393CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);394setCodeCompletionReached();395LexUnexpandedToken(MacroNameTok);396}397398if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))399return;400401// Invalid macro name, read and discard the rest of the line and set the402// token kind to tok::eod if necessary.403if (MacroNameTok.isNot(tok::eod)) {404MacroNameTok.setKind(tok::eod);405DiscardUntilEndOfDirective();406}407}408409/// Ensure that the next token is a tok::eod token.410///411/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is412/// true, then we consider macros that expand to zero tokens as being ok.413///414/// Returns the location of the end of the directive.415SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,416bool EnableMacros) {417Token Tmp;418// Lex unexpanded tokens for most directives: macros might expand to zero419// tokens, causing us to miss diagnosing invalid lines. 
Some directives (like420// #line) allow empty macros.421if (EnableMacros)422Lex(Tmp);423else424LexUnexpandedToken(Tmp);425426// There should be no tokens after the directive, but we allow them as an427// extension.428while (Tmp.is(tok::comment)) // Skip comments in -C mode.429LexUnexpandedToken(Tmp);430431if (Tmp.is(tok::eod))432return Tmp.getLocation();433434// Add a fixit in GNU/C99/C++ mode. Don't offer a fixit for strict-C89,435// or if this is a macro-style preprocessing directive, because it is more436// trouble than it is worth to insert /**/ and check that there is no /**/437// in the range also.438FixItHint Hint;439if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&440!CurTokenLexer)441Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");442Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;443return DiscardUntilEndOfDirective().getEnd();444}445446void Preprocessor::SuggestTypoedDirective(const Token &Tok,447StringRef Directive) const {448// If this is a `.S` file, treat unknown # directives as non-preprocessor449// directives.450if (getLangOpts().AsmPreprocessor) return;451452std::vector<StringRef> Candidates = {453"if", "ifdef", "ifndef", "elif", "else", "endif"454};455if (LangOpts.C23 || LangOpts.CPlusPlus23)456Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});457458if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {459// Directive cannot be coming from macro.460assert(Tok.getLocation().isFileID());461CharSourceRange DirectiveRange = CharSourceRange::getCharRange(462Tok.getLocation(),463Tok.getLocation().getLocWithOffset(Directive.size()));464StringRef SuggValue = *Sugg;465466auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);467Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;468}469}470471/// SkipExcludedConditionalBlock - We just read a \#if or related directive and472/// decided that the subsequent tokens are in the \#if'd out portion of the473/// 
file. Lex the rest of the file, until we see an \#endif. If474/// FoundNonSkipPortion is true, then we have already emitted code for part of475/// this \#if directive, so \#else/\#elif blocks should never be entered.476/// If ElseOk is true, then \#else directives are ok, if not, then we have477/// already seen one so a \#else directive is a duplicate. When this returns,478/// the caller can lex the first valid token.479void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,480SourceLocation IfTokenLoc,481bool FoundNonSkipPortion,482bool FoundElse,483SourceLocation ElseLoc) {484// In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()485// not getting called recursively by storing the RecordedSkippedRanges486// DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects487// that RecordedSkippedRanges won't get modified and SkipRangePtr won't be488// invalidated. If this changes and there is a need to call489// SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should490// change to do a second lookup in endLexPass function instead of reusing the491// lookup pointer.492assert(!SkippingExcludedConditionalBlock &&493"calling SkipExcludedConditionalBlock recursively");494llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);495496++NumSkipped;497assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");498assert(CurPPLexer && "Conditional PP block must be in a file!");499assert(CurLexer && "Conditional PP block but no current lexer set!");500501if (PreambleConditionalStack.reachedEOFWhileSkipping())502PreambleConditionalStack.clearSkipInfo();503else504CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,505FoundNonSkipPortion, FoundElse);506507// Enter raw mode to disable identifier lookup (and thus macro expansion),508// disabling warnings, etc.509CurPPLexer->LexingRawMode = true;510Token Tok;511SourceLocation endLoc;512513/// Keeps track and caches 
skipped ranges and also retrieves a prior skipped514/// range if the same block is re-visited.515struct SkippingRangeStateTy {516Preprocessor &PP;517518const char *BeginPtr = nullptr;519unsigned *SkipRangePtr = nullptr;520521SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}522523void beginLexPass() {524if (BeginPtr)525return; // continue skipping a block.526527// Initiate a skipping block and adjust the lexer if we already skipped it528// before.529BeginPtr = PP.CurLexer->getBufferLocation();530SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];531if (*SkipRangePtr) {532PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,533/*IsAtStartOfLine*/ true);534}535}536537void endLexPass(const char *Hashptr) {538if (!BeginPtr) {539// Not doing normal lexing.540assert(PP.CurLexer->isDependencyDirectivesLexer());541return;542}543544// Finished skipping a block, record the range if it's first time visited.545if (!*SkipRangePtr) {546*SkipRangePtr = Hashptr - BeginPtr;547}548assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));549BeginPtr = nullptr;550SkipRangePtr = nullptr;551}552} SkippingRangeState(*this);553554while (true) {555if (CurLexer->isDependencyDirectivesLexer()) {556CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);557} else {558SkippingRangeState.beginLexPass();559while (true) {560CurLexer->Lex(Tok);561562if (Tok.is(tok::code_completion)) {563setCodeCompletionReached();564if (CodeComplete)565CodeComplete->CodeCompleteInConditionalExclusion();566continue;567}568569// If this is the end of the buffer, we have an error.570if (Tok.is(tok::eof)) {571// We don't emit errors for unterminated conditionals here,572// Lexer::LexEndOfFile can do that properly.573// Just return and let the caller lex after this #include.574if (PreambleConditionalStack.isRecording())575PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,576FoundNonSkipPortion,577FoundElse, ElseLoc);578break;579}580581// If this token is not a preprocessor directive, 
just skip it.582if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())583continue;584585break;586}587}588if (Tok.is(tok::eof))589break;590591// We just parsed a # character at the start of a line, so we're in592// directive mode. Tell the lexer this so any newlines we see will be593// converted into an EOD token (this terminates the macro).594CurPPLexer->ParsingPreprocessorDirective = true;595if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);596597assert(Tok.is(tok::hash));598const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();599assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());600601// Read the next token, the directive flavor.602LexUnexpandedToken(Tok);603604// If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or605// something bogus), skip it.606if (Tok.isNot(tok::raw_identifier)) {607CurPPLexer->ParsingPreprocessorDirective = false;608// Restore comment saving mode.609if (CurLexer) CurLexer->resetExtendedTokenMode();610continue;611}612613// If the first letter isn't i or e, it isn't intesting to us. We know that614// this is safe in the face of spelling differences, because there is no way615// to spell an i/e in a strange way that is another letter. Skipping this616// allows us to avoid looking up the identifier info for #define/#undef and617// other common directives.618StringRef RI = Tok.getRawIdentifier();619620char FirstChar = RI[0];621if (FirstChar >= 'a' && FirstChar <= 'z' &&622FirstChar != 'i' && FirstChar != 'e') {623CurPPLexer->ParsingPreprocessorDirective = false;624// Restore comment saving mode.625if (CurLexer) CurLexer->resetExtendedTokenMode();626continue;627}628629// Get the identifier name without trigraphs or embedded newlines. 
Note630// that we can't use Tok.getIdentifierInfo() because its lookup is disabled631// when skipping.632char DirectiveBuf[20];633StringRef Directive;634if (!Tok.needsCleaning() && RI.size() < 20) {635Directive = RI;636} else {637std::string DirectiveStr = getSpelling(Tok);638size_t IdLen = DirectiveStr.size();639if (IdLen >= 20) {640CurPPLexer->ParsingPreprocessorDirective = false;641// Restore comment saving mode.642if (CurLexer) CurLexer->resetExtendedTokenMode();643continue;644}645memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);646Directive = StringRef(DirectiveBuf, IdLen);647}648649if (Directive.starts_with("if")) {650StringRef Sub = Directive.substr(2);651if (Sub.empty() || // "if"652Sub == "def" || // "ifdef"653Sub == "ndef") { // "ifndef"654// We know the entire #if/#ifdef/#ifndef block will be skipped, don't655// bother parsing the condition.656DiscardUntilEndOfDirective();657CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,658/*foundnonskip*/false,659/*foundelse*/false);660} else {661SuggestTypoedDirective(Tok, Directive);662}663} else if (Directive[0] == 'e') {664StringRef Sub = Directive.substr(1);665if (Sub == "ndif") { // "endif"666PPConditionalInfo CondInfo;667CondInfo.WasSkipping = true; // Silence bogus warning.668bool InCond = CurPPLexer->popConditionalLevel(CondInfo);669(void)InCond; // Silence warning in no-asserts mode.670assert(!InCond && "Can't be skipping if not in a conditional!");671672// If we popped the outermost skipping block, we're done skipping!673if (!CondInfo.WasSkipping) {674SkippingRangeState.endLexPass(Hashptr);675// Restore the value of LexingRawMode so that trailing comments676// are handled correctly, if we've reached the outermost block.677CurPPLexer->LexingRawMode = false;678endLoc = CheckEndOfDirective("endif");679CurPPLexer->LexingRawMode = true;680if (Callbacks)681Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);682break;683} else {684DiscardUntilEndOfDirective();685}686} else if (Sub == "lse") { 
// "else".687// #else directive in a skipping conditional. If not in some other688// skipping conditional, and if #else hasn't already been seen, enter it689// as a non-skipping conditional.690PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();691692if (!CondInfo.WasSkipping)693SkippingRangeState.endLexPass(Hashptr);694695// If this is a #else with a #else before it, report the error.696if (CondInfo.FoundElse)697Diag(Tok, diag::pp_err_else_after_else);698699// Note that we've seen a #else in this conditional.700CondInfo.FoundElse = true;701702// If the conditional is at the top level, and the #if block wasn't703// entered, enter the #else block now.704if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {705CondInfo.FoundNonSkip = true;706// Restore the value of LexingRawMode so that trailing comments707// are handled correctly.708CurPPLexer->LexingRawMode = false;709endLoc = CheckEndOfDirective("else");710CurPPLexer->LexingRawMode = true;711if (Callbacks)712Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);713break;714} else {715DiscardUntilEndOfDirective(); // C99 6.10p4.716}717} else if (Sub == "lif") { // "elif".718PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();719720if (!CondInfo.WasSkipping)721SkippingRangeState.endLexPass(Hashptr);722723// If this is a #elif with a #else before it, report the error.724if (CondInfo.FoundElse)725Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;726727// If this is in a skipping block or if we're already handled this #if728// block, don't bother parsing the condition.729if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {730// FIXME: We should probably do at least some minimal parsing of the731// condition to verify that it is well-formed. 
The current state732// allows #elif* directives with completely malformed (or missing)733// conditions.734DiscardUntilEndOfDirective();735} else {736// Restore the value of LexingRawMode so that identifiers are737// looked up, etc, inside the #elif expression.738assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");739CurPPLexer->LexingRawMode = false;740IdentifierInfo *IfNDefMacro = nullptr;741DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);742// Stop if Lexer became invalid after hitting code completion token.743if (!CurPPLexer)744return;745const bool CondValue = DER.Conditional;746CurPPLexer->LexingRawMode = true;747if (Callbacks) {748Callbacks->Elif(749Tok.getLocation(), DER.ExprRange,750(CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),751CondInfo.IfLoc);752}753// If this condition is true, enter it!754if (CondValue) {755CondInfo.FoundNonSkip = true;756break;757}758}759} else if (Sub == "lifdef" || // "elifdef"760Sub == "lifndef") { // "elifndef"761bool IsElifDef = Sub == "lifdef";762PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();763Token DirectiveToken = Tok;764765if (!CondInfo.WasSkipping)766SkippingRangeState.endLexPass(Hashptr);767768// Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even769// if this branch is in a skipping block.770unsigned DiagID;771if (LangOpts.CPlusPlus)772DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive773: diag::ext_cxx23_pp_directive;774else775DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive776: diag::ext_c23_pp_directive;777Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);778779// If this is a #elif with a #else before it, report the error.780if (CondInfo.FoundElse)781Diag(Tok, diag::pp_err_elif_after_else)782<< (IsElifDef ? 
PED_Elifdef : PED_Elifndef);783784// If this is in a skipping block or if we're already handled this #if785// block, don't bother parsing the condition.786if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {787// FIXME: We should probably do at least some minimal parsing of the788// condition to verify that it is well-formed. The current state789// allows #elif* directives with completely malformed (or missing)790// conditions.791DiscardUntilEndOfDirective();792} else {793// Restore the value of LexingRawMode so that identifiers are794// looked up, etc, inside the #elif[n]def expression.795assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");796CurPPLexer->LexingRawMode = false;797Token MacroNameTok;798ReadMacroName(MacroNameTok);799CurPPLexer->LexingRawMode = true;800801// If the macro name token is tok::eod, there was an error that was802// already reported.803if (MacroNameTok.is(tok::eod)) {804// Skip code until we get to #endif. This helps with recovery by805// not emitting an error when the #endif is reached.806continue;807}808809emitMacroExpansionWarnings(MacroNameTok);810811CheckEndOfDirective(IsElifDef ? 
"elifdef" : "elifndef");812813IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();814auto MD = getMacroDefinition(MII);815MacroInfo *MI = MD.getMacroInfo();816817if (Callbacks) {818if (IsElifDef) {819Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,820MD);821} else {822Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,823MD);824}825}826// If this condition is true, enter it!827if (static_cast<bool>(MI) == IsElifDef) {828CondInfo.FoundNonSkip = true;829break;830}831}832} else {833SuggestTypoedDirective(Tok, Directive);834}835} else {836SuggestTypoedDirective(Tok, Directive);837}838839CurPPLexer->ParsingPreprocessorDirective = false;840// Restore comment saving mode.841if (CurLexer) CurLexer->resetExtendedTokenMode();842}843844// Finally, if we are out of the conditional (saw an #endif or ran off the end845// of the file, just stop skipping and return to lexing whatever came after846// the #if block.847CurPPLexer->LexingRawMode = false;848849// The last skipped range isn't actually skipped yet if it's truncated850// by the end of the preamble; we'll resume parsing after the preamble.851if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))852Callbacks->SourceRangeSkipped(853SourceRange(HashTokenLoc, endLoc.isValid()854? endLoc855: CurPPLexer->getSourceLocation()),856Tok.getLocation());857}858859Module *Preprocessor::getModuleForLocation(SourceLocation Loc,860bool AllowTextual) {861if (!SourceMgr.isInMainFile(Loc)) {862// Try to determine the module of the include directive.863// FIXME: Look into directly passing the FileEntry from LookupFile instead.864FileID IDOfIncl = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));865if (auto EntryOfIncl = SourceMgr.getFileEntryRefForID(IDOfIncl)) {866// The include comes from an included file.867return HeaderInfo.getModuleMap()868.findModuleForHeader(*EntryOfIncl, AllowTextual)869.getModule();870}871}872873// This is either in the main file or not in a file at all. 
It belongs874// to the current module, if there is one.875return getLangOpts().CurrentModule.empty()876? nullptr877: HeaderInfo.lookupModule(getLangOpts().CurrentModule, Loc);878}879880OptionalFileEntryRef881Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,882SourceLocation Loc) {883Module *IncM = getModuleForLocation(884IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);885886// Walk up through the include stack, looking through textual headers of M887// until we hit a non-textual header that we can #include. (We assume textual888// headers of a module with non-textual headers aren't meant to be used to889// import entities from the module.)890auto &SM = getSourceManager();891while (!Loc.isInvalid() && !SM.isInMainFile(Loc)) {892auto ID = SM.getFileID(SM.getExpansionLoc(Loc));893auto FE = SM.getFileEntryRefForID(ID);894if (!FE)895break;896897// We want to find all possible modules that might contain this header, so898// search all enclosing directories for module maps and load them.899HeaderInfo.hasModuleMap(FE->getName(), /*Root*/ nullptr,900SourceMgr.isInSystemHeader(Loc));901902bool InPrivateHeader = false;903for (auto Header : HeaderInfo.findAllModulesForHeader(*FE)) {904if (!Header.isAccessibleFrom(IncM)) {905// It's in a private header; we can't #include it.906// FIXME: If there's a public header in some module that re-exports it,907// then we could suggest including that, but it's not clear that's the908// expected way to make this entity visible.909InPrivateHeader = true;910continue;911}912913// Don't suggest explicitly excluded headers.914if (Header.getRole() == ModuleMap::ExcludedHeader)915continue;916917// We'll suggest including textual headers below if they're918// include-guarded.919if (Header.getRole() & ModuleMap::TextualHeader)920continue;921922// If we have a module import syntax, we shouldn't include a header to923// make a particular module visible. 
Let the caller know they should924// suggest an import instead.925if (getLangOpts().ObjC || getLangOpts().CPlusPlusModules)926return std::nullopt;927928// If this is an accessible, non-textual header of M's top-level module929// that transitively includes the given location and makes the930// corresponding module visible, this is the thing to #include.931return *FE;932}933934// FIXME: If we're bailing out due to a private header, we shouldn't suggest935// an import either.936if (InPrivateHeader)937return std::nullopt;938939// If the header is includable and has an include guard, assume the940// intended way to expose its contents is by #include, not by importing a941// module that transitively includes it.942if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*FE))943return *FE;944945Loc = SM.getIncludeLoc(ID);946}947948return std::nullopt;949}950951OptionalFileEntryRef Preprocessor::LookupFile(952SourceLocation FilenameLoc, StringRef Filename, bool isAngled,953ConstSearchDirIterator FromDir, const FileEntry *FromFile,954ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,955SmallVectorImpl<char> *RelativePath,956ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,957bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {958ConstSearchDirIterator CurDirLocal = nullptr;959ConstSearchDirIterator &CurDir = CurDirArg ? 
*CurDirArg : CurDirLocal;960961Module *RequestingModule = getModuleForLocation(962FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);963964// If the header lookup mechanism may be relative to the current inclusion965// stack, record the parent #includes.966SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;967bool BuildSystemModule = false;968if (!FromDir && !FromFile) {969FileID FID = getCurrentFileLexer()->getFileID();970OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID);971972// If there is no file entry associated with this file, it must be the973// predefines buffer or the module includes buffer. Any other file is not974// lexed with a normal lexer, so it won't be scanned for preprocessor975// directives.976//977// If we have the predefines buffer, resolve #include references (which come978// from the -include command line argument) from the current working979// directory instead of relative to the main file.980//981// If we have the module includes buffer, resolve #include references (which982// come from header declarations in the module map) relative to the module983// map file.984if (!FileEnt) {985if (FID == SourceMgr.getMainFileID() && MainFileDir) {986auto IncludeDir =987HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(988Filename, getCurrentModule())989? 
HeaderInfo.getModuleMap().getBuiltinDir()990: MainFileDir;991Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));992BuildSystemModule = getCurrentModule()->IsSystem;993} else if ((FileEnt = SourceMgr.getFileEntryRefForID(994SourceMgr.getMainFileID()))) {995auto CWD = FileMgr.getOptionalDirectoryRef(".");996Includers.push_back(std::make_pair(*FileEnt, *CWD));997}998} else {999Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));1000}10011002// MSVC searches the current include stack from top to bottom for1003// headers included by quoted include directives.1004// See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx1005if (LangOpts.MSVCCompat && !isAngled) {1006for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {1007if (IsFileLexer(ISEntry))1008if ((FileEnt = ISEntry.ThePPLexer->getFileEntry()))1009Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));1010}1011}1012}10131014CurDir = CurDirLookup;10151016if (FromFile) {1017// We're supposed to start looking from after a particular file. 
Search1018// the include path until we find that file or run out of files.1019ConstSearchDirIterator TmpCurDir = CurDir;1020ConstSearchDirIterator TmpFromDir = nullptr;1021while (OptionalFileEntryRef FE = HeaderInfo.LookupFile(1022Filename, FilenameLoc, isAngled, TmpFromDir, &TmpCurDir,1023Includers, SearchPath, RelativePath, RequestingModule,1024SuggestedModule, /*IsMapped=*/nullptr,1025/*IsFrameworkFound=*/nullptr, SkipCache)) {1026// Keep looking as if this file did a #include_next.1027TmpFromDir = TmpCurDir;1028++TmpFromDir;1029if (&FE->getFileEntry() == FromFile) {1030// Found it.1031FromDir = TmpFromDir;1032CurDir = TmpCurDir;1033break;1034}1035}1036}10371038// Do a standard file entry lookup.1039OptionalFileEntryRef FE = HeaderInfo.LookupFile(1040Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers, SearchPath,1041RelativePath, RequestingModule, SuggestedModule, IsMapped,1042IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile, CacheFailures);1043if (FE)1044return FE;10451046OptionalFileEntryRef CurFileEnt;1047// Otherwise, see if this is a subframework header. If so, this is relative1048// to one of the headers on the #include stack. 
Walk the list of the current1049// headers on the #include stack and pass them to HeaderInfo.1050if (IsFileLexer()) {1051if ((CurFileEnt = CurPPLexer->getFileEntry())) {1052if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(1053Filename, *CurFileEnt, SearchPath, RelativePath, RequestingModule,1054SuggestedModule)) {1055return FE;1056}1057}1058}10591060for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {1061if (IsFileLexer(ISEntry)) {1062if ((CurFileEnt = ISEntry.ThePPLexer->getFileEntry())) {1063if (OptionalFileEntryRef FE = HeaderInfo.LookupSubframeworkHeader(1064Filename, *CurFileEnt, SearchPath, RelativePath,1065RequestingModule, SuggestedModule)) {1066return FE;1067}1068}1069}1070}10711072// Otherwise, we really couldn't find the file.1073return std::nullopt;1074}10751076OptionalFileEntryRef1077Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,1078const FileEntry *LookupFromFile) {1079FileManager &FM = this->getFileManager();1080if (llvm::sys::path::is_absolute(Filename)) {1081// lookup path or immediately fail1082llvm::Expected<FileEntryRef> ShouldBeEntry =1083FM.getFileRef(Filename, OpenFile);1084return llvm::expectedToOptional(std::move(ShouldBeEntry));1085}10861087auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,1088StringRef StartingFrom, StringRef FileName,1089bool RemoveInitialFileComponentFromLookupPath) {1090llvm::sys::path::native(StartingFrom, LookupPath);1091if (RemoveInitialFileComponentFromLookupPath)1092llvm::sys::path::remove_filename(LookupPath);1093if (!LookupPath.empty() &&1094!llvm::sys::path::is_separator(LookupPath.back())) {1095LookupPath.push_back(llvm::sys::path::get_separator().front());1096}1097LookupPath.append(FileName.begin(), FileName.end());1098};10991100// Otherwise, it's search time!1101SmallString<512> LookupPath;1102// Non-angled lookup1103if (!isAngled) {1104if (LookupFromFile) {1105// Use file-based lookup.1106StringRef FullFileDir = 
LookupFromFile->tryGetRealPathName();1107if (!FullFileDir.empty()) {1108SeparateComponents(LookupPath, FullFileDir, Filename, true);1109llvm::Expected<FileEntryRef> ShouldBeEntry =1110FM.getFileRef(LookupPath, OpenFile);1111if (ShouldBeEntry)1112return llvm::expectedToOptional(std::move(ShouldBeEntry));1113llvm::consumeError(ShouldBeEntry.takeError());1114}1115}11161117// Otherwise, do working directory lookup.1118LookupPath.clear();1119auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");1120if (MaybeWorkingDirEntry) {1121DirectoryEntryRef WorkingDirEntry = *MaybeWorkingDirEntry;1122StringRef WorkingDir = WorkingDirEntry.getName();1123if (!WorkingDir.empty()) {1124SeparateComponents(LookupPath, WorkingDir, Filename, false);1125llvm::Expected<FileEntryRef> ShouldBeEntry =1126FM.getFileRef(LookupPath, OpenFile);1127if (ShouldBeEntry)1128return llvm::expectedToOptional(std::move(ShouldBeEntry));1129llvm::consumeError(ShouldBeEntry.takeError());1130}1131}1132}11331134for (const auto &Entry : PPOpts->EmbedEntries) {1135LookupPath.clear();1136SeparateComponents(LookupPath, Entry, Filename, false);1137llvm::Expected<FileEntryRef> ShouldBeEntry =1138FM.getFileRef(LookupPath, OpenFile);1139if (ShouldBeEntry)1140return llvm::expectedToOptional(std::move(ShouldBeEntry));1141llvm::consumeError(ShouldBeEntry.takeError());1142}1143return std::nullopt;1144}11451146//===----------------------------------------------------------------------===//1147// Preprocessor Directive Handling.1148//===----------------------------------------------------------------------===//11491150class Preprocessor::ResetMacroExpansionHelper {1151public:1152ResetMacroExpansionHelper(Preprocessor *pp)1153: PP(pp), save(pp->DisableMacroExpansion) {1154if (pp->MacroExpansionInDirectivesOverride)1155pp->DisableMacroExpansion = false;1156}11571158~ResetMacroExpansionHelper() {1159PP->DisableMacroExpansion = save;1160}11611162private:1163Preprocessor *PP;1164bool save;1165};11661167/// Process a directive while 
looking for the through header or a #pragma1168/// hdrstop. The following directives are handled:1169/// #include (to check if it is the through header)1170/// #define (to warn about macros that don't match the PCH)1171/// #pragma (to check for pragma hdrstop).1172/// All other directives are completely discarded.1173void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,1174SourceLocation HashLoc) {1175if (const IdentifierInfo *II = Result.getIdentifierInfo()) {1176if (II->getPPKeywordID() == tok::pp_define) {1177return HandleDefineDirective(Result,1178/*ImmediatelyAfterHeaderGuard=*/false);1179}1180if (SkippingUntilPCHThroughHeader &&1181II->getPPKeywordID() == tok::pp_include) {1182return HandleIncludeDirective(HashLoc, Result);1183}1184if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {1185Lex(Result);1186auto *II = Result.getIdentifierInfo();1187if (II && II->getName() == "hdrstop")1188return HandlePragmaHdrstop(Result);1189}1190}1191DiscardUntilEndOfDirective();1192}11931194/// HandleDirective - This callback is invoked when the lexer sees a # token1195/// at the start of a line. This consumes the directive, modifies the1196/// lexer/preprocessor state, and advances the lexer(s) so that the next token1197/// read is the correct one.1198void Preprocessor::HandleDirective(Token &Result) {1199// FIXME: Traditional: # with whitespace before it not recognized by K&R?12001201// We just parsed a # character at the start of a line, so we're in directive1202// mode. Tell the lexer this so any newlines we see will be converted into an1203// EOD token (which terminates the directive).1204CurPPLexer->ParsingPreprocessorDirective = true;1205if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);12061207bool ImmediatelyAfterTopLevelIfndef =1208CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();1209CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();12101211++NumDirectives;12121213// We are about to read a token. 
For the multiple-include optimization FA to1214// work, we have to remember if we had read any tokens *before* this1215// pp-directive.1216bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();12171218// Save the '#' token in case we need to return it later.1219Token SavedHash = Result;12201221// Read the next token, the directive flavor. This isn't expanded due to1222// C99 6.10.3p8.1223LexUnexpandedToken(Result);12241225// C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.:1226// #define A(x) #x1227// A(abc1228// #warning blah1229// def)1230// If so, the user is relying on undefined behavior, emit a diagnostic. Do1231// not support this for #include-like directives, since that can result in1232// terrible diagnostics, and does not work in GCC.1233if (InMacroArgs) {1234if (IdentifierInfo *II = Result.getIdentifierInfo()) {1235switch (II->getPPKeywordID()) {1236case tok::pp_include:1237case tok::pp_import:1238case tok::pp_include_next:1239case tok::pp___include_macros:1240case tok::pp_pragma:1241case tok::pp_embed:1242Diag(Result, diag::err_embedded_directive) << II->getName();1243Diag(*ArgMacro, diag::note_macro_expansion_here)1244<< ArgMacro->getIdentifierInfo();1245DiscardUntilEndOfDirective();1246return;1247default:1248break;1249}1250}1251Diag(Result, diag::ext_embedded_directive);1252}12531254// Temporarily enable macro expansion if set so1255// and reset to previous state when returning from this function.1256ResetMacroExpansionHelper helper(this);12571258if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)1259return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());12601261switch (Result.getKind()) {1262case tok::eod:1263// Ignore the null directive with regards to the multiple-include1264// optimization, i.e. 
allow the null directive to appear outside of the1265// include guard and still enable the multiple-include optimization.1266CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);1267return; // null directive.1268case tok::code_completion:1269setCodeCompletionReached();1270if (CodeComplete)1271CodeComplete->CodeCompleteDirective(1272CurPPLexer->getConditionalStackDepth() > 0);1273return;1274case tok::numeric_constant: // # 7 GNU line marker directive.1275// In a .S file "# 4" may be a comment so don't treat it as a preprocessor1276// directive. However do permit it in the predefines file, as we use line1277// markers to mark the builtin macros as being in a system header.1278if (getLangOpts().AsmPreprocessor &&1279SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())1280break;1281return HandleDigitDirective(Result);1282default:1283IdentifierInfo *II = Result.getIdentifierInfo();1284if (!II) break; // Not an identifier.12851286// Ask what the preprocessor keyword ID is.1287switch (II->getPPKeywordID()) {1288default: break;1289// C99 6.10.1 - Conditional Inclusion.1290case tok::pp_if:1291return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);1292case tok::pp_ifdef:1293return HandleIfdefDirective(Result, SavedHash, false,1294true /*not valid for miopt*/);1295case tok::pp_ifndef:1296return HandleIfdefDirective(Result, SavedHash, true,1297ReadAnyTokensBeforeDirective);1298case tok::pp_elif:1299case tok::pp_elifdef:1300case tok::pp_elifndef:1301return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());13021303case tok::pp_else:1304return HandleElseDirective(Result, SavedHash);1305case tok::pp_endif:1306return HandleEndifDirective(Result);13071308// C99 6.10.2 - Source File Inclusion.1309case tok::pp_include:1310// Handle #include.1311return HandleIncludeDirective(SavedHash.getLocation(), Result);1312case tok::pp___include_macros:1313// Handle -imacros.1314return 
HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);13151316// C99 6.10.3 - Macro Replacement.1317case tok::pp_define:1318return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);1319case tok::pp_undef:1320return HandleUndefDirective();13211322// C99 6.10.4 - Line Control.1323case tok::pp_line:1324return HandleLineDirective();13251326// C99 6.10.5 - Error Directive.1327case tok::pp_error:1328return HandleUserDiagnosticDirective(Result, false);13291330// C99 6.10.6 - Pragma Directive.1331case tok::pp_pragma:1332return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});13331334// GNU Extensions.1335case tok::pp_import:1336return HandleImportDirective(SavedHash.getLocation(), Result);1337case tok::pp_include_next:1338return HandleIncludeNextDirective(SavedHash.getLocation(), Result);13391340case tok::pp_warning:1341if (LangOpts.CPlusPlus)1342Diag(Result, LangOpts.CPlusPlus231343? diag::warn_cxx23_compat_warning_directive1344: diag::ext_pp_warning_directive)1345<< /*C++23*/ 1;1346else1347Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive1348: diag::ext_pp_warning_directive)1349<< /*C23*/ 0;13501351return HandleUserDiagnosticDirective(Result, true);1352case tok::pp_ident:1353return HandleIdentSCCSDirective(Result);1354case tok::pp_sccs:1355return HandleIdentSCCSDirective(Result);1356case tok::pp_embed:1357return HandleEmbedDirective(SavedHash.getLocation(), Result,1358getCurrentFileLexer()1359? 
*getCurrentFileLexer()->getFileEntry()1360: static_cast<FileEntry *>(nullptr));1361case tok::pp_assert:1362//isExtension = true; // FIXME: implement #assert1363break;1364case tok::pp_unassert:1365//isExtension = true; // FIXME: implement #unassert1366break;13671368case tok::pp___public_macro:1369if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)1370return HandleMacroPublicDirective(Result);1371break;13721373case tok::pp___private_macro:1374if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)1375return HandleMacroPrivateDirective();1376break;1377}1378break;1379}13801381// If this is a .S file, treat unknown # directives as non-preprocessor1382// directives. This is important because # may be a comment or introduce1383// various pseudo-ops. Just return the # token and push back the following1384// token to be lexed next time.1385if (getLangOpts().AsmPreprocessor) {1386auto Toks = std::make_unique<Token[]>(2);1387// Return the # and the token after it.1388Toks[0] = SavedHash;1389Toks[1] = Result;13901391// If the second token is a hashhash token, then we need to translate it to1392// unknown so the token lexer doesn't try to perform token pasting.1393if (Result.is(tok::hashhash))1394Toks[1].setKind(tok::unknown);13951396// Enter this token stream so that we re-lex the tokens. 
Make sure to1397// enable macro expansion, in case the token after the # is an identifier1398// that is expanded.1399EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);1400return;1401}14021403// If we reached here, the preprocessing token is not valid!1404// Start suggesting if a similar directive found.1405Diag(Result, diag::err_pp_invalid_directive) << 0;14061407// Read the rest of the PP line.1408DiscardUntilEndOfDirective();14091410// Okay, we're done parsing the directive.1411}14121413/// GetLineValue - Convert a numeric token into an unsigned value, emitting1414/// Diagnostic DiagID if it is invalid, and returning the value in Val.1415static bool GetLineValue(Token &DigitTok, unsigned &Val,1416unsigned DiagID, Preprocessor &PP,1417bool IsGNULineDirective=false) {1418if (DigitTok.isNot(tok::numeric_constant)) {1419PP.Diag(DigitTok, DiagID);14201421if (DigitTok.isNot(tok::eod))1422PP.DiscardUntilEndOfDirective();1423return true;1424}14251426SmallString<64> IntegerBuffer;1427IntegerBuffer.resize(DigitTok.getLength());1428const char *DigitTokBegin = &IntegerBuffer[0];1429bool Invalid = false;1430unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);1431if (Invalid)1432return true;14331434// Verify that we have a simple digit-sequence, and compute the value. 
This1435// is always a simple digit string computed in decimal, so we do this manually1436// here.1437Val = 0;1438for (unsigned i = 0; i != ActualLength; ++i) {1439// C++1y [lex.fcon]p1:1440// Optional separating single quotes in a digit-sequence are ignored1441if (DigitTokBegin[i] == '\'')1442continue;14431444if (!isDigit(DigitTokBegin[i])) {1445PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),1446diag::err_pp_line_digit_sequence) << IsGNULineDirective;1447PP.DiscardUntilEndOfDirective();1448return true;1449}14501451unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');1452if (NextVal < Val) { // overflow.1453PP.Diag(DigitTok, DiagID);1454PP.DiscardUntilEndOfDirective();1455return true;1456}1457Val = NextVal;1458}14591460if (DigitTokBegin[0] == '0' && Val)1461PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)1462<< IsGNULineDirective;14631464return false;1465}14661467/// Handle a \#line directive: C99 6.10.4.1468///1469/// The two acceptable forms are:1470/// \verbatim1471/// # line digit-sequence1472/// # line digit-sequence "s-char-sequence"1473/// \endverbatim1474void Preprocessor::HandleLineDirective() {1475// Read the line # and string argument. Per C99 6.10.4p5, these tokens are1476// expanded.1477Token DigitTok;1478Lex(DigitTok);14791480// Validate the number and convert it to an unsigned.1481unsigned LineNo;1482if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))1483return;14841485if (LineNo == 0)1486Diag(DigitTok, diag::ext_pp_line_zero);14871488// Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a1489// number greater than 2147483647". 
C90 requires that the line # be <= 32767.1490unsigned LineLimit = 32768U;1491if (LangOpts.C99 || LangOpts.CPlusPlus11)1492LineLimit = 2147483648U;1493if (LineNo >= LineLimit)1494Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;1495else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)1496Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);14971498int FilenameID = -1;1499Token StrTok;1500Lex(StrTok);15011502// If the StrTok is "eod", then it wasn't present. Otherwise, it must be a1503// string followed by eod.1504if (StrTok.is(tok::eod))1505; // ok1506else if (StrTok.isNot(tok::string_literal)) {1507Diag(StrTok, diag::err_pp_line_invalid_filename);1508DiscardUntilEndOfDirective();1509return;1510} else if (StrTok.hasUDSuffix()) {1511Diag(StrTok, diag::err_invalid_string_udl);1512DiscardUntilEndOfDirective();1513return;1514} else {1515// Parse and validate the string, converting it into a unique ID.1516StringLiteralParser Literal(StrTok, *this);1517assert(Literal.isOrdinary() && "Didn't allow wide strings in");1518if (Literal.hadError) {1519DiscardUntilEndOfDirective();1520return;1521}1522if (Literal.Pascal) {1523Diag(StrTok, diag::err_pp_linemarker_invalid_filename);1524DiscardUntilEndOfDirective();1525return;1526}1527FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());15281529// Verify that there is nothing after the string, other than EOD. Because1530// of C99 6.10.4p5, macros that expand to empty tokens are ok.1531CheckEndOfDirective("line", true);1532}15331534// Take the file kind of the file containing the #line directive. #line1535// directives are often used for generated sources from the same codebase, so1536// the new file should generally be classified the same way as the current1537// file. 
This is visible in GCC's pre-processed output, which rewrites #line1538// to GNU line markers.1539SrcMgr::CharacteristicKind FileKind =1540SourceMgr.getFileCharacteristic(DigitTok.getLocation());15411542SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,1543false, FileKind);15441545if (Callbacks)1546Callbacks->FileChanged(CurPPLexer->getSourceLocation(),1547PPCallbacks::RenameFile, FileKind);1548}15491550/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line1551/// marker directive.1552static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,1553SrcMgr::CharacteristicKind &FileKind,1554Preprocessor &PP) {1555unsigned FlagVal;1556Token FlagTok;1557PP.Lex(FlagTok);1558if (FlagTok.is(tok::eod)) return false;1559if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))1560return true;15611562if (FlagVal == 1) {1563IsFileEntry = true;15641565PP.Lex(FlagTok);1566if (FlagTok.is(tok::eod)) return false;1567if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))1568return true;1569} else if (FlagVal == 2) {1570IsFileExit = true;15711572SourceManager &SM = PP.getSourceManager();1573// If we are leaving the current presumed file, check to make sure the1574// presumed include stack isn't empty!1575FileID CurFileID =1576SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;1577PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());1578if (PLoc.isInvalid())1579return true;15801581// If there is no include loc (main file) or if the include loc is in a1582// different physical file, then we aren't in a "1" line marker flag region.1583SourceLocation IncLoc = PLoc.getIncludeLoc();1584if (IncLoc.isInvalid() ||1585SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {1586PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);1587PP.DiscardUntilEndOfDirective();1588return true;1589}15901591PP.Lex(FlagTok);1592if (FlagTok.is(tok::eod)) return false;1593if (GetLineValue(FlagTok, 
FlagVal, diag::err_pp_linemarker_invalid_flag,PP))1594return true;1595}15961597// We must have 3 if there are still flags.1598if (FlagVal != 3) {1599PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);1600PP.DiscardUntilEndOfDirective();1601return true;1602}16031604FileKind = SrcMgr::C_System;16051606PP.Lex(FlagTok);1607if (FlagTok.is(tok::eod)) return false;1608if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))1609return true;16101611// We must have 4 if there is yet another flag.1612if (FlagVal != 4) {1613PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);1614PP.DiscardUntilEndOfDirective();1615return true;1616}16171618FileKind = SrcMgr::C_ExternCSystem;16191620PP.Lex(FlagTok);1621if (FlagTok.is(tok::eod)) return false;16221623// There are no more valid flags here.1624PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);1625PP.DiscardUntilEndOfDirective();1626return true;1627}16281629/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is1630/// one of the following forms:1631///1632/// # 421633/// # 42 "file" ('1' | '2')?1634/// # 42 "file" ('1' | '2')? '3' '4'?1635///1636void Preprocessor::HandleDigitDirective(Token &DigitTok) {1637// Validate the number and convert it to an unsigned. GNU does not have a1638// line # limit other than it fit in 32-bits.1639unsigned LineNo;1640if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,1641*this, true))1642return;16431644Token StrTok;1645Lex(StrTok);16461647bool IsFileEntry = false, IsFileExit = false;1648int FilenameID = -1;1649SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;16501651// If the StrTok is "eod", then it wasn't present. 
Otherwise, it must be a1652// string followed by eod.1653if (StrTok.is(tok::eod)) {1654Diag(StrTok, diag::ext_pp_gnu_line_directive);1655// Treat this like "#line NN", which doesn't change file characteristics.1656FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());1657} else if (StrTok.isNot(tok::string_literal)) {1658Diag(StrTok, diag::err_pp_linemarker_invalid_filename);1659DiscardUntilEndOfDirective();1660return;1661} else if (StrTok.hasUDSuffix()) {1662Diag(StrTok, diag::err_invalid_string_udl);1663DiscardUntilEndOfDirective();1664return;1665} else {1666// Parse and validate the string, converting it into a unique ID.1667StringLiteralParser Literal(StrTok, *this);1668assert(Literal.isOrdinary() && "Didn't allow wide strings in");1669if (Literal.hadError) {1670DiscardUntilEndOfDirective();1671return;1672}1673if (Literal.Pascal) {1674Diag(StrTok, diag::err_pp_linemarker_invalid_filename);1675DiscardUntilEndOfDirective();1676return;1677}16781679// If a filename was present, read any flags that are present.1680if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))1681return;1682if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&1683!SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))1684Diag(StrTok, diag::ext_pp_gnu_line_directive);16851686// Exiting to an empty string means pop to the including file, so leave1687// FilenameID as -1 in that case.1688if (!(IsFileExit && Literal.GetString().empty()))1689FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());1690}16911692// Create a line note with this information.1693SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,1694IsFileExit, FileKind);16951696// If the preprocessor has callbacks installed, notify them of the #line1697// change. 
This is used so that the line marker comes out in -E mode for1698// example.1699if (Callbacks) {1700PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;1701if (IsFileEntry)1702Reason = PPCallbacks::EnterFile;1703else if (IsFileExit)1704Reason = PPCallbacks::ExitFile;17051706Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);1707}1708}17091710/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.1711///1712void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,1713bool isWarning) {1714// Read the rest of the line raw. We do this because we don't want macros1715// to be expanded and we don't require that the tokens be valid preprocessing1716// tokens. For example, this is allowed: "#warning ` 'foo". GCC does1717// collapse multiple consecutive white space between tokens, but this isn't1718// specified by the standard.1719SmallString<128> Message;1720CurLexer->ReadToEndOfLine(&Message);17211722// Find the first non-whitespace character, so that we can make the1723// diagnostic more succinct.1724StringRef Msg = Message.str().ltrim(' ');17251726if (isWarning)1727Diag(Tok, diag::pp_hash_warning) << Msg;1728else1729Diag(Tok, diag::err_pp_hash_error) << Msg;1730}17311732/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.1733///1734void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {1735// Yes, this directive is an extension.1736Diag(Tok, diag::ext_pp_ident_directive);17371738// Read the string argument.1739Token StrTok;1740Lex(StrTok);17411742// If the token kind isn't a string, it's a malformed directive.1743if (StrTok.isNot(tok::string_literal) &&1744StrTok.isNot(tok::wide_string_literal)) {1745Diag(StrTok, diag::err_pp_malformed_ident);1746if (StrTok.isNot(tok::eod))1747DiscardUntilEndOfDirective();1748return;1749}17501751if (StrTok.hasUDSuffix()) {1752Diag(StrTok, diag::err_invalid_string_udl);1753DiscardUntilEndOfDirective();1754return;1755}17561757// Verify that there is nothing after the 
string, other than EOD.1758CheckEndOfDirective("ident");17591760if (Callbacks) {1761bool Invalid = false;1762std::string Str = getSpelling(StrTok, &Invalid);1763if (!Invalid)1764Callbacks->Ident(Tok.getLocation(), Str);1765}1766}17671768/// Handle a #public directive.1769void Preprocessor::HandleMacroPublicDirective(Token &Tok) {1770Token MacroNameTok;1771ReadMacroName(MacroNameTok, MU_Undef);17721773// Error reading macro name? If so, diagnostic already issued.1774if (MacroNameTok.is(tok::eod))1775return;17761777// Check to see if this is the last token on the #__public_macro line.1778CheckEndOfDirective("__public_macro");17791780IdentifierInfo *II = MacroNameTok.getIdentifierInfo();1781// Okay, we finally have a valid identifier to undef.1782MacroDirective *MD = getLocalMacroDirective(II);17831784// If the macro is not defined, this is an error.1785if (!MD) {1786Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;1787return;1788}17891790// Note that this macro has now been exported.1791appendMacroDirective(II, AllocateVisibilityMacroDirective(1792MacroNameTok.getLocation(), /*isPublic=*/true));1793}17941795/// Handle a #private directive.1796void Preprocessor::HandleMacroPrivateDirective() {1797Token MacroNameTok;1798ReadMacroName(MacroNameTok, MU_Undef);17991800// Error reading macro name? 
If so, diagnostic already issued.1801if (MacroNameTok.is(tok::eod))1802return;18031804// Check to see if this is the last token on the #__private_macro line.1805CheckEndOfDirective("__private_macro");18061807IdentifierInfo *II = MacroNameTok.getIdentifierInfo();1808// Okay, we finally have a valid identifier to undef.1809MacroDirective *MD = getLocalMacroDirective(II);18101811// If the macro is not defined, this is an error.1812if (!MD) {1813Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;1814return;1815}18161817// Note that this macro has now been marked private.1818appendMacroDirective(II, AllocateVisibilityMacroDirective(1819MacroNameTok.getLocation(), /*isPublic=*/false));1820}18211822//===----------------------------------------------------------------------===//1823// Preprocessor Include Directive Handling.1824//===----------------------------------------------------------------------===//18251826/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully1827/// checked and spelled filename, e.g. as an operand of \#include. This returns1828/// true if the input filename was in <>'s or false if it were in ""'s. 
The1829/// caller is expected to provide a buffer that is large enough to hold the1830/// spelling of the filename, but is also expected to handle the case when1831/// this method decides to use a different buffer.1832bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,1833StringRef &Buffer) {1834// Get the text form of the filename.1835assert(!Buffer.empty() && "Can't have tokens with empty spellings!");18361837// FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and1838// C++20 [lex.header]/2:1839//1840// If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then1841// in C: behavior is undefined1842// in C++: program is conditionally-supported with implementation-defined1843// semantics18441845// Make sure the filename is <x> or "x".1846bool isAngled;1847if (Buffer[0] == '<') {1848if (Buffer.back() != '>') {1849Diag(Loc, diag::err_pp_expects_filename);1850Buffer = StringRef();1851return true;1852}1853isAngled = true;1854} else if (Buffer[0] == '"') {1855if (Buffer.back() != '"') {1856Diag(Loc, diag::err_pp_expects_filename);1857Buffer = StringRef();1858return true;1859}1860isAngled = false;1861} else {1862Diag(Loc, diag::err_pp_expects_filename);1863Buffer = StringRef();1864return true;1865}18661867// Diagnose #include "" as invalid.1868if (Buffer.size() <= 2) {1869Diag(Loc, diag::err_pp_empty_filename);1870Buffer = StringRef();1871return true;1872}18731874// Skip the brackets.1875Buffer = Buffer.substr(1, Buffer.size()-2);1876return isAngled;1877}18781879/// Push a token onto the token stream containing an annotation.1880void Preprocessor::EnterAnnotationToken(SourceRange Range,1881tok::TokenKind Kind,1882void *AnnotationVal) {1883// FIXME: Produce this as the current token directly, rather than1884// allocating a new token for it.1885auto Tok = 
std::make_unique<Token[]>(1);1886Tok[0].startToken();1887Tok[0].setKind(Kind);1888Tok[0].setLocation(Range.getBegin());1889Tok[0].setAnnotationEndLoc(Range.getEnd());1890Tok[0].setAnnotationValue(AnnotationVal);1891EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);1892}18931894/// Produce a diagnostic informing the user that a #include or similar1895/// was implicitly treated as a module import.1896static void diagnoseAutoModuleImport(1897Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,1898ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,1899SourceLocation PathEnd) {1900SmallString<128> PathString;1901for (size_t I = 0, N = Path.size(); I != N; ++I) {1902if (I)1903PathString += '.';1904PathString += Path[I].first->getName();1905}19061907int IncludeKind = 0;1908switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {1909case tok::pp_include:1910IncludeKind = 0;1911break;19121913case tok::pp_import:1914IncludeKind = 1;1915break;19161917case tok::pp_include_next:1918IncludeKind = 2;1919break;19201921case tok::pp___include_macros:1922IncludeKind = 3;1923break;19241925default:1926llvm_unreachable("unknown include directive kind");1927}19281929PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)1930<< IncludeKind << PathString;1931}19321933// Given a vector of path components and a string containing the real1934// path to the file, build a properly-cased replacement in the vector,1935// and return true if the replacement should be suggested.1936static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,1937StringRef RealPathName,1938llvm::sys::path::Style Separator) {1939auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);1940auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);1941int Cnt = 0;1942bool SuggestReplacement = false;19431944auto IsSep = [Separator](StringRef Component) {1945return Component.size() == 1 &&1946llvm::sys::path::is_separator(Component[0], 
Separator);1947};19481949// Below is a best-effort to handle ".." in paths. It is admittedly1950// not 100% correct in the presence of symlinks.1951for (auto &Component : llvm::reverse(Components)) {1952if ("." == Component) {1953} else if (".." == Component) {1954++Cnt;1955} else if (Cnt) {1956--Cnt;1957} else if (RealPathComponentIter != RealPathComponentEnd) {1958if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&1959Component != *RealPathComponentIter) {1960// If these non-separator path components differ by more than just case,1961// then we may be looking at symlinked paths. Bail on this diagnostic to1962// avoid noisy false positives.1963SuggestReplacement =1964RealPathComponentIter->equals_insensitive(Component);1965if (!SuggestReplacement)1966break;1967Component = *RealPathComponentIter;1968}1969++RealPathComponentIter;1970}1971}1972return SuggestReplacement;1973}19741975bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,1976const TargetInfo &TargetInfo,1977const Module &M,1978DiagnosticsEngine &Diags) {1979Module::Requirement Requirement;1980Module::UnresolvedHeaderDirective MissingHeader;1981Module *ShadowingModule = nullptr;1982if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,1983ShadowingModule))1984return false;19851986if (MissingHeader.FileNameLoc.isValid()) {1987Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)1988<< MissingHeader.IsUmbrella << MissingHeader.FileName;1989} else if (ShadowingModule) {1990Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;1991Diags.Report(ShadowingModule->DefinitionLoc,1992diag::note_previous_definition);1993} else {1994// FIXME: Track the location at which the requirement was specified, and1995// use it here.1996Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)1997<< M.getFullModuleName() << Requirement.RequiredState1998<< Requirement.FeatureName;1999}2000return true;2001}20022003std::pair<ConstSearchDirIterator, const 
FileEntry *>2004Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {2005// #include_next is like #include, except that we start searching after2006// the current found directory. If we can't do this, issue a2007// diagnostic.2008ConstSearchDirIterator Lookup = CurDirLookup;2009const FileEntry *LookupFromFile = nullptr;20102011if (isInPrimaryFile() && LangOpts.IsHeaderFile) {2012// If the main file is a header, then it's either for PCH/AST generation,2013// or libclang opened it. Either way, handle it as a normal include below2014// and do not complain about include_next.2015} else if (isInPrimaryFile()) {2016Lookup = nullptr;2017Diag(IncludeNextTok, diag::pp_include_next_in_primary);2018} else if (CurLexerSubmodule) {2019// Start looking up in the directory *after* the one in which the current2020// file would be found, if any.2021assert(CurPPLexer && "#include_next directive in macro?");2022if (auto FE = CurPPLexer->getFileEntry())2023LookupFromFile = *FE;2024Lookup = nullptr;2025} else if (!Lookup) {2026// The current file was not found by walking the include path. Either it2027// is the primary file (handled above), or it was found by absolute path,2028// or it was found relative to such a file.2029// FIXME: Track enough information so we know which case we're in.2030Diag(IncludeNextTok, diag::pp_include_next_absolute_path);2031} else {2032// Start looking up in the next directory.2033++Lookup;2034}20352036return {Lookup, LookupFromFile};2037}20382039/// HandleIncludeDirective - The "\#include" tokens have just been read, read2040/// the file to be included from the lexer, then include it! This is a common2041/// routine with functionality shared between \#include, \#include_next and2042/// \#import. 
LookupFrom is set when this is a \#include_next directive, it2043/// specifies the file to start searching from.2044void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,2045Token &IncludeTok,2046ConstSearchDirIterator LookupFrom,2047const FileEntry *LookupFromFile) {2048Token FilenameTok;2049if (LexHeaderName(FilenameTok))2050return;20512052if (FilenameTok.isNot(tok::header_name)) {2053Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);2054if (FilenameTok.isNot(tok::eod))2055DiscardUntilEndOfDirective();2056return;2057}20582059// Verify that there is nothing after the filename, other than EOD. Note2060// that we allow macros that expand to nothing after the filename, because2061// this falls into the category of "#include pp-tokens new-line" specified2062// in C99 6.10.2p4.2063SourceLocation EndLoc =2064CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);20652066auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,2067EndLoc, LookupFrom, LookupFromFile);2068switch (Action.Kind) {2069case ImportAction::None:2070case ImportAction::SkippedModuleImport:2071break;2072case ImportAction::ModuleBegin:2073EnterAnnotationToken(SourceRange(HashLoc, EndLoc),2074tok::annot_module_begin, Action.ModuleForHeader);2075break;2076case ImportAction::HeaderUnitImport:2077EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,2078Action.ModuleForHeader);2079break;2080case ImportAction::ModuleImport:2081EnterAnnotationToken(SourceRange(HashLoc, EndLoc),2082tok::annot_module_include, Action.ModuleForHeader);2083break;2084case ImportAction::Failure:2085assert(TheModuleLoader.HadFatalFailure &&2086"This should be an early exit only to a fatal error");2087TheModuleLoader.HadFatalFailure = true;2088IncludeTok.setKind(tok::eof);2089CurLexer->cutOffLexing();2090return;2091}2092}20932094OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(2095ConstSearchDirIterator *CurDir, StringRef 
&Filename,2096SourceLocation FilenameLoc, CharSourceRange FilenameRange,2097const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,2098bool &IsMapped, ConstSearchDirIterator LookupFrom,2099const FileEntry *LookupFromFile, StringRef &LookupFilename,2100SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,2101ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {2102auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {2103if (LangOpts.AsmPreprocessor)2104return;21052106Module *RequestingModule = getModuleForLocation(2107FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);2108bool RequestingModuleIsModuleInterface =2109!SourceMgr.isInMainFile(FilenameLoc);21102111HeaderInfo.getModuleMap().diagnoseHeaderInclusion(2112RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,2113Filename, FE);2114};21152116OptionalFileEntryRef File = LookupFile(2117FilenameLoc, LookupFilename, isAngled, LookupFrom, LookupFromFile, CurDir,2118Callbacks ? &SearchPath : nullptr, Callbacks ? &RelativePath : nullptr,2119&SuggestedModule, &IsMapped, &IsFrameworkFound);2120if (File) {2121DiagnoseHeaderInclusion(*File);2122return File;2123}21242125// Give the clients a chance to silently skip this include.2126if (Callbacks && Callbacks->FileNotFound(Filename))2127return std::nullopt;21282129if (SuppressIncludeNotFoundError)2130return std::nullopt;21312132// If the file could not be located and it was included via angle2133// brackets, we can attempt a lookup as though it were a quoted path to2134// provide the user with a possible fixit.2135if (isAngled) {2136OptionalFileEntryRef File = LookupFile(2137FilenameLoc, LookupFilename, false, LookupFrom, LookupFromFile, CurDir,2138Callbacks ? &SearchPath : nullptr, Callbacks ? 
&RelativePath : nullptr,2139&SuggestedModule, &IsMapped,2140/*IsFrameworkFound=*/nullptr);2141if (File) {2142DiagnoseHeaderInclusion(*File);2143Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)2144<< Filename << IsImportDecl2145<< FixItHint::CreateReplacement(FilenameRange,2146"\"" + Filename.str() + "\"");2147return File;2148}2149}21502151// Check for likely typos due to leading or trailing non-isAlphanumeric2152// characters2153StringRef OriginalFilename = Filename;2154if (LangOpts.SpellChecking) {2155// A heuristic to correct a typo file name by removing leading and2156// trailing non-isAlphanumeric characters.2157auto CorrectTypoFilename = [](llvm::StringRef Filename) {2158Filename = Filename.drop_until(isAlphanumeric);2159while (!Filename.empty() && !isAlphanumeric(Filename.back())) {2160Filename = Filename.drop_back();2161}2162return Filename;2163};2164StringRef TypoCorrectionName = CorrectTypoFilename(Filename);2165StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);21662167OptionalFileEntryRef File = LookupFile(2168FilenameLoc, TypoCorrectionLookupName, isAngled, LookupFrom,2169LookupFromFile, CurDir, Callbacks ? &SearchPath : nullptr,2170Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,2171/*IsFrameworkFound=*/nullptr);2172if (File) {2173DiagnoseHeaderInclusion(*File);2174auto Hint =2175isAngled ? 
FixItHint::CreateReplacement(2176FilenameRange, "<" + TypoCorrectionName.str() + ">")2177: FixItHint::CreateReplacement(2178FilenameRange, "\"" + TypoCorrectionName.str() + "\"");2179Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)2180<< OriginalFilename << TypoCorrectionName << Hint;2181// We found the file, so set the Filename to the name after typo2182// correction.2183Filename = TypoCorrectionName;2184LookupFilename = TypoCorrectionLookupName;2185return File;2186}2187}21882189// If the file is still not found, just go with the vanilla diagnostic2190assert(!File && "expected missing file");2191Diag(FilenameTok, diag::err_pp_file_not_found)2192<< OriginalFilename << FilenameRange;2193if (IsFrameworkFound) {2194size_t SlashPos = OriginalFilename.find('/');2195assert(SlashPos != StringRef::npos &&2196"Include with framework name should have '/' in the filename");2197StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);2198FrameworkCacheEntry &CacheEntry =2199HeaderInfo.LookupFrameworkCache(FrameworkName);2200assert(CacheEntry.Directory && "Found framework should be in cache");2201Diag(FilenameTok, diag::note_pp_framework_without_header)2202<< OriginalFilename.substr(SlashPos + 1) << FrameworkName2203<< CacheEntry.Directory->getName();2204}22052206return std::nullopt;2207}22082209/// Handle either a #include-like directive or an import declaration that names2210/// a header file.2211///2212/// \param HashLoc The location of the '#' token for an include, or2213/// SourceLocation() for an import declaration.2214/// \param IncludeTok The include / include_next / import token.2215/// \param FilenameTok The header-name token.2216/// \param EndLoc The location at which any imported macros become visible.2217/// \param LookupFrom For #include_next, the starting directory for the2218/// directory lookup.2219/// \param LookupFromFile For #include_next, the starting file for the directory2220/// lookup.2221Preprocessor::ImportAction 
Preprocessor::HandleHeaderIncludeOrImport(
    SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
    SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
    const FileEntry *LookupFromFile) {
  // Overview of the stages below:
  //   1. spell and validate the header name;
  //   2. leave any audit / assume-nonnull pragma the include appears in;
  //   3. look the file up;
  //   4. decide between entering the file textually, importing its module,
  //      or skipping it;
  //   5. notify callbacks and emit portability diagnostics;
  //   6. act on the decision.
  SmallString<128> FilenameBuffer;
  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
  SourceLocation CharEnd = FilenameTok.getEndLoc();

  CharSourceRange FilenameRange
    = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
  // Keep the spelling with its delimiters; GetIncludeFilenameSpelling strips
  // them from Filename.
  StringRef OriginalFilename = Filename;
  bool isAngled =
    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);

  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
  // error.
  if (Filename.empty())
    return {ImportAction::None};

  // An import declaration is recognized by its invalid HashLoc; in that case
  // diagnostics are anchored at the import token instead of the '#'.
  bool IsImportDecl = HashLoc.isInvalid();
  SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;

  // Complain about attempts to #include files in an audit pragma.
  if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
    Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
    Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);

    // Immediately leave the pragma.
    PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
  }

  // Complain about attempts to #include files in an assume-nonnull pragma.
  if (PragmaAssumeNonNullLoc.isValid()) {
    Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
    Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);

    // Immediately leave the pragma.
    PragmaAssumeNonNullLoc = SourceLocation();
  }

  if (HeaderInfo.HasIncludeAliasMap()) {
    // Map the filename with the brackets still attached. If the name doesn't
    // map to anything, fall back on the filename we've already gotten the
    // spelling for.
    StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
    if (!NewName.empty())
      Filename = NewName;
  }

  // Search include directories.
  bool IsMapped = false;
  bool IsFrameworkFound = false;
  ConstSearchDirIterator CurDir = nullptr;
  SmallString<1024> SearchPath;
  SmallString<1024> RelativePath;
  // We get the raw path only if we have 'Callbacks' to which we later pass
  // the path.
  ModuleMap::KnownHeader SuggestedModule;
  SourceLocation FilenameLoc = FilenameTok.getLocation();
  StringRef LookupFilename = Filename;

  // Normalize slashes when compiling with -fms-extensions on non-Windows. This
  // is unnecessary on Windows since the filesystem there handles backslashes.
  SmallString<128> NormalizedPath;
  llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
  if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
    NormalizedPath = Filename.str();
    llvm::sys::path::native(NormalizedPath);
    LookupFilename = NormalizedPath;
    // From here on BackslashStyle is used for separator tests in the
    // portability check further down.
    BackslashStyle = llvm::sys::path::Style::windows;
  }

  OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
      &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
      IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
      LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);

  // While skipping up to the PCH through header, nothing is entered; finding
  // the through header itself ends the skipping.
  if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
    if (File && isPCHThroughHeader(&File->getFileEntry()))
      SkippingUntilPCHThroughHeader = false;
    return {ImportAction::None};
  }

  // Should we enter the source file? Set to Skip if either the source file is
  // known to have no effect beyond its effect on module visibility -- that is,
  // if it's got an include guard that is already defined, set to Import if it
  // is a modular header we've already built and should import.

  // For C++20 Modules
  // [cpp.include]/7 If the header identified by the header-name denotes an
  // importable header, it is implementation-defined whether the #include
  // preprocessing directive is instead replaced by an import directive.
  // For this implementation, the translation is permitted when we are parsing
  // the Global Module Fragment, and not otherwise (the cases where it would be
  // valid to replace an include with an import are highly constrained once in
  // named module purview; this choice avoids considerable complexity in
  // determining valid cases).

  enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;

  // In single-file parse mode no included file is ever entered.
  if (PPOpts->SingleFileParseMode)
    Action = IncludeLimitReached;

  // If we've reached the max allowed include depth, it is usually due to an
  // include cycle. Don't enter already processed files again as it can lead to
  // reaching the max allowed include depth again.
  if (Action == Enter && HasReachedMaxIncludeDepth && File &&
      alreadyIncluded(*File))
    Action = IncludeLimitReached;

  // FIXME: We do not have a good way to disambiguate C++ clang modules from
  // C++ standard modules (other than use/non-use of Header Units).

  Module *ModuleToImport = SuggestedModule.getModule();

  // Translation to an import is only considered for a found file that belongs
  // to a module we are not currently building.
  bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
                               !ModuleToImport->isForBuilding(getLangOpts());

  // Maybe a usable Header Unit
  bool UsableHeaderUnit = false;
  if (getLangOpts().CPlusPlusModules && ModuleToImport &&
      ModuleToImport->isHeaderUnit()) {
    if (TrackGMFState.inGMF() || IsImportDecl)
      UsableHeaderUnit = true;
    // NOTE(review): !IsImportDecl is always true on this path, because the
    // branch above already covers IsImportDecl; a plain 'else' is equivalent.
    else if (!IsImportDecl) {
      // This is a Header Unit that we do not include-translate
      ModuleToImport = nullptr;
    }
  }
  // Maybe a usable clang header module.
  bool UsableClangHeaderModule =
      (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
      ModuleToImport && !ModuleToImport->isHeaderUnit();

  // Determine whether we should try to import the module for this #include, if
  // there is one. Don't do so if precompiled module support is disabled or we
  // are processing this module textually (because we're building the module).
  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
    // If this include corresponds to a module but that module is
    // unavailable, diagnose the situation and bail out.
    // FIXME: Remove this; loadModule does the same check (but produces
    // slightly worse diagnostics).
    if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport,
                               getDiagnostics())) {
      Diag(FilenameTok.getLocation(),
           diag::note_implicit_top_level_module_import_here)
          << ModuleToImport->getTopLevelModuleName();
      return {ImportAction::None};
    }

    // Compute the module access path corresponding to this module.
    // FIXME: Should we have a second loadModule() overload to avoid this
    // extra lookup step?
    SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
    // Collected innermost-first by walking the Parent chain, then reversed so
    // the top-level module comes first.
    for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
      Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
                                    FilenameTok.getLocation()));
    std::reverse(Path.begin(), Path.end());

    // Warn that we're replacing the include/import with a module import.
    if (!IsImportDecl)
      diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);

    // Load the module to import its macros. We'll make the declarations
    // visible when the parser gets here.
    // FIXME: Pass ModuleToImport in here rather than converting it to a path
    // and making the module loader convert it back again.
    ModuleLoadResult Imported = TheModuleLoader.loadModule(
        IncludeTok.getLocation(), Path, Module::Hidden,
        /*IsInclusionDirective=*/true);
    assert((Imported == nullptr || Imported == ModuleToImport) &&
           "the imported module is different than the suggested one");

    if (Imported) {
      Action = Import;
    } else if (Imported.isMissingExpected()) {
      // NOTE(review): Imported tested false above yet is dereferenced here;
      // this presumably relies on a "missing expected" result still carrying
      // a non-null module pointer -- confirm against ModuleLoadResult.
      markClangModuleAsAffecting(
          static_cast<Module *>(Imported)->getTopLevelModule());
      // We failed to find a submodule that we assumed would exist (because it
      // was in the directory of an umbrella header, for instance), but no
      // actual module containing it exists (because the umbrella header is
      // incomplete). Treat this as a textual inclusion.
      ModuleToImport = nullptr;
    } else if (Imported.isConfigMismatch()) {
      // On a configuration mismatch, enter the header textually. We still know
      // that it's part of the corresponding module.
    } else {
      // We hit an error processing the import. Bail out.
      if (hadModuleLoaderFatalFailure()) {
        // With a fatal failure in the module loader, we abort parsing.
        Token &Result = IncludeTok;
        assert(CurLexer && "#include but no current lexer set!");
        Result.startToken();
        CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
        CurLexer->cutOffLexing();
      }
      return {ImportAction::None};
    }
  }

  // The #included file will be considered to be a system header if either it is
  // in a system include directory, or if the #includer is a system include
  // header.
  SrcMgr::CharacteristicKind FileCharacter =
      SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
  if (File)
    FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);

  // If this is a '#import' or an import-declaration, don't re-enter the file.
  //
  // FIXME: If we have a suggested module for a '#include', and we've already
  // visited this file, don't bother entering it again. We know it has no
  // further effect.
  bool EnterOnce =
      IsImportDecl ||
      IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;

  // Filled in by ShouldEnterIncludeFile and passed on to EnterSourceFile.
  bool IsFirstIncludeOfFile = false;

  // Ask HeaderInfo if we should enter this #include file. If not, #including
  // this file will have no effect.
  if (Action == Enter && File &&
      !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
                                         getLangOpts().Modules, ModuleToImport,
                                         IsFirstIncludeOfFile)) {
    // C++ standard modules:
    // If we are not in the GMF, then we textually include only
    // clang modules:
    // Even if we've already preprocessed this header once and know that we
    // don't need to see its contents again, we still need to import it if it's
    // modular because we might not have imported it from this submodule before.
    //
    // FIXME: We don't do this when compiling a PCH because the AST
    // serialization layer can't cope with it. This means we get local
    // submodule visibility semantics wrong in that case.
    if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
      Action = TrackGMFState.inGMF() ? Import : Skip;
    else
      Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
  }

  // Check for circular inclusion of the main file.
  // We can't generate a consistent preamble with regard to the conditional
  // stack if the main file is included again as due to the preamble bounds
  // some directives (e.g. #endif of a header guard) will never be seen.
  // Since this will lead to confusing errors, avoid the inclusion.
  if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
      SourceMgr.isMainFile(File->getFileEntry())) {
    Diag(FilenameTok.getLocation(),
         diag::err_pp_including_mainfile_in_preamble);
    return {ImportAction::None};
  }

  // Callbacks are told about the directive even when the file was not found
  // (File is then empty); import declarations are not reported here.
  if (Callbacks && !IsImportDecl) {
    // Notify the callback object that we've seen an inclusion directive.
    // FIXME: Use a different callback for a pp-import?
    Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
                                  FilenameRange, File, SearchPath, RelativePath,
                                  SuggestedModule.getModule(), Action == Import,
                                  FileCharacter);
    if (Action == Skip && File)
      Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
  }

  // Everything below needs an actual file.
  if (!File)
    return {ImportAction::None};

  // If this is a C++20 pp-import declaration, diagnose if we didn't find any
  // module corresponding to the named header.
  if (IsImportDecl && !ModuleToImport) {
    Diag(FilenameTok, diag::err_header_import_not_header_unit)
        << OriginalFilename << File->getName();
    return {ImportAction::None};
  }

  // Issue a diagnostic if the name of the file on disk has a different case
  // than the one we're about to open.
  const bool CheckIncludePathPortability =
      !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();

  if (CheckIncludePathPortability) {
    // Name is the (possibly slash-normalized) lookup name and is what gets
    // split into components; NameWithoriginalSlashes keeps the separators the
    // user actually wrote so the fix-it can reproduce them.
    StringRef Name = LookupFilename;
    StringRef NameWithoriginalSlashes = Filename;
#if defined(_WIN32)
    // Skip UNC prefix if present. (tryGetRealPathName() always
    // returns a path with the prefix skipped.)
    bool NameWasUNC = Name.consume_front("\\\\?\\");
    NameWithoriginalSlashes.consume_front("\\\\?\\");
#endif
    StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
    SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
                                          llvm::sys::path::end(Name));
#if defined(_WIN32)
    // -Wnonportable-include-path is designed to diagnose includes using
    // case even on systems with a case-insensitive file system.
    // On Windows, RealPathName always starts with an upper-case drive
    // letter for absolute paths, but Name might start with either
    // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
    // ("foo" will always have on-disk case, no matter which case was
    // used in the cd command). To not emit this warning solely for
    // the drive letter, whose case is dependent on if `cd` is used
    // with upper- or lower-case drive letters, always consider the
    // given drive letter case as correct for the purpose of this warning.
    SmallString<128> FixedDriveRealPath;
    if (llvm::sys::path::is_absolute(Name) &&
        llvm::sys::path::is_absolute(RealPathName) &&
        toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
        isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
      assert(Components.size() >= 3 && "should have drive, backslash, name");
      assert(Components[0].size() == 2 && "should start with drive");
      assert(Components[0][1] == ':' && "should have colon");
      FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
      RealPathName = FixedDriveRealPath;
    }
#endif

    if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
      // Rebuild the header name, delimiters included, from the corrected
      // components.
      SmallString<128> Path;
      Path.reserve(Name.size()+2);
      Path.push_back(isAngled ? '<' : '"');

      const auto IsSep = [BackslashStyle](char c) {
        return llvm::sys::path::is_separator(c, BackslashStyle);
      };

      for (auto Component : Components) {
        // On POSIX, Components will contain a single '/' as first element
        // exactly if Name is an absolute path.
        // On Windows, it will contain "C:" followed by '\' for absolute paths.
        // The drive letter is optional for absolute paths on Windows, but
        // clang currently cannot process absolute paths in #include lines that
        // don't have a drive.
        // If the first entry in Components is a directory separator,
        // then the code at the bottom of this loop that keeps the original
        // directory separator style copies it. If the second entry is
        // a directory separator (the C:\ case), then that separator already
        // got copied when the C: was processed and we want to skip that entry.
        if (!(Component.size() == 1 && IsSep(Component[0])))
          Path.append(Component);
        else if (Path.size() != 1)
          continue;

        // Append the separator(s) the user used, or the close quote
        // (Path carries one extra leading delimiter, hence the -1 when
        // indexing into NameWithoriginalSlashes.)
        if (Path.size() > NameWithoriginalSlashes.size()) {
          Path.push_back(isAngled ? '>' : '"');
          continue;
        }
        assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
        do
          Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
        while (Path.size() <= NameWithoriginalSlashes.size() &&
               IsSep(NameWithoriginalSlashes[Path.size()-1]));
      }

#if defined(_WIN32)
      // Restore UNC prefix if it was there.
      if (NameWasUNC)
        Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
#endif

      // For user files and known standard headers, issue a diagnostic.
      // For other system headers, don't. They can be controlled separately.
      auto DiagId =
          (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
              ? diag::pp_nonportable_path
              : diag::pp_nonportable_system_path;
      Diag(FilenameTok, DiagId) << Path <<
          FixItHint::CreateReplacement(FilenameRange, Path);
    }
  }

  // Act on the decision taken above; only Enter continues past the switch.
  switch (Action) {
  case Skip:
    // If we don't need to enter the file, stop now.
    if (ModuleToImport)
      return {ImportAction::SkippedModuleImport, ModuleToImport};
    return {ImportAction::None};

  case IncludeLimitReached:
    // If we reached our include limit and don't want to enter any more files,
    // don't go any further.
    return {ImportAction::None};

  case Import: {
    // If this is a module import, make it visible if needed.
    assert(ModuleToImport && "no module to import");

    makeModuleVisible(ModuleToImport, EndLoc);

    // For #__include_macros the module is made visible above, but no import
    // action is reported to the caller.
    if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
        tok::pp___include_macros)
      return {ImportAction::None};

    return {ImportAction::ModuleImport, ModuleToImport};
  }

  case Enter:
    break;
  }

  // Check that we don't have infinite #include recursion.
  if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
    Diag(FilenameTok, diag::err_pp_include_too_deep);
    // Remembered so that later includes of already-seen files are not
    // entered again (see the HasReachedMaxIncludeDepth check above).
    HasReachedMaxIncludeDepth = true;
    return {ImportAction::None};
  }

  if (isAngled && isInNamedModule())
    Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
        << getNamedModuleName();

  // Look up the file, create a File ID for it.
  SourceLocation IncludePos = FilenameTok.getLocation();
  // If the filename string was the result of macro expansions, set the include
  // position on the file where it will be included and after the expansions.
  if (IncludePos.isMacroID())
    IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
  FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
  if (!FID.isValid()) {
    // Failing to create the file ID is treated as a fatal module-loader
    // failure and reported to the caller as ImportAction::Failure.
    TheModuleLoader.HadFatalFailure = true;
    return ImportAction::Failure;
  }

  // If all is good, enter the new file!
  if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
                      IsFirstIncludeOfFile))
    return {ImportAction::None};

  // Determine if we're switching to building a new submodule, and which one.
  // This does not apply for C++20 modules header units.
  if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
    if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
      // We are building a submodule that belongs to a shadowed module. This
      // means we find header files in the shadowed module.
      Diag(ModuleToImport->DefinitionLoc,
           diag::err_module_build_shadowed_submodule)
          << ModuleToImport->getFullModuleName();
      Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
           diag::note_previous_definition);
      return {ImportAction::None};
    }
    // When building a pch, -fmodule-name tells the compiler to textually
    // include headers in the specified module. We are not building the
    // specified module.
    //
    // FIXME: This is the wrong way to handle this. We should produce a PCH
    // that behaves the same as the header would behave in a compilation using
    // that PCH, which means we should enter the submodule. We need to teach
    // the AST serialization layer to deal with the resulting AST.
    if (getLangOpts().CompilingPCH &&
        ModuleToImport->isForBuilding(getLangOpts()))
      return {ImportAction::None};

    assert(!CurLexerSubmodule && "should not have marked this as a module yet");
    CurLexerSubmodule = ModuleToImport;

    // Let the macro handling code know that any future macros are within
    // the new submodule.
    EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);

    // Let the parser know that any future declarations are within the new
    // submodule.
    // FIXME: There's no point doing this if we're handling a #__include_macros
    // directive.
    return {ImportAction::ModuleBegin, ModuleToImport};
  }

  assert(!IsImportDecl && "failed to diagnose missing module for import decl");
  return {ImportAction::None};
}

/// HandleIncludeNextDirective - Implements \#include_next.
///
/// Diagnoses the directive as an extension, determines where the lookup has
/// to start via getIncludeNextStart and then defers to
/// HandleIncludeDirective.
void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
                                              Token &IncludeNextTok) {
  Diag(IncludeNextTok, diag::ext_pp_include_next_directive);

  ConstSearchDirIterator Lookup = nullptr;
  const FileEntry *LookupFromFile;
  // Both values are assigned from the pair returned by getIncludeNextStart.
  std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);

  return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
                                LookupFromFile);
}

/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
  // The Microsoft #import directive takes a type library and generates header
  // files from it, and includes those. This is beyond the scope of what clang
  // does, so we ignore it and error out. However, #import can optionally have
  // trailing attributes that span multiple lines. We're going to eat those
  // so we can continue processing from there.
  Diag(Tok, diag::err_pp_import_directive_ms );

  // Read tokens until we get to the end of the directive.
Note that the2716// directive can be split over multiple lines using the backslash character.2717DiscardUntilEndOfDirective();2718}27192720/// HandleImportDirective - Implements \#import.2721///2722void Preprocessor::HandleImportDirective(SourceLocation HashLoc,2723Token &ImportTok) {2724if (!LangOpts.ObjC) { // #import is standard for ObjC.2725if (LangOpts.MSVCCompat)2726return HandleMicrosoftImportDirective(ImportTok);2727Diag(ImportTok, diag::ext_pp_import_directive);2728}2729return HandleIncludeDirective(HashLoc, ImportTok);2730}27312732/// HandleIncludeMacrosDirective - The -imacros command line option turns into a2733/// pseudo directive in the predefines buffer. This handles it by sucking all2734/// tokens through the preprocessor and discarding them (only keeping the side2735/// effects on the preprocessor).2736void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,2737Token &IncludeMacrosTok) {2738// This directive should only occur in the predefines buffer. If not, emit an2739// error and reject it.2740SourceLocation Loc = IncludeMacrosTok.getLocation();2741if (SourceMgr.getBufferName(Loc) != "<built-in>") {2742Diag(IncludeMacrosTok.getLocation(),2743diag::pp_include_macros_out_of_predefines);2744DiscardUntilEndOfDirective();2745return;2746}27472748// Treat this as a normal #include for checking purposes. If this is2749// successful, it will push a new lexer onto the include stack.2750HandleIncludeDirective(HashLoc, IncludeMacrosTok);27512752Token TmpTok;2753do {2754Lex(TmpTok);2755assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");2756} while (TmpTok.isNot(tok::hashhash));2757}27582759//===----------------------------------------------------------------------===//2760// Preprocessor Macro Directive Handling.2761//===----------------------------------------------------------------------===//27622763/// ReadMacroParameterList - The ( starting a parameter list of a macro2764/// definition has just been read. 
/// Lex the rest of the parameters and the
/// closing ), updating MI with what we learn.  Return true if an error occurs
/// parsing the param list.
///
/// \param MI  the macro being defined; on success it receives the parameter
///            list and, for variadic forms, the C99/GNU varargs flag.
/// \param Tok scratch token that every lexed parameter-list token is read
///            into; on return it holds the last token that was lexed.
bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
  SmallVector<IdentifierInfo*, 32> Parameters;

  while (true) {
    LexUnexpandedNonComment(Tok);
    switch (Tok.getKind()) {
    case tok::r_paren:
      // Found the end of the parameter list.
      if (Parameters.empty())  // #define FOO()
        return false;
      // Otherwise we have #define FOO(A,)
      Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
      return true;
    case tok::ellipsis:  // #define X(... -> C99 varargs
      if (!LangOpts.C99)
        Diag(Tok, LangOpts.CPlusPlus11 ?
             diag::warn_cxx98_compat_variadic_macro :
             diag::ext_variadic_macro);

      // OpenCL v1.2 s6.9.e: variadic macros are not supported.
      if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
        Diag(Tok, diag::ext_pp_opencl_variadic_macros);
      }

      // Lex the token after the '...'; it must close the parameter list.
      LexUnexpandedNonComment(Tok);
      if (Tok.isNot(tok::r_paren)) {
        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
        return true;
      }
      // Add the __VA_ARGS__ identifier as a parameter.
      Parameters.push_back(Ident__VA_ARGS__);
      MI->setIsC99Varargs();
      MI->setParameterList(Parameters, BP);
      return false;
    case tok::eod:  // #define X(
      Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
      return true;
    default:
      // Handle keywords and identifiers here to accept things like
      // #define Foo(for) for.
      IdentifierInfo *II = Tok.getIdentifierInfo();
      if (!II) {
        // #define X(1
        Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
        return true;
      }

      // If this is already used as a parameter, it is used multiple times (e.g.
      // #define X(A,A.
      if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
        Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
        return true;
      }

      // Add the parameter to the macro info.
      Parameters.push_back(II);

      // Lex the token after the identifier.
      LexUnexpandedNonComment(Tok);

      switch (Tok.getKind()) {
      default:          // #define X(A B
        Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
        return true;
      case tok::r_paren: // #define X(A)
        MI->setParameterList(Parameters, BP);
        return false;
      case tok::comma:  // #define X(A,
        // Leave the inner switch and go around the loop for the next
        // parameter.
        break;
      case tok::ellipsis:  // #define X(A... -> GCC extension
        // Diagnose extension.
        Diag(Tok, diag::ext_named_variadic_macro);

        // Lex the token after the '...'; it must close the parameter list.
        LexUnexpandedNonComment(Tok);
        if (Tok.isNot(tok::r_paren)) {
          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
          return true;
        }

        MI->setIsGNUVarargs();
        MI->setParameterList(Parameters, BP);
        return false;
      }
    }
  }
}

/// Returns true if the definition of the macro named by \p MacroName with
/// body \p MI matches one of the patterns accepted below:
///   - a single replacement token of the same kind as the macro name;
///   - a single replacement token that is a keyword under \p LOptions and
///     whose spelling equals the macro name once a leading "__" (and an
///     optional trailing "__") or a single leading "_" has been removed;
///   - an empty body, when the macro name is extern, inline, static or const.
/// HandleDefineDirective uses the result to decide whether to emit
/// warn_pp_macro_hides_keyword.
static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
                                   const LangOptions &LOptions) {
  if (MI->getNumTokens() == 1) {
    const Token &Value = MI->getReplacementToken(0);

    // Macro that is identity, like '#define inline inline' is a valid pattern.
    if (MacroName.getKind() == Value.getKind())
      return true;

    // Macro that maps a keyword to the same keyword decorated with leading/
    // trailing underscores is a valid pattern:
    //    #define inline __inline
    //    #define inline __inline__
    //    #define inline _inline (in MS compatibility mode)
    // NOTE(review): MS compatibility mode is not tested explicitly here;
    // presumably isKeyword(LOptions) accounts for it -- confirm.
    StringRef MacroText = MacroName.getIdentifierInfo()->getName();
    if (IdentifierInfo *II = Value.getIdentifierInfo()) {
      if (!II->isKeyword(LOptions))
        return false;
      StringRef ValueText = II->getName();
      StringRef TrimmedValue = ValueText;
      if (!ValueText.starts_with("__")) {
        // Single-underscore form: only the one leading '_' is removed.
        if (ValueText.starts_with("_"))
          TrimmedValue = TrimmedValue.drop_front(1);
        else
          return false;
      } else {
        // Double-underscore form: drop the prefix and, if present, the
        // matching "__" suffix.
        TrimmedValue = TrimmedValue.drop_front(2);
        if (TrimmedValue.ends_with("__"))
          TrimmedValue = TrimmedValue.drop_back(2);
      }
      return TrimmedValue == MacroText;
    } else {
      return false;
    }
  }

  // #define inline
  return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
                           tok::kw_const) &&
         MI->getNumTokens() == 0;
}

// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
// entire line) of the macro's tokens and adds them to MacroInfo, and while
// doing so performs certain validity checks including (but not limited to):
//   - # (stringization) is followed by a macro parameter
//
//  Returns a nullptr if an invalid sequence of tokens is encountered or returns
//  a pointer to a MacroInfo object.
//
//  MacroNameTok is the token naming the macro; its location becomes the
//  location of the new MacroInfo.  ImmediatelyAfterHeaderGuard is only
//  consulted when the macro has no body, in which case the macro is recorded
//  with the current lexer's MIOpt.

MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
    const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {

  // LastTok tracks the most recently accepted token; its location becomes the
  // definition end location at the bottom of this function.
  Token LastTok = MacroNameTok;
  // Create the new macro.
  MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());

  Token Tok;
  LexUnexpandedToken(Tok);

  // Ensure we consume the rest of the macro body if errors occur.
  auto _ = llvm::make_scope_exit([&]() {
    // The flag indicates if we are still waiting for 'eod'.
    if (CurLexer->ParsingPreprocessorDirective)
      DiscardUntilEndOfDirective();
  });

  // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
  // within their appropriate context.
  VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);

  // If this is a function-like macro definition, parse the argument list,
  // marking each of the identifiers as being used as macro arguments.  Also,
  // check other constraints on the first token of the macro body.
  if (Tok.is(tok::eod)) {
    if (ImmediatelyAfterHeaderGuard) {
      // Save this macro information since it may be part of a header guard.
      CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
                                        MacroNameTok.getLocation());
    }
    // If there is no body to this macro, we have no special handling here.
  } else if (Tok.hasLeadingSpace()) {
    // This is a normal token with leading space.  Clear the leading space
    // marker on the first token to get proper expansion.
    Tok.clearFlag(Token::LeadingSpace);
  } else if (Tok.is(tok::l_paren)) {
    // This is a function-like macro definition.  Read the argument list.
    MI->setIsFunctionLike();
    if (ReadMacroParameterList(MI, LastTok))
      return nullptr;

    // If this is a definition of an ISO C/C++ variadic function-like macro (not
    // using the GNU named varargs extension) inform our variadic scope guard
    // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
    // allowed only within the definition of a variadic macro.

    if (MI->isC99Varargs()) {
      VariadicMacroScopeGuard.enterScope();
    }

    // Read the first token after the arg list for down below.
    LexUnexpandedToken(Tok);
  } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
    // C99 requires whitespace between the macro definition and the body.  Emit
    // a diagnostic for something like "#define X+".
    Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
  } else {
    // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
    // first character of a replacement list is not a character required by
    // subclause 5.2.1, then there shall be white-space separation between the
    // identifier and the replacement list.".  5.2.1 lists this set:
    //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
    // is irrelevant here.
    bool isInvalid = false;
    if (Tok.is(tok::at)) // @ is not in the list above.
      isInvalid = true;
    else if (Tok.is(tok::unknown)) {
      // If we have an unknown token, it is something strange like "`".  Since
      // all valid characters would have lexed into a single character
      // token of some sort, we know this is not a valid case.
      isInvalid = true;
    }
    if (isInvalid)
      Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
    else
      Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
  }

  if (!Tok.is(tok::eod))
    LastTok = Tok;

  // Replacement-list tokens collected so far; handed to MI at the end.
  SmallVector<Token, 16> Tokens;

  // Read the rest of the macro body.
  if (MI->isObjectLike()) {
    // Object-like macros are very simple, just read their body.
    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;
      Tokens.push_back(Tok);
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);
    }
  } else {
    // Otherwise, read the body of a function-like macro.  While we are at it,
    // check C99 6.10.3.2p1: ensure that # operators are followed by macro
    // parameters in function-like macro expansions.

    VAOptDefinitionContext VAOCtx(*this);

    while (Tok.isNot(tok::eod)) {
      LastTok = Tok;

      // Everything that is not '#', '#@' or '##' is appended as-is; only
      // __VA_OPT__ bookkeeping is needed for such tokens.
      if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
        Tokens.push_back(Tok);

        if (VAOCtx.isVAOptToken(Tok)) {
          // If we're already within a VAOPT, emit an error.
          if (VAOCtx.isInVAOpt()) {
            Diag(Tok, diag::err_pp_vaopt_nested_use);
            return nullptr;
          }
          // Ensure VAOPT is followed by a '(' .
          LexUnexpandedToken(Tok);
          if (Tok.isNot(tok::l_paren)) {
            Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
            return nullptr;
          }
          Tokens.push_back(Tok);
          VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
          LexUnexpandedToken(Tok);
          // A '##' directly after "__VA_OPT__(" is rejected.
          if (Tok.is(tok::hashhash)) {
            Diag(Tok, diag::err_vaopt_paste_at_start);
            return nullptr;
          }
          continue;
        } else if (VAOCtx.isInVAOpt()) {
          if (Tok.is(tok::r_paren)) {
            if (VAOCtx.sawClosingParen()) {
              assert(Tokens.size() >= 3 &&
                     "Must have seen at least __VA_OPT__( "
                     "and a subsequent tok::r_paren");
              // The ')' was already appended above, so the token before it
              // sits at size() - 2; a '##' there is rejected.
              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
                Diag(Tok, diag::err_vaopt_paste_at_end);
                return nullptr;
              }
            }
          } else if (Tok.is(tok::l_paren)) {
            VAOCtx.sawOpeningParen(Tok.getLocation());
          }
        }
        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      // If we're in -traditional mode, then we should ignore stringification
      // and token pasting. Mark the tokens as unknown so as not to confuse
      // things.
      if (getLangOpts().TraditionalCPP) {
        Tok.setKind(tok::unknown);
        Tokens.push_back(Tok);

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
        continue;
      }

      if (Tok.is(tok::hashhash)) {
        // If we see token pasting, check if it looks like the gcc comma
        // pasting extension.  We'll use this information to suppress
        // diagnostics later on.

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);

        // '##' was the last token of the body: keep it (LastTok still holds
        // the '##') and stop reading.
        if (Tok.is(tok::eod)) {
          Tokens.push_back(LastTok);
          break;
        }

        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
            Tokens[Tokens.size() - 1].is(tok::comma))
          MI->setHasCommaPasting();

        // Things look ok, add the '##' token to the macro.
        Tokens.push_back(LastTok);
        continue;
      }

      // Our Token is a stringization operator.
      // Get the next token of the macro.
      LexUnexpandedToken(Tok);

      // Check for a valid macro arg identifier or __VA_OPT__.
      if (!VAOCtx.isVAOptToken(Tok) &&
          (Tok.getIdentifierInfo() == nullptr ||
           MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {

        // If this is assembler-with-cpp mode, we accept random gibberish after
        // the '#' because '#' is often a comment character.  However, change
        // the kind of the token to tok::unknown so that the preprocessor isn't
        // confused.
        if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
          LastTok.setKind(tok::unknown);
          Tokens.push_back(LastTok);
          continue;
        } else {
          Diag(Tok, diag::err_pp_stringize_not_parameter)
            << LastTok.is(tok::hashat);
          return nullptr;
        }
      }

      // Things look ok, add the '#' and param name tokens to the macro.
      Tokens.push_back(LastTok);

      // If the token following '#' is VAOPT, let the next iteration handle it
      // and check it for correctness, otherwise add the token and prime the
      // loop with the next one.
      if (!VAOCtx.isVAOptToken(Tok)) {
        Tokens.push_back(Tok);
        LastTok = Tok;

        // Get the next token of the macro.
        LexUnexpandedToken(Tok);
      }
    }
    // The body ended while a __VA_OPT__( was still open.
    if (VAOCtx.isInVAOpt()) {
      assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
      Diag(Tok, diag::err_pp_expected_after)
        << LastTok.getKind() << tok::r_paren;
      Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
      return nullptr;
    }
  }
  MI->setDefinitionEndLoc(LastTok.getLocation());

  MI->setTokens(Tokens, BP);
  return MI;
}

/// Returns true if \p II spells one of the four names "__strong", "__weak",
/// "__unsafe_unretained" or "__autoreleasing".  HandleDefineDirective uses
/// this to ignore redefinitions of the predefined versions of these macros in
/// Objective-C.
static bool isObjCProtectedMacro(const IdentifierInfo *II) {
  return II->isStr("__strong") || II->isStr("__weak") ||
         II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
}

/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
/// line then lets the caller lex the next real token.
///
/// \param DefineTok the 'define' directive token; only its location is used
///        here, to test whether the directive is in a system header.
/// \param ImmediatelyAfterHeaderGuard forwarded unchanged to
///        ReadOptionalMacroParameterListAndBody.
void Preprocessor::HandleDefineDirective(
    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
  ++NumDefined;

  Token MacroNameTok;
  bool MacroShadowsKeyword;
  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
  // Issue a final pragma warning if we're defining a macro that has been
  // undefined and is being redefined.
  if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

  // If we are supposed to keep comments in #defines, reenable comment saving
  // mode.
  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);

  MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
      MacroNameTok, ImmediatelyAfterHeaderGuard);

  // A null result means the body was invalid and has already been diagnosed.
  if (!MI) return;

  if (MacroShadowsKeyword &&
      !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
    Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
  }
  // Check that there is no paste (##) operator at the beginning or end of the
  // replacement list.
  unsigned NumTokens = MI->getNumTokens();
  if (NumTokens != 0) {
    if (MI->getReplacementToken(0).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
      return;
    }
    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
      return;
    }
  }

  // When skipping just warn about macros that do not match.
  if (SkippingUntilPCHThroughHeader) {
    const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
    if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt))
      Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
          << MacroNameTok.getIdentifierInfo();
    // Issue the diagnostic but allow the change if msvc extensions are enabled
    if (!LangOpts.MicrosoftExt)
      return;
  }

  // Finally, if this identifier already had a macro defined for it, verify that
  // the macro bodies are identical, and issue diagnostics if they are not.
  if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
    // Final macros are hard-mode: they always warn. Even if the bodies are
    // identical. Even if they are in system headers. Even if they are things we
    // would silently allow in the past.
    if (MacroNameTok.getIdentifierInfo()->isFinal())
      emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);

    // In Objective-C, ignore attempts to directly redefine the builtin
    // definitions of the ownership qualifiers.  It's still possible to
    // #undef them.
    if (getLangOpts().ObjC &&
        SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
            getPredefinesFileID() &&
        isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
      // Warn if it changes the tokens.
      if ((!getDiagnostics().getSuppressSystemWarnings() ||
           !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
          !MI->isIdenticalTo(*OtherMI, *this,
                             /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
      }
      assert(!OtherMI->isWarnIfUnused());
      // The new definition is dropped: no directive is appended for it.
      return;
    }

    // It is very common for system headers to have tons of macro redefinitions
    // and for warnings to be disabled in system headers.  If this is the case,
    // then don't bother calling MacroInfo::isIdenticalTo.
    if (!getDiagnostics().getSuppressSystemWarnings() ||
        !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {

      if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
        Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);

      // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
      // C++ [cpp.predefined]p4, but allow it as an extension.
      if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
        Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
      // Macros must be identical.  This means all tokens and whitespace
      // separation must be the same.  C99 6.10.3p2.
      else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
               !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
        Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
          << MacroNameTok.getIdentifierInfo();
        Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
      }
    }
    // The previous definition is being replaced, so stop tracking it in the
    // unused-macro set.
    if (OtherMI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
  }

  DefMacroDirective *MD =
      appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);

  assert(!MI->isUsed());
  // If we need warning for not using the macro, add its location in the
  // warn-because-unused-macro set. If it gets used it will be removed from set.
  if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
      !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
      !MacroExpansionInDirectivesOverride &&
      getSourceManager().getFileID(MI->getDefinitionLoc()) !=
          getPredefinesFileID()) {
    MI->setIsWarnIfUnused(true);
    WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
  }

  // If the callbacks want to know, tell them about the macro definition.
  if (Callbacks)
    Callbacks->MacroDefined(MacroNameTok, MD);

  // If we're in MS compatibility mode and the macro being defined is the
  // assert macro, implicitly add a macro definition for static_assert to work
  // around their broken assert.h header file in C. Only do so if there isn't
  // already a static_assert macro defined.
  if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
      MacroNameTok.getIdentifierInfo()->isStr("assert") &&
      !isMacroDefined("static_assert")) {
    // Note: this MI shadows the outer MI; it is the implicit
    // "#define static_assert _Static_assert" with a default location.
    MacroInfo *MI = AllocateMacroInfo(SourceLocation());

    Token Tok;
    Tok.startToken();
    Tok.setKind(tok::kw__Static_assert);
    Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
    MI->setTokens({Tok}, BP);
    (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
  }
}

/// HandleUndefDirective - Implements \#undef.
///
/// Reads the macro name, checks that nothing else follows on the line, emits
/// the applicable warnings, notifies the callbacks (whether or not the macro
/// was defined) and, if it was defined, appends an undef directive for it.
void Preprocessor::HandleUndefDirective() {
  ++NumUndefined;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok, MU_Undef);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod))
    return;

  // Check to see if this is the last token on the #undef line.
  CheckEndOfDirective("undef");

  // Okay, we have a valid identifier to undef.
  auto *II = MacroNameTok.getIdentifierInfo();
  auto MD = getMacroDefinition(II);
  // Stays null when the macro is not currently defined.
  UndefMacroDirective *Undef = nullptr;

  if (II->isFinal())
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);

  // If the macro is not defined, this is a noop undef.
  if (const MacroInfo *MI = MD.getMacroInfo()) {
    if (!MI->isUsed() && MI->isWarnIfUnused())
      Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);

    // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
    // C++ [cpp.predefined]p4, but allow it as an extension.
    if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
      Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);

    if (MI->isWarnIfUnused())
      WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());

    Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
  }

  // If the callbacks want to know, tell them about the macro #undef.
  // Note: no matter if the macro was defined or not.
  if (Callbacks)
    Callbacks->MacroUndefined(MacroNameTok, MD, Undef);

  if (Undef)
    appendMacroDirective(II, Undef);
}

//===----------------------------------------------------------------------===//
// Preprocessor Conditional Directive Handling.
//===----------------------------------------------------------------------===//

/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
/// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
/// true if any tokens have been returned or pp-directives activated before this
/// \#ifndef has been lexed.
///
/// \param Result    the directive token; its location identifies the
///                  conditional in callbacks and on the conditional stack.
/// \param HashToken the '#' token; its location is passed on when the block
///                  has to be skipped.
void Preprocessor::HandleIfdefDirective(Token &Result,
                                        const Token &HashToken,
                                        bool isIfndef,
                                        bool ReadAnyTokensBeforeDirective) {
  ++NumIf;
  Token DirectiveTok = Result;

  Token MacroNameTok;
  ReadMacroName(MacroNameTok);

  // Error reading macro name?  If so, diagnostic already issued.
  if (MacroNameTok.is(tok::eod)) {
    // Skip code until we get to #endif.  This helps with recovery by not
    // emitting an error when the #endif is reached.
    SkipExcludedConditionalBlock(HashToken.getLocation(),
                                 DirectiveTok.getLocation(),
                                 /*Foundnonskip*/ false, /*FoundElse*/ false);
    return;
  }

  // IsIfnDef is passed as true for both #ifdef and #ifndef.
  emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);

  // Check to see if this is the last token on the #if[n]def line.
  CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");

  IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
  auto MD = getMacroDefinition(MII);
  MacroInfo *MI = MD.getMacroInfo();

  if (CurPPLexer->getConditionalStackDepth() == 0) {
    // If the start of a top-level #ifdef and if the macro is not defined,
    // inform MIOpt that this might be the start of a proper include guard.
    // Otherwise it is some other form of unknown conditional which we can't
    // handle.
    if (!ReadAnyTokensBeforeDirective && !MI) {
      // NOTE(review): this relies on callers never passing
      // ReadAnyTokensBeforeDirective == false for #ifdef -- confirm at the
      // call site.
      assert(isIfndef && "#ifdef shouldn't reach here");
      CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
    } else
      CurPPLexer->MIOpt.EnterTopLevelConditional();
  }

  // If there is a macro, process it.
  if (MI)  // Mark it used.
    markMacroAsUsed(MI);

  if (Callbacks) {
    if (isIfndef)
      Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
    else
      Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
  }

  // Excluded blocks are kept only when the option is set and the directive
  // is in the main file.
  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
    getSourceManager().isInMainFile(DirectiveTok.getLocation());

  // Should we include the stuff contained by this directive?
  if (PPOpts->SingleFileParseMode && !MI) {
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
    // the directive blocks.
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                     /*wasskip*/false, /*foundnonskip*/false,
                                     /*foundelse*/false);
  } else if (!MI == isIfndef || RetainExcludedCB) {
    // Yes, remember that we are inside a conditional, then lex the next token.
    // (!MI == isIfndef holds for "#ifndef X" with X undefined and for
    // "#ifdef X" with X defined.)
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
                                     /*wasskip*/false, /*foundnonskip*/true,
                                     /*foundelse*/false);
  } else {
    // No, skip the contents of this block.
    SkipExcludedConditionalBlock(HashToken.getLocation(),
                                 DirectiveTok.getLocation(),
                                 /*Foundnonskip*/ false,
                                 /*FoundElse*/ false);
  }
}

/// HandleIfDirective - Implements the \#if
directive.3425///3426void Preprocessor::HandleIfDirective(Token &IfToken,3427const Token &HashToken,3428bool ReadAnyTokensBeforeDirective) {3429++NumIf;34303431// Parse and evaluate the conditional expression.3432IdentifierInfo *IfNDefMacro = nullptr;3433const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);3434const bool ConditionalTrue = DER.Conditional;3435// Lexer might become invalid if we hit code completion point while evaluating3436// expression.3437if (!CurPPLexer)3438return;34393440// If this condition is equivalent to #ifndef X, and if this is the first3441// directive seen, handle it for the multiple-include optimization.3442if (CurPPLexer->getConditionalStackDepth() == 0) {3443if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)3444// FIXME: Pass in the location of the macro name, not the 'if' token.3445CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());3446else3447CurPPLexer->MIOpt.EnterTopLevelConditional();3448}34493450if (Callbacks)3451Callbacks->If(3452IfToken.getLocation(), DER.ExprRange,3453(ConditionalTrue ? 
PPCallbacks::CVK_True : PPCallbacks::CVK_False));34543455bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&3456getSourceManager().isInMainFile(IfToken.getLocation());34573458// Should we include the stuff contained by this directive?3459if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {3460// In 'single-file-parse mode' undefined identifiers trigger parsing of all3461// the directive blocks.3462CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,3463/*foundnonskip*/false, /*foundelse*/false);3464} else if (ConditionalTrue || RetainExcludedCB) {3465// Yes, remember that we are inside a conditional, then lex the next token.3466CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,3467/*foundnonskip*/true, /*foundelse*/false);3468} else {3469// No, skip the contents of this block.3470SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),3471/*Foundnonskip*/ false,3472/*FoundElse*/ false);3473}3474}34753476/// HandleEndifDirective - Implements the \#endif directive.3477///3478void Preprocessor::HandleEndifDirective(Token &EndifToken) {3479++NumEndif;34803481// Check that this is the whole directive.3482CheckEndOfDirective("endif");34833484PPConditionalInfo CondInfo;3485if (CurPPLexer->popConditionalLevel(CondInfo)) {3486// No conditionals on the stack: this is an #endif without an #if.3487Diag(EndifToken, diag::err_pp_endif_without_if);3488return;3489}34903491// If this the end of a top-level #endif, inform MIOpt.3492if (CurPPLexer->getConditionalStackDepth() == 0)3493CurPPLexer->MIOpt.ExitTopLevelConditional();34943495assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&3496"This code should only be reachable in the non-skipping case!");34973498if (Callbacks)3499Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);3500}35013502/// HandleElseDirective - Implements the \#else directive.3503///3504void Preprocessor::HandleElseDirective(Token &Result, const Token 
&HashToken) {3505++NumElse;35063507// #else directive in a non-skipping conditional... start skipping.3508CheckEndOfDirective("else");35093510PPConditionalInfo CI;3511if (CurPPLexer->popConditionalLevel(CI)) {3512Diag(Result, diag::pp_err_else_without_if);3513return;3514}35153516// If this is a top-level #else, inform the MIOpt.3517if (CurPPLexer->getConditionalStackDepth() == 0)3518CurPPLexer->MIOpt.EnterTopLevelConditional();35193520// If this is a #else with a #else before it, report the error.3521if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);35223523if (Callbacks)3524Callbacks->Else(Result.getLocation(), CI.IfLoc);35253526bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&3527getSourceManager().isInMainFile(Result.getLocation());35283529if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {3530// In 'single-file-parse mode' undefined identifiers trigger parsing of all3531// the directive blocks.3532CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,3533/*foundnonskip*/false, /*foundelse*/true);3534return;3535}35363537// Finally, skip the rest of the contents of this block.3538SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,3539/*Foundnonskip*/ true,3540/*FoundElse*/ true, Result.getLocation());3541}35423543/// Implements the \#elif, \#elifdef, and \#elifndef directives.3544void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,3545const Token &HashToken,3546tok::PPKeywordKind Kind) {3547PPElifDiag DirKind = Kind == tok::pp_elif ? PED_Elif3548: Kind == tok::pp_elifdef ? PED_Elifdef3549: PED_Elifndef;3550++NumElse;35513552// Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.3553switch (DirKind) {3554case PED_Elifdef:3555case PED_Elifndef:3556unsigned DiagID;3557if (LangOpts.CPlusPlus)3558DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive3559: diag::ext_cxx23_pp_directive;3560else3561DiagID = LangOpts.C23 ? 
diag::warn_c23_compat_pp_directive3562: diag::ext_c23_pp_directive;3563Diag(ElifToken, DiagID) << DirKind;3564break;3565default:3566break;3567}35683569// #elif directive in a non-skipping conditional... start skipping.3570// We don't care what the condition is, because we will always skip it (since3571// the block immediately before it was included).3572SourceRange ConditionRange = DiscardUntilEndOfDirective();35733574PPConditionalInfo CI;3575if (CurPPLexer->popConditionalLevel(CI)) {3576Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;3577return;3578}35793580// If this is a top-level #elif, inform the MIOpt.3581if (CurPPLexer->getConditionalStackDepth() == 0)3582CurPPLexer->MIOpt.EnterTopLevelConditional();35833584// If this is a #elif with a #else before it, report the error.3585if (CI.FoundElse)3586Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;35873588if (Callbacks) {3589switch (Kind) {3590case tok::pp_elif:3591Callbacks->Elif(ElifToken.getLocation(), ConditionRange,3592PPCallbacks::CVK_NotEvaluated, CI.IfLoc);3593break;3594case tok::pp_elifdef:3595Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);3596break;3597case tok::pp_elifndef:3598Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);3599break;3600default:3601assert(false && "unexpected directive kind");3602break;3603}3604}36053606bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&3607getSourceManager().isInMainFile(ElifToken.getLocation());36083609if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {3610// In 'single-file-parse mode' undefined identifiers trigger parsing of all3611// the directive blocks.3612CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,3613/*foundnonskip*/false, /*foundelse*/false);3614return;3615}36163617// Finally, skip the rest of the contents of this block.3618SkipExcludedConditionalBlock(3619HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ 
true,3620/*FoundElse*/ CI.FoundElse, ElifToken.getLocation());3621}36223623std::optional<LexEmbedParametersResult>3624Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {3625LexEmbedParametersResult Result{};3626SmallVector<Token, 2> ParameterTokens;3627tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;36283629auto DiagMismatchedBracesAndSkipToEOD =3630[&](tok::TokenKind Expected,3631std::pair<tok::TokenKind, SourceLocation> Matches) {3632Diag(CurTok, diag::err_expected) << Expected;3633Diag(Matches.second, diag::note_matching) << Matches.first;3634if (CurTok.isNot(tok::eod))3635DiscardUntilEndOfDirective(CurTok);3636};36373638auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {3639if (CurTok.isNot(Kind)) {3640Diag(CurTok, diag::err_expected) << Kind;3641if (CurTok.isNot(tok::eod))3642DiscardUntilEndOfDirective(CurTok);3643return false;3644}3645return true;3646};36473648// C23 6.10:3649// pp-parameter-name:3650// pp-standard-parameter3651// pp-prefixed-parameter3652//3653// pp-standard-parameter:3654// identifier3655//3656// pp-prefixed-parameter:3657// identifier :: identifier3658auto LexPPParameterName = [&]() -> std::optional<std::string> {3659// We expect the current token to be an identifier; if it's not, things3660// have gone wrong.3661if (!ExpectOrDiagAndSkipToEOD(tok::identifier))3662return std::nullopt;36633664const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();36653666// Lex another token; it is either a :: or we're done with the parameter3667// name.3668LexNonComment(CurTok);3669if (CurTok.is(tok::coloncolon)) {3670// We found a ::, so lex another identifier token.3671LexNonComment(CurTok);3672if (!ExpectOrDiagAndSkipToEOD(tok::identifier))3673return std::nullopt;36743675const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();36763677// Lex another token so we're past the name.3678LexNonComment(CurTok);3679return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();3680}3681return 
Prefix->getName().str();3682};36833684// C23 6.10p5: In all aspects, a preprocessor standard parameter specified by3685// this document as an identifier pp_param and an identifier of the form3686// __pp_param__ shall behave the same when used as a preprocessor parameter,3687// except for the spelling.3688auto NormalizeParameterName = [](StringRef Name) {3689if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))3690return Name.substr(2, Name.size() - 4);3691return Name;3692};36933694auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {3695// we have a limit parameter and its internals are processed using3696// evaluation rules from #if.3697if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))3698return std::nullopt;36993700// We do not consume the ( because EvaluateDirectiveExpression will lex3701// the next token for us.3702IdentifierInfo *ParameterIfNDef = nullptr;3703bool EvaluatedDefined;3704DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(3705ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);37063707if (!LimitEvalResult.Value) {3708// If there was an error evaluating the directive expression, we expect3709// to be at the end of directive token.3710assert(CurTok.is(tok::eod) && "expect to be at the end of directive");3711return std::nullopt;3712}37133714if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))3715return std::nullopt;37163717// Eat the ).3718LexNonComment(CurTok);37193720// C23 6.10.3.2p2: The token defined shall not appear within the constant3721// expression.3722if (EvaluatedDefined) {3723Diag(CurTok, diag::err_defined_in_pp_embed);3724return std::nullopt;3725}37263727if (LimitEvalResult.Value) {3728const llvm::APSInt &Result = *LimitEvalResult.Value;3729if (Result.isNegative()) {3730Diag(CurTok, diag::err_requires_positive_value)3731<< toString(Result, 10) << /*positive*/ 0;3732return std::nullopt;3733}3734return Result.getLimitedValue();3735}3736return std::nullopt;3737};37383739auto 
GetMatchingCloseBracket = [](tok::TokenKind Kind) {3740switch (Kind) {3741case tok::l_paren:3742return tok::r_paren;3743case tok::l_brace:3744return tok::r_brace;3745case tok::l_square:3746return tok::r_square;3747default:3748llvm_unreachable("should not get here");3749}3750};37513752auto LexParenthesizedBalancedTokenSoup =3753[&](llvm::SmallVectorImpl<Token> &Tokens) {3754std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;37553756// We expect the current token to be a left paren.3757if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))3758return false;3759LexNonComment(CurTok); // Eat the (37603761bool WaitingForInnerCloseParen = false;3762while (CurTok.isNot(tok::eod) &&3763(WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {3764switch (CurTok.getKind()) {3765default: // Shutting up diagnostics about not fully-covered switch.3766break;3767case tok::l_paren:3768WaitingForInnerCloseParen = true;3769[[fallthrough]];3770case tok::l_brace:3771case tok::l_square:3772BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});3773break;3774case tok::r_paren:3775WaitingForInnerCloseParen = false;3776[[fallthrough]];3777case tok::r_brace:3778case tok::r_square: {3779tok::TokenKind Matching =3780GetMatchingCloseBracket(BracketStack.back().first);3781if (BracketStack.empty() || CurTok.getKind() != Matching) {3782DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());3783return false;3784}3785BracketStack.pop_back();3786} break;3787}3788Tokens.push_back(CurTok);3789LexNonComment(CurTok);3790}37913792// When we're done, we want to eat the closing paren.3793if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))3794return false;37953796LexNonComment(CurTok); // Eat the )3797return true;3798};37993800LexNonComment(CurTok); // Prime the pump.3801while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {3802SourceLocation ParamStartLoc = CurTok.getLocation();3803std::optional<std::string> ParamName = LexPPParameterName();3804if (!ParamName)3805return 
std::nullopt;3806StringRef Parameter = NormalizeParameterName(*ParamName);38073808// Lex the parameters (dependent on the parameter type we want!).3809//3810// C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or3811// one time in the embed parameter sequence.3812if (Parameter == "limit") {3813if (Result.MaybeLimitParam)3814Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;38153816std::optional<size_t> Limit = LexParenthesizedIntegerExpr();3817if (!Limit)3818return std::nullopt;3819Result.MaybeLimitParam =3820PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};3821} else if (Parameter == "clang::offset") {3822if (Result.MaybeOffsetParam)3823Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;38243825std::optional<size_t> Offset = LexParenthesizedIntegerExpr();3826if (!Offset)3827return std::nullopt;3828Result.MaybeOffsetParam = PPEmbedParameterOffset{3829*Offset, {ParamStartLoc, CurTok.getLocation()}};3830} else if (Parameter == "prefix") {3831if (Result.MaybePrefixParam)3832Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;38333834SmallVector<Token, 4> Soup;3835if (!LexParenthesizedBalancedTokenSoup(Soup))3836return std::nullopt;3837Result.MaybePrefixParam = PPEmbedParameterPrefix{3838std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};3839} else if (Parameter == "suffix") {3840if (Result.MaybeSuffixParam)3841Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;38423843SmallVector<Token, 4> Soup;3844if (!LexParenthesizedBalancedTokenSoup(Soup))3845return std::nullopt;3846Result.MaybeSuffixParam = PPEmbedParameterSuffix{3847std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};3848} else if (Parameter == "if_empty") {3849if (Result.MaybeIfEmptyParam)3850Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;38513852SmallVector<Token, 4> Soup;3853if (!LexParenthesizedBalancedTokenSoup(Soup))3854return std::nullopt;3855Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{3856std::move(Soup), 
{ParamStartLoc, CurTok.getLocation()}};3857} else {3858++Result.UnrecognizedParams;38593860// If there's a left paren, we need to parse a balanced token sequence3861// and just eat those tokens.3862if (CurTok.is(tok::l_paren)) {3863SmallVector<Token, 4> Soup;3864if (!LexParenthesizedBalancedTokenSoup(Soup))3865return std::nullopt;3866}3867if (!ForHasEmbed) {3868Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;3869return std::nullopt;3870}3871}3872}3873return Result;3874}38753876void Preprocessor::HandleEmbedDirectiveImpl(3877SourceLocation HashLoc, const LexEmbedParametersResult &Params,3878StringRef BinaryContents) {3879if (BinaryContents.empty()) {3880// If we have no binary contents, the only thing we need to emit are the3881// if_empty tokens, if any.3882// FIXME: this loses AST fidelity; nothing in the compiler will see that3883// these tokens came from #embed. We have to hack around this when printing3884// preprocessed output. The same is true for prefix and suffix tokens.3885if (Params.MaybeIfEmptyParam) {3886ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;3887size_t TokCount = Toks.size();3888auto NewToks = std::make_unique<Token[]>(TokCount);3889llvm::copy(Toks, NewToks.get());3890EnterTokenStream(std::move(NewToks), TokCount, true, true);3891}3892return;3893}38943895size_t NumPrefixToks = Params.PrefixTokenCount(),3896NumSuffixToks = Params.SuffixTokenCount();3897size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;3898size_t CurIdx = 0;3899auto Toks = std::make_unique<Token[]>(TotalNumToks);39003901// Add the prefix tokens, if any.3902if (Params.MaybePrefixParam) {3903llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);3904CurIdx += NumPrefixToks;3905}39063907EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;3908Data->BinaryData = 
BinaryContents;39093910Toks[CurIdx].startToken();3911Toks[CurIdx].setKind(tok::annot_embed);3912Toks[CurIdx].setAnnotationRange(HashLoc);3913Toks[CurIdx++].setAnnotationValue(Data);39143915// Now add the suffix tokens, if any.3916if (Params.MaybeSuffixParam) {3917llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);3918CurIdx += NumSuffixToks;3919}39203921assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");3922EnterTokenStream(std::move(Toks), TotalNumToks, true, true);3923}39243925void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,3926const FileEntry *LookupFromFile) {3927// Give the usual extension/compatibility warnings.3928if (LangOpts.C23)3929Diag(EmbedTok, diag::warn_compat_pp_embed_directive);3930else3931Diag(EmbedTok, diag::ext_pp_embed_directive)3932<< (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);39333934// Parse the filename header3935Token FilenameTok;3936if (LexHeaderName(FilenameTok))3937return;39383939if (FilenameTok.isNot(tok::header_name)) {3940Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);3941if (FilenameTok.isNot(tok::eod))3942DiscardUntilEndOfDirective();3943return;3944}39453946// Parse the optional sequence of3947// directive-parameters:3948// identifier parameter-name-list[opt] directive-argument-list[opt]3949// directive-argument-list:3950// '(' balanced-token-sequence ')'3951// parameter-name-list:3952// '::' identifier parameter-name-list[opt]3953Token CurTok;3954std::optional<LexEmbedParametersResult> Params =3955LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);39563957assert((Params || CurTok.is(tok::eod)) &&3958"expected success or to be at the end of the directive");3959if (!Params)3960return;39613962// Now, splat the data out!3963SmallString<128> FilenameBuffer;3964StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);3965StringRef OriginalFilename = Filename;3966bool isAngled =3967GetIncludeFilenameSpelling(FilenameTok.getLocation(), 
Filename);3968// If GetIncludeFilenameSpelling set the start ptr to null, there was an3969// error.3970assert(!Filename.empty());3971OptionalFileEntryRef MaybeFileRef =3972this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);3973if (!MaybeFileRef) {3974// could not find file3975if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {3976return;3977}3978Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;3979return;3980}3981std::optional<llvm::MemoryBufferRef> MaybeFile =3982getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);3983if (!MaybeFile) {3984// could not find file3985Diag(FilenameTok, diag::err_cannot_open_file)3986<< Filename << "a buffer to the contents could not be created";3987return;3988}3989StringRef BinaryContents = MaybeFile->getBuffer();39903991// The order is important between 'offset' and 'limit'; we want to offset3992// first and then limit second; otherwise we may reduce the notional resource3993// size to something too small to offset into.3994if (Params->MaybeOffsetParam) {3995// FIXME: just like with the limit() and if_empty() parameters, this loses3996// source fidelity in the AST; it has no idea that there was an offset3997// involved.3998// offsets all the way to the end of the file make for an empty file.3999BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);4000}40014002if (Params->MaybeLimitParam) {4003// FIXME: just like with the clang::offset() and if_empty() parameters,4004// this loses source fidelity in the AST; it has no idea there was a limit4005// involved.4006BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);4007}40084009if (Callbacks)4010Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,4011*Params);4012HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);4013}401440154016