CoCalc -- PPDirectives.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/Lex/PPDirectives.cpp
³⁵²³³ views
1
//===--- PPDirectives.cpp - Directive Handling for Preprocessor -----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// Implements # directive processing for the Preprocessor.
11
///
12
//===----------------------------------------------------------------------===//
13

14
#include "clang/Basic/CharInfo.h"
15
#include "clang/Basic/DirectoryEntry.h"
16
#include "clang/Basic/FileManager.h"
17
#include "clang/Basic/IdentifierTable.h"
18
#include "clang/Basic/LangOptions.h"
19
#include "clang/Basic/Module.h"
20
#include "clang/Basic/SourceLocation.h"
21
#include "clang/Basic/SourceManager.h"
22
#include "clang/Basic/TargetInfo.h"
23
#include "clang/Basic/TokenKinds.h"
24
#include "clang/Lex/CodeCompletionHandler.h"
25
#include "clang/Lex/HeaderSearch.h"
26
#include "clang/Lex/HeaderSearchOptions.h"
27
#include "clang/Lex/LexDiagnostic.h"
28
#include "clang/Lex/LiteralSupport.h"
29
#include "clang/Lex/MacroInfo.h"
30
#include "clang/Lex/ModuleLoader.h"
31
#include "clang/Lex/ModuleMap.h"
32
#include "clang/Lex/PPCallbacks.h"
33
#include "clang/Lex/Pragma.h"
34
#include "clang/Lex/Preprocessor.h"
35
#include "clang/Lex/PreprocessorOptions.h"
36
#include "clang/Lex/Token.h"
37
#include "clang/Lex/VariadicMacroSupport.h"
38
#include "llvm/ADT/ArrayRef.h"
39
#include "llvm/ADT/STLExtras.h"
40
#include "llvm/ADT/ScopeExit.h"
41
#include "llvm/ADT/SmallString.h"
42
#include "llvm/ADT/SmallVector.h"
43
#include "llvm/ADT/StringExtras.h"
44
#include "llvm/ADT/StringRef.h"
45
#include "llvm/ADT/StringSwitch.h"
46
#include "llvm/Support/AlignOf.h"
47
#include "llvm/Support/ErrorHandling.h"
48
#include "llvm/Support/Path.h"
49
#include "llvm/Support/SaveAndRestore.h"
50
#include <algorithm>
51
#include <cassert>
52
#include <cstring>
53
#include <new>
54
#include <optional>
55
#include <string>
56
#include <utility>
57

58
using namespace clang;
59

60
//===----------------------------------------------------------------------===//
61
// Utility Methods for Preprocessor Directive Handling.
62
//===----------------------------------------------------------------------===//
63

64
/// Allocate a new MacroInfo constructed from the source location \p L.
///
/// The object is placement-constructed in storage obtained from \c BP. The
/// static_assert enforces that MacroInfo is trivially destructible, so no
/// destructor call is needed for objects created this way.
///
/// \param L Source location forwarded to the MacroInfo constructor.
/// \returns a pointer to the newly constructed MacroInfo.
MacroInfo *Preprocessor::AllocateMacroInfo(SourceLocation L) {
  static_assert(std::is_trivially_destructible_v<MacroInfo>,
                "MacroInfo is placement-allocated from BP and must be "
                "trivially destructible");
  return new (BP) MacroInfo(L);
}
68

69
/// Allocate a new DefMacroDirective in storage obtained from \c BP.
///
/// \param MI  Macro info forwarded to the DefMacroDirective constructor.
/// \param Loc Source location forwarded to the DefMacroDirective constructor.
/// \returns a pointer to the newly constructed directive.
DefMacroDirective *Preprocessor::AllocateDefMacroDirective(MacroInfo *MI,
                                                           SourceLocation Loc) {
  return new (BP) DefMacroDirective(MI, Loc);
}
73

74
/// Allocate a new UndefMacroDirective in storage obtained from \c BP.
///
/// \param UndefLoc Source location forwarded to the UndefMacroDirective
///                 constructor.
/// \returns a pointer to the newly constructed directive.
UndefMacroDirective *
Preprocessor::AllocateUndefMacroDirective(SourceLocation UndefLoc) {
  return new (BP) UndefMacroDirective(UndefLoc);
}
78

79
/// Allocate a new VisibilityMacroDirective in storage obtained from \c BP.
///
/// \param Loc      Source location forwarded to the directive constructor.
/// \param isPublic Visibility flag forwarded to the directive constructor.
/// \returns a pointer to the newly constructed directive.
VisibilityMacroDirective *
Preprocessor::AllocateVisibilityMacroDirective(SourceLocation Loc,
                                               bool isPublic) {
  return new (BP) VisibilityMacroDirective(Loc, isPublic);
}
84

85
/// Read and discard all tokens remaining on the current line until
86
/// the tok::eod token is found.
87
SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) {
88
  SourceRange Res;
89

90
  LexUnexpandedToken(Tmp);
91
  Res.setBegin(Tmp.getLocation());
92
  while (Tmp.isNot(tok::eod)) {
93
    assert(Tmp.isNot(tok::eof) && "EOF seen while discarding directive tokens");
94
    LexUnexpandedToken(Tmp);
95
  }
96
  Res.setEnd(Tmp.getLocation());
97
  return Res;
98
}
99

100
/// Enumerates possible cases of #define/#undef a reserved identifier.
enum MacroDiag {
  MD_NoWarn,        ///< Not a reserved identifier
  MD_KeywordDef,    ///< Macro hides keyword, enabled by default
  MD_ReservedMacro  ///< #define or #undef reserved id, disabled by default
};
106

107
/// Enumerates possible %select values for the pp_err_elif_after_else and
/// pp_err_elif_without_if diagnostics.
///
/// The numeric values are streamed into those diagnostics, so the order of the
/// enumerators must stay in sync with the %select alternatives.
enum PPElifDiag {
  PED_Elif,    ///< The directive is \#elif
  PED_Elifdef, ///< The directive is \#elifdef
  PED_Elifndef ///< The directive is \#elifndef
};
114

115
/// Return true if \p MacroName is one of the known feature-test macros listed
/// below.
///
/// The lookup is an exact, case-sensitive binary search over a fixed table.
static bool isFeatureTestMacro(StringRef MacroName) {
  // list from:
  // * https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_macros.html
  // * https://docs.microsoft.com/en-us/cpp/c-runtime-library/security-features-in-the-crt?view=msvc-160
  // * man 7 feature_test_macros
  // The list must be sorted for correct binary search.
  static constexpr StringRef ReservedMacro[] = {
      "_ATFILE_SOURCE",
      "_BSD_SOURCE",
      "_CRT_NONSTDC_NO_WARNINGS",
      "_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES",
      "_CRT_SECURE_NO_WARNINGS",
      "_FILE_OFFSET_BITS",
      "_FORTIFY_SOURCE",
      "_GLIBCXX_ASSERTIONS",
      "_GLIBCXX_CONCEPT_CHECKS",
      "_GLIBCXX_DEBUG",
      "_GLIBCXX_DEBUG_PEDANTIC",
      "_GLIBCXX_PARALLEL",
      "_GLIBCXX_PARALLEL_ASSERTIONS",
      "_GLIBCXX_SANITIZE_VECTOR",
      "_GLIBCXX_USE_CXX11_ABI",
      "_GLIBCXX_USE_DEPRECATED",
      "_GNU_SOURCE",
      "_ISOC11_SOURCE",
      "_ISOC95_SOURCE",
      "_ISOC99_SOURCE",
      "_LARGEFILE64_SOURCE",
      "_POSIX_C_SOURCE",
      "_REENTRANT",
      "_SVID_SOURCE",
      "_THREAD_SAFE",
      "_XOPEN_SOURCE",
      "_XOPEN_SOURCE_EXTENDED",
      "__STDCPP_WANT_MATH_SPEC_FUNCS__",
      "__STDC_FORMAT_MACROS",
  };
  // std::binary_search silently gives wrong answers on an unsorted range, so
  // catch a mis-ordered insertion in assertion-enabled builds.
  assert(std::is_sorted(std::begin(ReservedMacro), std::end(ReservedMacro)) &&
         "ReservedMacro must be sorted for binary search");
  return std::binary_search(std::begin(ReservedMacro), std::end(ReservedMacro),
                            MacroName);
}
155

156
static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr,
157
                                     const MacroInfo *MI,
158
                                     const StringRef MacroName) {
159
  // If this is a macro with special handling (like __LINE__) then it's language
160
  // defined.
161
  if (MI->isBuiltinMacro())
162
    return true;
163
  // Builtin macros are defined in the builtin file
164
  if (!SourceMgr.isWrittenInBuiltinFile(MI->getDefinitionLoc()))
165
    return false;
166
  // C defines macros starting with __STDC, and C++ defines macros starting with
167
  // __STDCPP
168
  if (MacroName.starts_with("__STDC"))
169
    return true;
170
  // C++ defines the __cplusplus macro
171
  if (MacroName == "__cplusplus")
172
    return true;
173
  // C++ defines various feature-test macros starting with __cpp
174
  if (MacroName.starts_with("__cpp"))
175
    return true;
176
  // Anything else isn't language-defined
177
  return false;
178
}
179

180
/// Classify a \#define of the identifier \p II for diagnostic purposes.
///
/// \returns MD_ReservedMacro if the identifier is reserved in all contexts and
/// is not a known feature-test macro; MD_KeywordDef if it is a keyword in the
/// current language, or is "override"/"final" in C++11 and later; MD_NoWarn
/// otherwise.
static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) {
  const LangOptions &Lang = PP.getLangOpts();
  StringRef Text = II->getName();
  // Reserved identifiers are checked first; feature-test macros are exempt
  // from the reserved-identifier warning.
  if (isReservedInAllContexts(II->isReserved(Lang)))
    return isFeatureTestMacro(Text) ? MD_NoWarn : MD_ReservedMacro;
  if (II->isKeyword(Lang))
    return MD_KeywordDef;
  // "override" and "final" are not matched by the keyword test above, so they
  // are handled explicitly by spelling.
  if (Lang.CPlusPlus11 && (Text == "override" || Text == "final"))
    return MD_KeywordDef;
  return MD_NoWarn;
}
191

192
/// Classify an \#undef of the identifier \p II for diagnostic purposes.
///
/// \returns MD_ReservedMacro if the identifier is reserved in all contexts,
/// MD_NoWarn otherwise. Keywords are deliberately not reported here.
static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) {
  const LangOptions &Lang = PP.getLangOpts();
  // Do not warn on keyword undef.  It is generally harmless and widely used.
  if (isReservedInAllContexts(II->isReserved(Lang)))
    return MD_ReservedMacro;
  return MD_NoWarn;
}
199

200
// Return true if we want to issue a diagnostic by default if we
201
// encounter this name in a #include with the wrong case. For now,
202
// this includes the standard C and C++ headers, Posix headers,
203
// and Boost headers. Improper case for these #includes is a
204
// potential portability issue.
205
static bool warnByDefaultOnWrongCase(StringRef Include) {
206
  // If the first component of the path is "boost", treat this like a standard header
207
  // for the purposes of diagnostics.
208
  if (::llvm::sys::path::begin(Include)->equals_insensitive("boost"))
209
    return true;
210

211
  // "condition_variable" is the longest standard header name at 18 characters.
212
  // If the include file name is longer than that, it can't be a standard header.
213
  static const size_t MaxStdHeaderNameLen = 18u;
214
  if (Include.size() > MaxStdHeaderNameLen)
215
    return false;
216

217
  // Lowercase and normalize the search string.
218
  SmallString<32> LowerInclude{Include};
219
  for (char &Ch : LowerInclude) {
220
    // In the ASCII range?
221
    if (static_cast<unsigned char>(Ch) > 0x7f)
222
      return false; // Can't be a standard header
223
    // ASCII lowercase:
224
    if (Ch >= 'A' && Ch <= 'Z')
225
      Ch += 'a' - 'A';
226
    // Normalize path separators for comparison purposes.
227
    else if (::llvm::sys::path::is_separator(Ch))
228
      Ch = '/';
229
  }
230

231
  // The standard C/C++ and Posix headers
232
  return llvm::StringSwitch<bool>(LowerInclude)
233
    // C library headers
234
    .Cases("assert.h", "complex.h", "ctype.h", "errno.h", "fenv.h", true)
235
    .Cases("float.h", "inttypes.h", "iso646.h", "limits.h", "locale.h", true)
236
    .Cases("math.h", "setjmp.h", "signal.h", "stdalign.h", "stdarg.h", true)
237
    .Cases("stdatomic.h", "stdbool.h", "stdckdint.h", "stddef.h", true)
238
    .Cases("stdint.h", "stdio.h", "stdlib.h", "stdnoreturn.h", true)
239
    .Cases("string.h", "tgmath.h", "threads.h", "time.h", "uchar.h", true)
240
    .Cases("wchar.h", "wctype.h", true)
241

242
    // C++ headers for C library facilities
243
    .Cases("cassert", "ccomplex", "cctype", "cerrno", "cfenv", true)
244
    .Cases("cfloat", "cinttypes", "ciso646", "climits", "clocale", true)
245
    .Cases("cmath", "csetjmp", "csignal", "cstdalign", "cstdarg", true)
246
    .Cases("cstdbool", "cstddef", "cstdint", "cstdio", "cstdlib", true)
247
    .Cases("cstring", "ctgmath", "ctime", "cuchar", "cwchar", true)
248
    .Case("cwctype", true)
249

250
    // C++ library headers
251
    .Cases("algorithm", "fstream", "list", "regex", "thread", true)
252
    .Cases("array", "functional", "locale", "scoped_allocator", "tuple", true)
253
    .Cases("atomic", "future", "map", "set", "type_traits", true)
254
    .Cases("bitset", "initializer_list", "memory", "shared_mutex", "typeindex", true)
255
    .Cases("chrono", "iomanip", "mutex", "sstream", "typeinfo", true)
256
    .Cases("codecvt", "ios", "new", "stack", "unordered_map", true)
257
    .Cases("complex", "iosfwd", "numeric", "stdexcept", "unordered_set", true)
258
    .Cases("condition_variable", "iostream", "ostream", "streambuf", "utility", true)
259
    .Cases("deque", "istream", "queue", "string", "valarray", true)
260
    .Cases("exception", "iterator", "random", "strstream", "vector", true)
261
    .Cases("forward_list", "limits", "ratio", "system_error", true)
262

263
    // POSIX headers (which aren't also C headers)
264
    .Cases("aio.h", "arpa/inet.h", "cpio.h", "dirent.h", "dlfcn.h", true)
265
    .Cases("fcntl.h", "fmtmsg.h", "fnmatch.h", "ftw.h", "glob.h", true)
266
    .Cases("grp.h", "iconv.h", "langinfo.h", "libgen.h", "monetary.h", true)
267
    .Cases("mqueue.h", "ndbm.h", "net/if.h", "netdb.h", "netinet/in.h", true)
268
    .Cases("netinet/tcp.h", "nl_types.h", "poll.h", "pthread.h", "pwd.h", true)
269
    .Cases("regex.h", "sched.h", "search.h", "semaphore.h", "spawn.h", true)
270
    .Cases("strings.h", "stropts.h", "sys/ipc.h", "sys/mman.h", "sys/msg.h", true)
271
    .Cases("sys/resource.h", "sys/select.h",  "sys/sem.h", "sys/shm.h", "sys/socket.h", true)
272
    .Cases("sys/stat.h", "sys/statvfs.h", "sys/time.h", "sys/times.h", "sys/types.h", true)
273
    .Cases("sys/uio.h", "sys/un.h", "sys/utsname.h", "sys/wait.h", "syslog.h", true)
274
    .Cases("tar.h", "termios.h", "trace.h", "ulimit.h", true)
275
    .Cases("unistd.h", "utime.h", "utmpx.h", "wordexp.h", true)
276
    .Default(false);
277
}
278

279
/// Find a similar string in `Candidates`.
280
///
281
/// \param LHS a string for a similar string in `Candidates`
282
///
283
/// \param Candidates the candidates to find a similar string.
284
///
285
/// \returns a similar string if exists. If no similar string exists,
286
/// returns std::nullopt.
287
static std::optional<StringRef>
288
findSimilarStr(StringRef LHS, const std::vector<StringRef> &Candidates) {
289
  // We need to check if `Candidates` has the exact case-insensitive string
290
  // because the Levenshtein distance match does not care about it.
291
  for (StringRef C : Candidates) {
292
    if (LHS.equals_insensitive(C)) {
293
      return C;
294
    }
295
  }
296

297
  // Keep going with the Levenshtein distance match.
298
  // If the LHS size is less than 3, use the LHS size minus 1 and if not,
299
  // use the LHS size divided by 3.
300
  size_t Length = LHS.size();
301
  size_t MaxDist = Length < 3 ? Length - 1 : Length / 3;
302

303
  std::optional<std::pair<StringRef, size_t>> SimilarStr;
304
  for (StringRef C : Candidates) {
305
    size_t CurDist = LHS.edit_distance(C, true);
306
    if (CurDist <= MaxDist) {
307
      if (!SimilarStr) {
308
        // The first similar string found.
309
        SimilarStr = {C, CurDist};
310
      } else if (CurDist < SimilarStr->second) {
311
        // More similar string found.
312
        SimilarStr = {C, CurDist};
313
      }
314
    }
315
  }
316

317
  if (SimilarStr) {
318
    return SimilarStr->first;
319
  } else {
320
    return std::nullopt;
321
  }
322
}
323

324
bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
325
                                  bool *ShadowFlag) {
326
  // Missing macro name?
327
  if (MacroNameTok.is(tok::eod))
328
    return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
329

330
  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
331
  if (!II)
332
    return Diag(MacroNameTok, diag::err_pp_macro_not_identifier);
333

334
  if (II->isCPlusPlusOperatorKeyword()) {
335
    // C++ 2.5p2: Alternative tokens behave the same as its primary token
336
    // except for their spellings.
337
    Diag(MacroNameTok, getLangOpts().MicrosoftExt
338
                           ? diag::ext_pp_operator_used_as_macro_name
339
                           : diag::err_pp_operator_used_as_macro_name)
340
        << II << MacroNameTok.getKind();
341
    // Allow #defining |and| and friends for Microsoft compatibility or
342
    // recovery when legacy C headers are included in C++.
343
  }
344

345
  if ((isDefineUndef != MU_Other) && II->getPPKeywordID() == tok::pp_defined) {
346
    // Error if defining "defined": C99 6.10.8/4, C++ [cpp.predefined]p4.
347
    return Diag(MacroNameTok, diag::err_defined_macro_name);
348
  }
349

350
  // If defining/undefining reserved identifier or a keyword, we need to issue
351
  // a warning.
352
  SourceLocation MacroNameLoc = MacroNameTok.getLocation();
353
  if (ShadowFlag)
354
    *ShadowFlag = false;
355
  if (!SourceMgr.isInSystemHeader(MacroNameLoc) &&
356
      (SourceMgr.getBufferName(MacroNameLoc) != "<built-in>")) {
357
    MacroDiag D = MD_NoWarn;
358
    if (isDefineUndef == MU_Define) {
359
      D = shouldWarnOnMacroDef(*this, II);
360
    }
361
    else if (isDefineUndef == MU_Undef)
362
      D = shouldWarnOnMacroUndef(*this, II);
363
    if (D == MD_KeywordDef) {
364
      // We do not want to warn on some patterns widely used in configuration
365
      // scripts.  This requires analyzing next tokens, so do not issue warnings
366
      // now, only inform caller.
367
      if (ShadowFlag)
368
        *ShadowFlag = true;
369
    }
370
    if (D == MD_ReservedMacro)
371
      Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id);
372
  }
373

374
  // Okay, we got a good identifier.
375
  return false;
376
}
377

378
/// Lex and validate a macro name, which occurs after a
379
/// \#define or \#undef.
380
///
381
/// This sets the token kind to eod and discards the rest of the macro line if
382
/// the macro name is invalid.
383
///
384
/// \param MacroNameTok Token that is expected to be a macro name.
385
/// \param isDefineUndef Context in which macro is used.
386
/// \param ShadowFlag Points to a flag that is set if macro shadows a keyword.
387
void Preprocessor::ReadMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
388
                                 bool *ShadowFlag) {
389
  // Read the token, don't allow macro expansion on it.
390
  LexUnexpandedToken(MacroNameTok);
391

392
  if (MacroNameTok.is(tok::code_completion)) {
393
    if (CodeComplete)
394
      CodeComplete->CodeCompleteMacroName(isDefineUndef == MU_Define);
395
    setCodeCompletionReached();
396
    LexUnexpandedToken(MacroNameTok);
397
  }
398

399
  if (!CheckMacroName(MacroNameTok, isDefineUndef, ShadowFlag))
400
    return;
401

402
  // Invalid macro name, read and discard the rest of the line and set the
403
  // token kind to tok::eod if necessary.
404
  if (MacroNameTok.isNot(tok::eod)) {
405
    MacroNameTok.setKind(tok::eod);
406
    DiscardUntilEndOfDirective();
407
  }
408
}
409

410
/// Ensure that the next token is a tok::eod token.
411
///
412
/// If not, emit a diagnostic and consume up until the eod.  If EnableMacros is
413
/// true, then we consider macros that expand to zero tokens as being ok.
414
///
415
/// Returns the location of the end of the directive.
416
SourceLocation Preprocessor::CheckEndOfDirective(const char *DirType,
417
                                                 bool EnableMacros) {
418
  Token Tmp;
419
  // Lex unexpanded tokens for most directives: macros might expand to zero
420
  // tokens, causing us to miss diagnosing invalid lines.  Some directives (like
421
  // #line) allow empty macros.
422
  if (EnableMacros)
423
    Lex(Tmp);
424
  else
425
    LexUnexpandedToken(Tmp);
426

427
  // There should be no tokens after the directive, but we allow them as an
428
  // extension.
429
  while (Tmp.is(tok::comment))  // Skip comments in -C mode.
430
    LexUnexpandedToken(Tmp);
431

432
  if (Tmp.is(tok::eod))
433
    return Tmp.getLocation();
434

435
  // Add a fixit in GNU/C99/C++ mode.  Don't offer a fixit for strict-C89,
436
  // or if this is a macro-style preprocessing directive, because it is more
437
  // trouble than it is worth to insert /**/ and check that there is no /**/
438
  // in the range also.
439
  FixItHint Hint;
440
  if ((LangOpts.GNUMode || LangOpts.C99 || LangOpts.CPlusPlus) &&
441
      !CurTokenLexer)
442
    Hint = FixItHint::CreateInsertion(Tmp.getLocation(),"//");
443
  Diag(Tmp, diag::ext_pp_extra_tokens_at_eol) << DirType << Hint;
444
  return DiscardUntilEndOfDirective().getEnd();
445
}
446

447
void Preprocessor::SuggestTypoedDirective(const Token &Tok,
448
                                          StringRef Directive) const {
449
  // If this is a `.S` file, treat unknown # directives as non-preprocessor
450
  // directives.
451
  if (getLangOpts().AsmPreprocessor) return;
452

453
  std::vector<StringRef> Candidates = {
454
      "if", "ifdef", "ifndef", "elif", "else", "endif"
455
  };
456
  if (LangOpts.C23 || LangOpts.CPlusPlus23)
457
    Candidates.insert(Candidates.end(), {"elifdef", "elifndef"});
458

459
  if (std::optional<StringRef> Sugg = findSimilarStr(Directive, Candidates)) {
460
    // Directive cannot be coming from macro.
461
    assert(Tok.getLocation().isFileID());
462
    CharSourceRange DirectiveRange = CharSourceRange::getCharRange(
463
        Tok.getLocation(),
464
        Tok.getLocation().getLocWithOffset(Directive.size()));
465
    StringRef SuggValue = *Sugg;
466

467
    auto Hint = FixItHint::CreateReplacement(DirectiveRange, SuggValue);
468
    Diag(Tok, diag::warn_pp_invalid_directive) << 1 << SuggValue << Hint;
469
  }
470
}
471

472
/// SkipExcludedConditionalBlock - We just read a \#if or related directive and
473
/// decided that the subsequent tokens are in the \#if'd out portion of the
474
/// file.  Lex the rest of the file, until we see an \#endif.  If
475
/// FoundNonSkipPortion is true, then we have already emitted code for part of
476
/// this \#if directive, so \#else/\#elif blocks should never be entered.
477
/// If FoundElse is false, then \#else directives are ok, if not, then we have
478
/// already seen one so a \#else directive is a duplicate.  When this returns,
479
/// the caller can lex the first valid token.
480
void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
481
                                                SourceLocation IfTokenLoc,
482
                                                bool FoundNonSkipPortion,
483
                                                bool FoundElse,
484
                                                SourceLocation ElseLoc) {
485
  // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
486
  // not getting called recursively by storing the RecordedSkippedRanges
487
  // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
488
  // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
489
  // invalidated. If this changes and there is a need to call
490
  // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
491
  // change to do a second lookup in endLexPass function instead of reusing the
492
  // lookup pointer.
493
  assert(!SkippingExcludedConditionalBlock &&
494
         "calling SkipExcludedConditionalBlock recursively");
495
  llvm::SaveAndRestore SARSkipping(SkippingExcludedConditionalBlock, true);
496

497
  ++NumSkipped;
498
  assert(!CurTokenLexer && "Conditional PP block cannot appear in a macro!");
499
  assert(CurPPLexer && "Conditional PP block must be in a file!");
500
  assert(CurLexer && "Conditional PP block but no current lexer set!");
501

502
  if (PreambleConditionalStack.reachedEOFWhileSkipping())
503
    PreambleConditionalStack.clearSkipInfo();
504
  else
505
    CurPPLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/ false,
506
                                     FoundNonSkipPortion, FoundElse);
507

508
  // Enter raw mode to disable identifier lookup (and thus macro expansion),
509
  // disabling warnings, etc.
510
  CurPPLexer->LexingRawMode = true;
511
  Token Tok;
512
  SourceLocation endLoc;
513

514
  /// Keeps track and caches skipped ranges and also retrieves a prior skipped
515
  /// range if the same block is re-visited.
516
  struct SkippingRangeStateTy {
517
    Preprocessor &PP;
518

519
    const char *BeginPtr = nullptr;
520
    unsigned *SkipRangePtr = nullptr;
521

522
    SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
523

524
    void beginLexPass() {
525
      if (BeginPtr)
526
        return; // continue skipping a block.
527

528
      // Initiate a skipping block and adjust the lexer if we already skipped it
529
      // before.
530
      BeginPtr = PP.CurLexer->getBufferLocation();
531
      SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
532
      if (*SkipRangePtr) {
533
        PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
534
                          /*IsAtStartOfLine*/ true);
535
      }
536
    }
537

538
    void endLexPass(const char *Hashptr) {
539
      if (!BeginPtr) {
540
        // Not doing normal lexing.
541
        assert(PP.CurLexer->isDependencyDirectivesLexer());
542
        return;
543
      }
544

545
      // Finished skipping a block, record the range if it's first time visited.
546
      if (!*SkipRangePtr) {
547
        *SkipRangePtr = Hashptr - BeginPtr;
548
      }
549
      assert(*SkipRangePtr == unsigned(Hashptr - BeginPtr));
550
      BeginPtr = nullptr;
551
      SkipRangePtr = nullptr;
552
    }
553
  } SkippingRangeState(*this);
554

555
  while (true) {
556
    if (CurLexer->isDependencyDirectivesLexer()) {
557
      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
558
    } else {
559
      SkippingRangeState.beginLexPass();
560
      while (true) {
561
        CurLexer->Lex(Tok);
562

563
        if (Tok.is(tok::code_completion)) {
564
          setCodeCompletionReached();
565
          if (CodeComplete)
566
            CodeComplete->CodeCompleteInConditionalExclusion();
567
          continue;
568
        }
569

570
        // If this is the end of the buffer, we have an error.
571
        if (Tok.is(tok::eof)) {
572
          // We don't emit errors for unterminated conditionals here,
573
          // Lexer::LexEndOfFile can do that properly.
574
          // Just return and let the caller lex after this #include.
575
          if (PreambleConditionalStack.isRecording())
576
            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
577
                                                      FoundNonSkipPortion,
578
                                                      FoundElse, ElseLoc);
579
          break;
580
        }
581

582
        // If this token is not a preprocessor directive, just skip it.
583
        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
584
          continue;
585

586
        break;
587
      }
588
    }
589
    if (Tok.is(tok::eof))
590
      break;
591

592
    // We just parsed a # character at the start of a line, so we're in
593
    // directive mode.  Tell the lexer this so any newlines we see will be
594
    // converted into an EOD token (this terminates the macro).
595
    CurPPLexer->ParsingPreprocessorDirective = true;
596
    if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
597

598
    assert(Tok.is(tok::hash));
599
    const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
600
    assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
601

602
    // Read the next token, the directive flavor.
603
    LexUnexpandedToken(Tok);
604

605
    // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
606
    // something bogus), skip it.
607
    if (Tok.isNot(tok::raw_identifier)) {
608
      CurPPLexer->ParsingPreprocessorDirective = false;
609
      // Restore comment saving mode.
610
      if (CurLexer) CurLexer->resetExtendedTokenMode();
611
      continue;
612
    }
613

614
    // If the first letter isn't i or e, it isn't interesting to us.  We know that
615
    // this is safe in the face of spelling differences, because there is no way
616
    // to spell an i/e in a strange way that is another letter.  Skipping this
617
    // allows us to avoid looking up the identifier info for #define/#undef and
618
    // other common directives.
619
    StringRef RI = Tok.getRawIdentifier();
620

621
    char FirstChar = RI[0];
622
    if (FirstChar >= 'a' && FirstChar <= 'z' &&
623
        FirstChar != 'i' && FirstChar != 'e') {
624
      CurPPLexer->ParsingPreprocessorDirective = false;
625
      // Restore comment saving mode.
626
      if (CurLexer) CurLexer->resetExtendedTokenMode();
627
      continue;
628
    }
629

630
    // Get the identifier name without trigraphs or embedded newlines.  Note
631
    // that we can't use Tok.getIdentifierInfo() because its lookup is disabled
632
    // when skipping.
633
    char DirectiveBuf[20];
634
    StringRef Directive;
635
    if (!Tok.needsCleaning() && RI.size() < 20) {
636
      Directive = RI;
637
    } else {
638
      std::string DirectiveStr = getSpelling(Tok);
639
      size_t IdLen = DirectiveStr.size();
640
      if (IdLen >= 20) {
641
        CurPPLexer->ParsingPreprocessorDirective = false;
642
        // Restore comment saving mode.
643
        if (CurLexer) CurLexer->resetExtendedTokenMode();
644
        continue;
645
      }
646
      memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
647
      Directive = StringRef(DirectiveBuf, IdLen);
648
    }
649

650
    if (Directive.starts_with("if")) {
651
      StringRef Sub = Directive.substr(2);
652
      if (Sub.empty() ||   // "if"
653
          Sub == "def" ||   // "ifdef"
654
          Sub == "ndef") {  // "ifndef"
655
        // We know the entire #if/#ifdef/#ifndef block will be skipped, don't
656
        // bother parsing the condition.
657
        DiscardUntilEndOfDirective();
658
        CurPPLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true,
659
                                       /*foundnonskip*/false,
660
                                       /*foundelse*/false);
661
      } else {
662
        SuggestTypoedDirective(Tok, Directive);
663
      }
664
    } else if (Directive[0] == 'e') {
665
      StringRef Sub = Directive.substr(1);
666
      if (Sub == "ndif") {  // "endif"
667
        PPConditionalInfo CondInfo;
668
        CondInfo.WasSkipping = true; // Silence bogus warning.
669
        bool InCond = CurPPLexer->popConditionalLevel(CondInfo);
670
        (void)InCond;  // Silence warning in no-asserts mode.
671
        assert(!InCond && "Can't be skipping if not in a conditional!");
672

673
        // If we popped the outermost skipping block, we're done skipping!
674
        if (!CondInfo.WasSkipping) {
675
          SkippingRangeState.endLexPass(Hashptr);
676
          // Restore the value of LexingRawMode so that trailing comments
677
          // are handled correctly, if we've reached the outermost block.
678
          CurPPLexer->LexingRawMode = false;
679
          endLoc = CheckEndOfDirective("endif");
680
          CurPPLexer->LexingRawMode = true;
681
          if (Callbacks)
682
            Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc);
683
          break;
684
        } else {
685
          DiscardUntilEndOfDirective();
686
        }
687
      } else if (Sub == "lse") { // "else".
688
        // #else directive in a skipping conditional.  If not in some other
689
        // skipping conditional, and if #else hasn't already been seen, enter it
690
        // as a non-skipping conditional.
691
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
692

693
        if (!CondInfo.WasSkipping)
694
          SkippingRangeState.endLexPass(Hashptr);
695

696
        // If this is a #else with a #else before it, report the error.
697
        if (CondInfo.FoundElse)
698
          Diag(Tok, diag::pp_err_else_after_else);
699

700
        // Note that we've seen a #else in this conditional.
701
        CondInfo.FoundElse = true;
702

703
        // If the conditional is at the top level, and the #if block wasn't
704
        // entered, enter the #else block now.
705
        if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
706
          CondInfo.FoundNonSkip = true;
707
          // Restore the value of LexingRawMode so that trailing comments
708
          // are handled correctly.
709
          CurPPLexer->LexingRawMode = false;
710
          endLoc = CheckEndOfDirective("else");
711
          CurPPLexer->LexingRawMode = true;
712
          if (Callbacks)
713
            Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc);
714
          break;
715
        } else {
716
          DiscardUntilEndOfDirective();  // C99 6.10p4.
717
        }
718
      } else if (Sub == "lif") {  // "elif".
719
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
720

721
        if (!CondInfo.WasSkipping)
722
          SkippingRangeState.endLexPass(Hashptr);
723

724
        // If this is a #elif with a #else before it, report the error.
725
        if (CondInfo.FoundElse)
726
          Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
727

728
        // If this is in a skipping block or if we're already handled this #if
729
        // block, don't bother parsing the condition.
730
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
731
          // FIXME: We should probably do at least some minimal parsing of the
732
          // condition to verify that it is well-formed. The current state
733
          // allows #elif* directives with completely malformed (or missing)
734
          // conditions.
735
          DiscardUntilEndOfDirective();
736
        } else {
737
          // Restore the value of LexingRawMode so that identifiers are
738
          // looked up, etc, inside the #elif expression.
739
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
740
          CurPPLexer->LexingRawMode = false;
741
          IdentifierInfo *IfNDefMacro = nullptr;
742
          DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
743
          // Stop if Lexer became invalid after hitting code completion token.
744
          if (!CurPPLexer)
745
            return;
746
          const bool CondValue = DER.Conditional;
747
          CurPPLexer->LexingRawMode = true;
748
          if (Callbacks) {
749
            Callbacks->Elif(
750
                Tok.getLocation(), DER.ExprRange,
751
                (CondValue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False),
752
                CondInfo.IfLoc);
753
          }
754
          // If this condition is true, enter it!
755
          if (CondValue) {
756
            CondInfo.FoundNonSkip = true;
757
            break;
758
          }
759
        }
760
      } else if (Sub == "lifdef" ||  // "elifdef"
761
                 Sub == "lifndef") { // "elifndef"
762
        bool IsElifDef = Sub == "lifdef";
763
        PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
764
        Token DirectiveToken = Tok;
765

766
        if (!CondInfo.WasSkipping)
767
          SkippingRangeState.endLexPass(Hashptr);
768

769
        // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode even
770
        // if this branch is in a skipping block.
771
        unsigned DiagID;
772
        if (LangOpts.CPlusPlus)
773
          DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
774
                                        : diag::ext_cxx23_pp_directive;
775
        else
776
          DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
777
                                : diag::ext_c23_pp_directive;
778
        Diag(Tok, DiagID) << (IsElifDef ? PED_Elifdef : PED_Elifndef);
779

780
        // If this is a #elif with a #else before it, report the error.
781
        if (CondInfo.FoundElse)
782
          Diag(Tok, diag::pp_err_elif_after_else)
783
              << (IsElifDef ? PED_Elifdef : PED_Elifndef);
784

785
        // If this is in a skipping block or if we've already handled this #if
786
        // block, don't bother parsing the condition.
787
        if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) {
788
          // FIXME: We should probably do at least some minimal parsing of the
789
          // condition to verify that it is well-formed. The current state
790
          // allows #elif* directives with completely malformed (or missing)
791
          // conditions.
792
          DiscardUntilEndOfDirective();
793
        } else {
794
          // Restore the value of LexingRawMode so that identifiers are
795
          // looked up, etc, inside the #elif[n]def expression.
796
          assert(CurPPLexer->LexingRawMode && "We have to be skipping here!");
797
          CurPPLexer->LexingRawMode = false;
798
          Token MacroNameTok;
799
          ReadMacroName(MacroNameTok);
800
          CurPPLexer->LexingRawMode = true;
801

802
          // If the macro name token is tok::eod, there was an error that was
803
          // already reported.
804
          if (MacroNameTok.is(tok::eod)) {
805
            // Skip code until we get to #endif.  This helps with recovery by
806
            // not emitting an error when the #endif is reached.
807
            continue;
808
          }
809

810
          emitMacroExpansionWarnings(MacroNameTok);
811

812
          CheckEndOfDirective(IsElifDef ? "elifdef" : "elifndef");
813

814
          IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
815
          auto MD = getMacroDefinition(MII);
816
          MacroInfo *MI = MD.getMacroInfo();
817

818
          if (Callbacks) {
819
            if (IsElifDef) {
820
              Callbacks->Elifdef(DirectiveToken.getLocation(), MacroNameTok,
821
                                 MD);
822
            } else {
823
              Callbacks->Elifndef(DirectiveToken.getLocation(), MacroNameTok,
824
                                  MD);
825
            }
826
          }
827
          // If this condition is true, enter it!
828
          if (static_cast<bool>(MI) == IsElifDef) {
829
            CondInfo.FoundNonSkip = true;
830
            break;
831
          }
832
        }
833
      } else {
834
        SuggestTypoedDirective(Tok, Directive);
835
      }
836
    } else {
837
      SuggestTypoedDirective(Tok, Directive);
838
    }
839

840
    CurPPLexer->ParsingPreprocessorDirective = false;
841
    // Restore comment saving mode.
842
    if (CurLexer) CurLexer->resetExtendedTokenMode();
843
  }
844

845
  // Finally, if we are out of the conditional (saw an #endif or ran off the end
846
  // of the file), just stop skipping and return to lexing whatever came after
847
  // the #if block.
848
  CurPPLexer->LexingRawMode = false;
849

850
  // The last skipped range isn't actually skipped yet if it's truncated
851
  // by the end of the preamble; we'll resume parsing after the preamble.
852
  if (Callbacks && (Tok.isNot(tok::eof) || !isRecordingPreamble()))
853
    Callbacks->SourceRangeSkipped(
854
        SourceRange(HashTokenLoc, endLoc.isValid()
855
                                      ? endLoc
856
                                      : CurPPLexer->getSourceLocation()),
857
        Tok.getLocation());
858
}
859

860
Module *Preprocessor::getModuleForLocation(SourceLocation Loc,
                                           bool AllowTextual) {
  // A location outside the main file is attributed to the module that the
  // module map associates with the file containing it.
  // FIXME: Look into directly passing the FileEntry from LookupFile instead.
  if (!SourceMgr.isInMainFile(Loc)) {
    FileID ContainingID = SourceMgr.getFileID(SourceMgr.getExpansionLoc(Loc));
    if (auto ContainingFile = SourceMgr.getFileEntryRefForID(ContainingID))
      return HeaderInfo.getModuleMap()
          .findModuleForHeader(*ContainingFile, AllowTextual)
          .getModule();
  }

  // Either the location is in the main file, or it has no backing file entry.
  // It then belongs to the current module, provided one is named in the
  // language options.
  const auto &CurrentModuleName = getLangOpts().CurrentModule;
  if (CurrentModuleName.empty())
    return nullptr;
  return HeaderInfo.lookupModule(CurrentModuleName, Loc);
}
880

881
OptionalFileEntryRef
Preprocessor::getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
                                               SourceLocation Loc) {
  // The module that would be doing the #include; accessibility of candidate
  // headers is judged from its point of view.
  Module *RequestingModule = getModuleForLocation(
      IncLoc, LangOpts.ModulesValidateTextualHeaderIncludes);

  // When a module import syntax is available, a header is never suggested
  // just to make a module visible; the caller should suggest an import.
  const bool HasImportSyntax =
      getLangOpts().ObjC || getLangOpts().CPlusPlusModules;

  // Climb the include stack starting at Loc, looking through textual headers
  // until a non-textual header that can be #included is found. (Textual
  // headers of a module that also has non-textual headers are assumed not to
  // be the intended way to import entities from that module.)
  auto &SM = getSourceManager();
  while (Loc.isValid() && !SM.isInMainFile(Loc)) {
    auto CurID = SM.getFileID(SM.getExpansionLoc(Loc));
    auto CurFile = SM.getFileEntryRefForID(CurID);
    if (!CurFile)
      break;

    // Load the module maps of every enclosing directory so that all modules
    // that might contain this header are known.
    HeaderInfo.hasModuleMap(CurFile->getName(), /*Root*/ nullptr,
                            SourceMgr.isInSystemHeader(Loc));

    bool SawInaccessibleHeader = false;
    for (auto Header : HeaderInfo.findAllModulesForHeader(*CurFile)) {
      if (!Header.isAccessibleFrom(RequestingModule)) {
        // It's in a private header; we can't #include it.
        // FIXME: If there's a public header in some module that re-exports it,
        // then we could suggest including that, but it's not clear that's the
        // expected way to make this entity visible.
        SawInaccessibleHeader = true;
        continue;
      }

      // Explicitly excluded headers are never suggested, and textual headers
      // are only considered further down, if they are include-guarded.
      const auto Role = Header.getRole();
      if (Role == ModuleMap::ExcludedHeader ||
          (Role & ModuleMap::TextualHeader))
        continue;

      if (HasImportSyntax)
        return std::nullopt;

      // An accessible, non-textual header that transitively includes the
      // given location: this is the thing to #include.
      return *CurFile;
    }

    // FIXME: If we're bailing out due to a private header, we shouldn't suggest
    // an import either.
    if (SawInaccessibleHeader)
      return std::nullopt;

    // An includable header with an include guard is assumed to be meant for
    // #include rather than for import through a module that transitively
    // includes it.
    if (getHeaderSearchInfo().isFileMultipleIncludeGuarded(*CurFile))
      return *CurFile;

    // Otherwise continue with whoever included this file.
    Loc = SM.getIncludeLoc(CurID);
  }

  return std::nullopt;
}
951

952
OptionalFileEntryRef Preprocessor::LookupFile(
    SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
    ConstSearchDirIterator FromDir, const FileEntry *FromFile,
    ConstSearchDirIterator *CurDirArg, SmallVectorImpl<char> *SearchPath,
    SmallVectorImpl<char> *RelativePath,
    ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
    bool *IsFrameworkFound, bool SkipCache, bool OpenFile, bool CacheFailures) {
  // Callers that pass no CurDirArg get a throwaway iterator to write into.
  ConstSearchDirIterator ScratchCurDir = nullptr;
  ConstSearchDirIterator &CurDir = CurDirArg ? *CurDirArg : ScratchCurDir;

  Module *RequestingModule = getModuleForLocation(
      FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);

  // If the header lookup mechanism may be relative to the current inclusion
  // stack, record the parent #includes.
  SmallVector<std::pair<OptionalFileEntryRef, DirectoryEntryRef>, 16> Includers;
  bool BuildSystemModule = false;
  const bool LookupIsRelativeToIncluder = !FromDir && !FromFile;
  if (LookupIsRelativeToIncluder) {
    FileID FID = getCurrentFileLexer()->getFileID();

    if (OptionalFileEntryRef FileEnt = SourceMgr.getFileEntryRefForID(FID)) {
      // The ordinary case: look next to the file currently being lexed.
      Includers.push_back(std::make_pair(*FileEnt, FileEnt->getDir()));
    } else if (FID == SourceMgr.getMainFileID() && MainFileDir) {
      // No file entry means this must be the predefines buffer or the module
      // includes buffer; any other file is not lexed with a normal lexer and
      // so is never scanned for preprocessor directives.
      //
      // This is the module includes buffer: #include references (which come
      // from header declarations in the module map) are resolved relative to
      // the module map file.
      auto IncludeDir =
          HeaderInfo.getModuleMap().shouldImportRelativeToBuiltinIncludeDir(
              Filename, getCurrentModule())
              ? HeaderInfo.getModuleMap().getBuiltinDir()
              : MainFileDir;
      Includers.push_back(std::make_pair(std::nullopt, *IncludeDir));
      BuildSystemModule = getCurrentModule()->IsSystem;
    } else if (OptionalFileEntryRef MainFile = SourceMgr.getFileEntryRefForID(
                   SourceMgr.getMainFileID())) {
      // This is the predefines buffer: #include references (which come from
      // the -include command line argument) are resolved from the current
      // working directory instead of relative to the main file.
      auto CWD = FileMgr.getOptionalDirectoryRef(".");
      Includers.push_back(std::make_pair(*MainFile, *CWD));
    }

    // MSVC searches the current include stack from top to bottom for
    // headers included by quoted include directives.
    // See: http://msdn.microsoft.com/en-us/library/36k2cdd4.aspx
    if (LangOpts.MSVCCompat && !isAngled) {
      for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
        if (!IsFileLexer(ISEntry))
          continue;
        if (OptionalFileEntryRef StackFile = ISEntry.ThePPLexer->getFileEntry())
          Includers.push_back(
              std::make_pair(*StackFile, StackFile->getDir()));
      }
    }
  }

  CurDir = CurDirLookup;

  if (FromFile) {
    // We're supposed to start looking from after a particular file. Search
    // the include path until we find that file or run out of files.
    ConstSearchDirIterator ProbeCurDir = CurDir;
    ConstSearchDirIterator ProbeFromDir = nullptr;
    for (;;) {
      OptionalFileEntryRef Candidate = HeaderInfo.LookupFile(
          Filename, FilenameLoc, isAngled, ProbeFromDir, &ProbeCurDir,
          Includers, SearchPath, RelativePath, RequestingModule,
          SuggestedModule, /*IsMapped=*/nullptr,
          /*IsFrameworkFound=*/nullptr, SkipCache);
      if (!Candidate)
        break;

      // Keep looking as if this file did a #include_next.
      ProbeFromDir = ProbeCurDir;
      ++ProbeFromDir;
      if (&Candidate->getFileEntry() == FromFile) {
        // Found it.
        FromDir = ProbeFromDir;
        CurDir = ProbeCurDir;
        break;
      }
    }
  }

  // Do a standard file entry lookup.
  if (OptionalFileEntryRef Found = HeaderInfo.LookupFile(
          Filename, FilenameLoc, isAngled, FromDir, &CurDir, Includers,
          SearchPath, RelativePath, RequestingModule, SuggestedModule, IsMapped,
          IsFrameworkFound, SkipCache, BuildSystemModule, OpenFile,
          CacheFailures))
    return Found;

  // Otherwise, see if this is a subframework header.  If so, this is relative
  // to one of the headers on the #include stack.  Each such header is handed
  // to HeaderInfo in turn, starting with the file currently being lexed.
  auto LookupAsSubframeworkOf =
      [&](OptionalFileEntryRef Includer) -> OptionalFileEntryRef {
    if (!Includer)
      return std::nullopt;
    return HeaderInfo.LookupSubframeworkHeader(Filename, *Includer, SearchPath,
                                               RelativePath, RequestingModule,
                                               SuggestedModule);
  };

  if (IsFileLexer())
    if (OptionalFileEntryRef Found =
            LookupAsSubframeworkOf(CurPPLexer->getFileEntry()))
      return Found;

  for (IncludeStackInfo &ISEntry : llvm::reverse(IncludeMacroStack)) {
    if (!IsFileLexer(ISEntry))
      continue;
    if (OptionalFileEntryRef Found =
            LookupAsSubframeworkOf(ISEntry.ThePPLexer->getFileEntry()))
      return Found;
  }

  // Otherwise, we really couldn't find the file.
  return std::nullopt;
}
1076

1077
/// Locate the file named by an embed directive.
///
/// An absolute \p Filename is looked up directly and nothing else is tried.
/// Otherwise candidates are tried in this order, returning the first one the
/// file manager can resolve:
///   1. (quoted form only) the directory of \p LookupFromFile, taken from its
///      real path name, when that is available;
///   2. (quoted form only) the directory the file manager reports for ".";
///   3. each entry of the preprocessor options' EmbedEntries list.
///
/// \param Filename        The spelling of the file to find.
/// \param isAngled        True for the angled form, which skips steps 1 and 2.
/// \param OpenFile        Forwarded to FileManager::getFileRef.
/// \param LookupFromFile  File relative to which a quoted name is first
///                        resolved; may be null.
/// \returns The file entry, or std::nullopt if no candidate exists.
OptionalFileEntryRef
Preprocessor::LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
                              const FileEntry *LookupFromFile) {
  FileManager &FM = this->getFileManager();
  if (llvm::sys::path::is_absolute(Filename)) {
    // lookup path or immediately fail
    llvm::Expected<FileEntryRef> ShouldBeEntry =
        FM.getFileRef(Filename, OpenFile);
    return llvm::expectedToOptional(std::move(ShouldBeEntry));
  }

  // Builds "<StartingFrom>/<FileName>" in LookupPath using native separators.
  // When RemoveInitialFileComponentFromLookupPath is set, StartingFrom names a
  // file and its last component is dropped first.
  auto SeparateComponents = [](SmallVectorImpl<char> &LookupPath,
                               StringRef StartingFrom, StringRef FileName,
                               bool RemoveInitialFileComponentFromLookupPath) {
    llvm::sys::path::native(StartingFrom, LookupPath);
    if (RemoveInitialFileComponentFromLookupPath)
      llvm::sys::path::remove_filename(LookupPath);
    if (!LookupPath.empty() &&
        !llvm::sys::path::is_separator(LookupPath.back())) {
      LookupPath.push_back(llvm::sys::path::get_separator().front());
    }
    LookupPath.append(FileName.begin(), FileName.end());
  };

  // Otherwise, it's search time!
  SmallString<512> LookupPath;
  // Non-angled lookup
  if (!isAngled) {
    if (LookupFromFile) {
      // Use file-based lookup.
      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
      if (!FullFileDir.empty()) {
        SeparateComponents(LookupPath, FullFileDir, Filename, true);
        llvm::Expected<FileEntryRef> ShouldBeEntry =
            FM.getFileRef(LookupPath, OpenFile);
        if (ShouldBeEntry)
          return llvm::expectedToOptional(std::move(ShouldBeEntry));
        llvm::consumeError(ShouldBeEntry.takeError());
      }
    }

    // Otherwise, do working directory lookup.
    LookupPath.clear();
    // Use the optional-returning query: a failed llvm::Expected must have its
    // error consumed, and merely testing it with operator bool does not do
    // that, so the Expected-returning form would leave an unhandled error
    // behind whenever "." cannot be resolved.
    if (auto WorkingDirEntry = FM.getOptionalDirectoryRef(".")) {
      StringRef WorkingDir = WorkingDirEntry->getName();
      if (!WorkingDir.empty()) {
        SeparateComponents(LookupPath, WorkingDir, Filename, false);
        llvm::Expected<FileEntryRef> ShouldBeEntry =
            FM.getFileRef(LookupPath, OpenFile);
        if (ShouldBeEntry)
          return llvm::expectedToOptional(std::move(ShouldBeEntry));
        llvm::consumeError(ShouldBeEntry.takeError());
      }
    }
  }

  // Finally, try every configured embed search directory in order.
  for (const auto &Entry : PPOpts->EmbedEntries) {
    LookupPath.clear();
    SeparateComponents(LookupPath, Entry, Filename, false);
    llvm::Expected<FileEntryRef> ShouldBeEntry =
        FM.getFileRef(LookupPath, OpenFile);
    if (ShouldBeEntry)
      return llvm::expectedToOptional(std::move(ShouldBeEntry));
    llvm::consumeError(ShouldBeEntry.takeError());
  }
  return std::nullopt;
}
1146

1147
//===----------------------------------------------------------------------===//
1148
// Preprocessor Directive Handling.
1149
//===----------------------------------------------------------------------===//
1150

1151
/// Scope guard around directive handling: on construction it remembers the
/// preprocessor's DisableMacroExpansion flag and, if
/// MacroExpansionInDirectivesOverride is set, clears the flag; on destruction
/// it writes the remembered value back.
class Preprocessor::ResetMacroExpansionHelper {
  Preprocessor *PP;
  /// Value of DisableMacroExpansion observed at construction time.
  bool PrevDisableMacroExpansion;

public:
  ResetMacroExpansionHelper(Preprocessor *pp)
      : PP(pp), PrevDisableMacroExpansion(pp->DisableMacroExpansion) {
    if (PP->MacroExpansionInDirectivesOverride)
      PP->DisableMacroExpansion = false;
  }

  ~ResetMacroExpansionHelper() {
    PP->DisableMacroExpansion = PrevDisableMacroExpansion;
  }
};
1167

1168
/// Process a directive while looking for the through header or a #pragma
1169
/// hdrstop. The following directives are handled:
1170
/// #include (to check if it is the through header)
1171
/// #define (to warn about macros that don't match the PCH)
1172
/// #pragma (to check for pragma hdrstop).
1173
/// All other directives are completely discarded.
1174
void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1175
                                                       SourceLocation HashLoc) {
1176
  if (const IdentifierInfo *II = Result.getIdentifierInfo()) {
1177
    if (II->getPPKeywordID() == tok::pp_define) {
1178
      return HandleDefineDirective(Result,
1179
                                   /*ImmediatelyAfterHeaderGuard=*/false);
1180
    }
1181
    if (SkippingUntilPCHThroughHeader &&
1182
        II->getPPKeywordID() == tok::pp_include) {
1183
      return HandleIncludeDirective(HashLoc, Result);
1184
    }
1185
    if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
1186
      Lex(Result);
1187
      auto *II = Result.getIdentifierInfo();
1188
      if (II && II->getName() == "hdrstop")
1189
        return HandlePragmaHdrstop(Result);
1190
    }
1191
  }
1192
  DiscardUntilEndOfDirective();
1193
}
1194

1195
/// HandleDirective - This callback is invoked when the lexer sees a # token
1196
/// at the start of a line.  This consumes the directive, modifies the
1197
/// lexer/preprocessor state, and advances the lexer(s) so that the next token
1198
/// read is the correct one.
1199
void Preprocessor::HandleDirective(Token &Result) {
1200
  // FIXME: Traditional: # with whitespace before it not recognized by K&R?
1201

1202
  // We just parsed a # character at the start of a line, so we're in directive
1203
  // mode.  Tell the lexer this so any newlines we see will be converted into an
1204
  // EOD token (which terminates the directive).
1205
  CurPPLexer->ParsingPreprocessorDirective = true;
1206
  if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
1207

1208
  bool ImmediatelyAfterTopLevelIfndef =
1209
      CurPPLexer->MIOpt.getImmediatelyAfterTopLevelIfndef();
1210
  CurPPLexer->MIOpt.resetImmediatelyAfterTopLevelIfndef();
1211

1212
  ++NumDirectives;
1213

1214
  // We are about to read a token.  For the multiple-include optimization FA to
1215
  // work, we have to remember if we had read any tokens *before* this
1216
  // pp-directive.
1217
  bool ReadAnyTokensBeforeDirective =CurPPLexer->MIOpt.getHasReadAnyTokensVal();
1218

1219
  // Save the '#' token in case we need to return it later.
1220
  Token SavedHash = Result;
1221

1222
  // Read the next token, the directive flavor.  This isn't expanded due to
1223
  // C99 6.10.3p8.
1224
  LexUnexpandedToken(Result);
1225

1226
  // C99 6.10.3p11: Is this preprocessor directive in macro invocation?  e.g.:
1227
  //   #define A(x) #x
1228
  //   A(abc
1229
  //     #warning blah
1230
  //   def)
1231
  // If so, the user is relying on undefined behavior, emit a diagnostic. Do
1232
  // not support this for #include-like directives, since that can result in
1233
  // terrible diagnostics, and does not work in GCC.
1234
  if (InMacroArgs) {
1235
    if (IdentifierInfo *II = Result.getIdentifierInfo()) {
1236
      switch (II->getPPKeywordID()) {
1237
      case tok::pp_include:
1238
      case tok::pp_import:
1239
      case tok::pp_include_next:
1240
      case tok::pp___include_macros:
1241
      case tok::pp_pragma:
1242
      case tok::pp_embed:
1243
        Diag(Result, diag::err_embedded_directive) << II->getName();
1244
        Diag(*ArgMacro, diag::note_macro_expansion_here)
1245
            << ArgMacro->getIdentifierInfo();
1246
        DiscardUntilEndOfDirective();
1247
        return;
1248
      default:
1249
        break;
1250
      }
1251
    }
1252
    Diag(Result, diag::ext_embedded_directive);
1253
  }
1254

1255
  // Temporarily enable macro expansion if set so
1256
  // and reset to previous state when returning from this function.
1257
  ResetMacroExpansionHelper helper(this);
1258

1259
  if (SkippingUntilPCHThroughHeader || SkippingUntilPragmaHdrStop)
1260
    return HandleSkippedDirectiveWhileUsingPCH(Result, SavedHash.getLocation());
1261

1262
  switch (Result.getKind()) {
1263
  case tok::eod:
1264
    // Ignore the null directive with regards to the multiple-include
1265
    // optimization, i.e. allow the null directive to appear outside of the
1266
    // include guard and still enable the multiple-include optimization.
1267
    CurPPLexer->MIOpt.SetReadToken(ReadAnyTokensBeforeDirective);
1268
    return;   // null directive.
1269
  case tok::code_completion:
1270
    setCodeCompletionReached();
1271
    if (CodeComplete)
1272
      CodeComplete->CodeCompleteDirective(
1273
                                    CurPPLexer->getConditionalStackDepth() > 0);
1274
    return;
1275
  case tok::numeric_constant:  // # 7  GNU line marker directive.
1276
    // In a .S file "# 4" may be a comment so don't treat it as a preprocessor
1277
    // directive. However do permit it in the predefines file, as we use line
1278
    // markers to mark the builtin macros as being in a system header.
1279
    if (getLangOpts().AsmPreprocessor &&
1280
        SourceMgr.getFileID(SavedHash.getLocation()) != getPredefinesFileID())
1281
      break;
1282
    return HandleDigitDirective(Result);
1283
  default:
1284
    IdentifierInfo *II = Result.getIdentifierInfo();
1285
    if (!II) break; // Not an identifier.
1286

1287
    // Ask what the preprocessor keyword ID is.
1288
    switch (II->getPPKeywordID()) {
1289
    default: break;
1290
    // C99 6.10.1 - Conditional Inclusion.
1291
    case tok::pp_if:
1292
      return HandleIfDirective(Result, SavedHash, ReadAnyTokensBeforeDirective);
1293
    case tok::pp_ifdef:
1294
      return HandleIfdefDirective(Result, SavedHash, false,
1295
                                  true /*not valid for miopt*/);
1296
    case tok::pp_ifndef:
1297
      return HandleIfdefDirective(Result, SavedHash, true,
1298
                                  ReadAnyTokensBeforeDirective);
1299
    case tok::pp_elif:
1300
    case tok::pp_elifdef:
1301
    case tok::pp_elifndef:
1302
      return HandleElifFamilyDirective(Result, SavedHash, II->getPPKeywordID());
1303

1304
    case tok::pp_else:
1305
      return HandleElseDirective(Result, SavedHash);
1306
    case tok::pp_endif:
1307
      return HandleEndifDirective(Result);
1308

1309
    // C99 6.10.2 - Source File Inclusion.
1310
    case tok::pp_include:
1311
      // Handle #include.
1312
      return HandleIncludeDirective(SavedHash.getLocation(), Result);
1313
    case tok::pp___include_macros:
1314
      // Handle -imacros.
1315
      return HandleIncludeMacrosDirective(SavedHash.getLocation(), Result);
1316

1317
    // C99 6.10.3 - Macro Replacement.
1318
    case tok::pp_define:
1319
      return HandleDefineDirective(Result, ImmediatelyAfterTopLevelIfndef);
1320
    case tok::pp_undef:
1321
      return HandleUndefDirective();
1322

1323
    // C99 6.10.4 - Line Control.
1324
    case tok::pp_line:
1325
      return HandleLineDirective();
1326

1327
    // C99 6.10.5 - Error Directive.
1328
    case tok::pp_error:
1329
      return HandleUserDiagnosticDirective(Result, false);
1330

1331
    // C99 6.10.6 - Pragma Directive.
1332
    case tok::pp_pragma:
1333
      return HandlePragmaDirective({PIK_HashPragma, SavedHash.getLocation()});
1334

1335
    // GNU Extensions.
1336
    case tok::pp_import:
1337
      return HandleImportDirective(SavedHash.getLocation(), Result);
1338
    case tok::pp_include_next:
1339
      return HandleIncludeNextDirective(SavedHash.getLocation(), Result);
1340

1341
    case tok::pp_warning:
1342
      if (LangOpts.CPlusPlus)
1343
        Diag(Result, LangOpts.CPlusPlus23
1344
                         ? diag::warn_cxx23_compat_warning_directive
1345
                         : diag::ext_pp_warning_directive)
1346
            << /*C++23*/ 1;
1347
      else
1348
        Diag(Result, LangOpts.C23 ? diag::warn_c23_compat_warning_directive
1349
                                  : diag::ext_pp_warning_directive)
1350
            << /*C23*/ 0;
1351

1352
      return HandleUserDiagnosticDirective(Result, true);
1353
    case tok::pp_ident:
1354
      return HandleIdentSCCSDirective(Result);
1355
    case tok::pp_sccs:
1356
      return HandleIdentSCCSDirective(Result);
1357
    case tok::pp_embed:
1358
      return HandleEmbedDirective(SavedHash.getLocation(), Result,
1359
                                  getCurrentFileLexer()
1360
                                      ? *getCurrentFileLexer()->getFileEntry()
1361
                                      : static_cast<FileEntry *>(nullptr));
1362
    case tok::pp_assert:
1363
      //isExtension = true;  // FIXME: implement #assert
1364
      break;
1365
    case tok::pp_unassert:
1366
      //isExtension = true;  // FIXME: implement #unassert
1367
      break;
1368

1369
    case tok::pp___public_macro:
1370
      if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1371
        return HandleMacroPublicDirective(Result);
1372
      break;
1373

1374
    case tok::pp___private_macro:
1375
      if (getLangOpts().Modules || getLangOpts().ModulesLocalVisibility)
1376
        return HandleMacroPrivateDirective();
1377
      break;
1378
    }
1379
    break;
1380
  }
1381

1382
  // If this is a .S file, treat unknown # directives as non-preprocessor
1383
  // directives.  This is important because # may be a comment or introduce
1384
  // various pseudo-ops.  Just return the # token and push back the following
1385
  // token to be lexed next time.
1386
  if (getLangOpts().AsmPreprocessor) {
1387
    auto Toks = std::make_unique<Token[]>(2);
1388
    // Return the # and the token after it.
1389
    Toks[0] = SavedHash;
1390
    Toks[1] = Result;
1391

1392
    // If the second token is a hashhash token, then we need to translate it to
1393
    // unknown so the token lexer doesn't try to perform token pasting.
1394
    if (Result.is(tok::hashhash))
1395
      Toks[1].setKind(tok::unknown);
1396

1397
    // Enter this token stream so that we re-lex the tokens.  Make sure to
1398
    // enable macro expansion, in case the token after the # is an identifier
1399
    // that is expanded.
1400
    EnterTokenStream(std::move(Toks), 2, false, /*IsReinject*/false);
1401
    return;
1402
  }
1403

1404
  // If we reached here, the preprocessing token is not valid!
1405
  // Start suggesting if a similar directive found.
1406
  Diag(Result, diag::err_pp_invalid_directive) << 0;
1407

1408
  // Read the rest of the PP line.
1409
  DiscardUntilEndOfDirective();
1410

1411
  // Okay, we're done parsing the directive.
1412
}
1413

1414
/// GetLineValue - Convert a numeric token into an unsigned value, emitting
1415
/// Diagnostic DiagID if it is invalid, and returning the value in Val.
1416
static bool GetLineValue(Token &DigitTok, unsigned &Val,
1417
                         unsigned DiagID, Preprocessor &PP,
1418
                         bool IsGNULineDirective=false) {
1419
  if (DigitTok.isNot(tok::numeric_constant)) {
1420
    PP.Diag(DigitTok, DiagID);
1421

1422
    if (DigitTok.isNot(tok::eod))
1423
      PP.DiscardUntilEndOfDirective();
1424
    return true;
1425
  }
1426

1427
  SmallString<64> IntegerBuffer;
1428
  IntegerBuffer.resize(DigitTok.getLength());
1429
  const char *DigitTokBegin = &IntegerBuffer[0];
1430
  bool Invalid = false;
1431
  unsigned ActualLength = PP.getSpelling(DigitTok, DigitTokBegin, &Invalid);
1432
  if (Invalid)
1433
    return true;
1434

1435
  // Verify that we have a simple digit-sequence, and compute the value.  This
1436
  // is always a simple digit string computed in decimal, so we do this manually
1437
  // here.
1438
  Val = 0;
1439
  for (unsigned i = 0; i != ActualLength; ++i) {
1440
    // C++1y [lex.fcon]p1:
1441
    //   Optional separating single quotes in a digit-sequence are ignored
1442
    if (DigitTokBegin[i] == '\'')
1443
      continue;
1444

1445
    if (!isDigit(DigitTokBegin[i])) {
1446
      PP.Diag(PP.AdvanceToTokenCharacter(DigitTok.getLocation(), i),
1447
              diag::err_pp_line_digit_sequence) << IsGNULineDirective;
1448
      PP.DiscardUntilEndOfDirective();
1449
      return true;
1450
    }
1451

1452
    unsigned NextVal = Val*10+(DigitTokBegin[i]-'0');
1453
    if (NextVal < Val) { // overflow.
1454
      PP.Diag(DigitTok, DiagID);
1455
      PP.DiscardUntilEndOfDirective();
1456
      return true;
1457
    }
1458
    Val = NextVal;
1459
  }
1460

1461
  if (DigitTokBegin[0] == '0' && Val)
1462
    PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal)
1463
      << IsGNULineDirective;
1464

1465
  return false;
1466
}
1467

1468
/// Handle a \#line directive: C99 6.10.4.
1469
///
1470
/// The two acceptable forms are:
1471
/// \verbatim
1472
///   # line digit-sequence
1473
///   # line digit-sequence "s-char-sequence"
1474
/// \endverbatim
1475
void Preprocessor::HandleLineDirective() {
1476
  // Read the line # and string argument.  Per C99 6.10.4p5, these tokens are
1477
  // expanded.
1478
  Token DigitTok;
1479
  Lex(DigitTok);
1480

1481
  // Validate the number and convert it to an unsigned.
1482
  unsigned LineNo;
1483
  if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this))
1484
    return;
1485

1486
  if (LineNo == 0)
1487
    Diag(DigitTok, diag::ext_pp_line_zero);
1488

1489
  // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a
1490
  // number greater than 2147483647".  C90 requires that the line # be <= 32767.
1491
  unsigned LineLimit = 32768U;
1492
  if (LangOpts.C99 || LangOpts.CPlusPlus11)
1493
    LineLimit = 2147483648U;
1494
  if (LineNo >= LineLimit)
1495
    Diag(DigitTok, diag::ext_pp_line_too_big) << LineLimit;
1496
  else if (LangOpts.CPlusPlus11 && LineNo >= 32768U)
1497
    Diag(DigitTok, diag::warn_cxx98_compat_pp_line_too_big);
1498

1499
  int FilenameID = -1;
1500
  Token StrTok;
1501
  Lex(StrTok);
1502

1503
  // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1504
  // string followed by eod.
1505
  if (StrTok.is(tok::eod))
1506
    ; // ok
1507
  else if (StrTok.isNot(tok::string_literal)) {
1508
    Diag(StrTok, diag::err_pp_line_invalid_filename);
1509
    DiscardUntilEndOfDirective();
1510
    return;
1511
  } else if (StrTok.hasUDSuffix()) {
1512
    Diag(StrTok, diag::err_invalid_string_udl);
1513
    DiscardUntilEndOfDirective();
1514
    return;
1515
  } else {
1516
    // Parse and validate the string, converting it into a unique ID.
1517
    StringLiteralParser Literal(StrTok, *this);
1518
    assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1519
    if (Literal.hadError) {
1520
      DiscardUntilEndOfDirective();
1521
      return;
1522
    }
1523
    if (Literal.Pascal) {
1524
      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1525
      DiscardUntilEndOfDirective();
1526
      return;
1527
    }
1528
    FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1529

1530
    // Verify that there is nothing after the string, other than EOD.  Because
1531
    // of C99 6.10.4p5, macros that expand to empty tokens are ok.
1532
    CheckEndOfDirective("line", true);
1533
  }
1534

1535
  // Take the file kind of the file containing the #line directive. #line
1536
  // directives are often used for generated sources from the same codebase, so
1537
  // the new file should generally be classified the same way as the current
1538
  // file. This is visible in GCC's pre-processed output, which rewrites #line
1539
  // to GNU line markers.
1540
  SrcMgr::CharacteristicKind FileKind =
1541
      SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1542

1543
  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false,
1544
                        false, FileKind);
1545

1546
  if (Callbacks)
1547
    Callbacks->FileChanged(CurPPLexer->getSourceLocation(),
1548
                           PPCallbacks::RenameFile, FileKind);
1549
}
1550

1551
/// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line
1552
/// marker directive.
1553
static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit,
1554
                                SrcMgr::CharacteristicKind &FileKind,
1555
                                Preprocessor &PP) {
1556
  unsigned FlagVal;
1557
  Token FlagTok;
1558
  PP.Lex(FlagTok);
1559
  if (FlagTok.is(tok::eod)) return false;
1560
  if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1561
    return true;
1562

1563
  if (FlagVal == 1) {
1564
    IsFileEntry = true;
1565

1566
    PP.Lex(FlagTok);
1567
    if (FlagTok.is(tok::eod)) return false;
1568
    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1569
      return true;
1570
  } else if (FlagVal == 2) {
1571
    IsFileExit = true;
1572

1573
    SourceManager &SM = PP.getSourceManager();
1574
    // If we are leaving the current presumed file, check to make sure the
1575
    // presumed include stack isn't empty!
1576
    FileID CurFileID =
1577
      SM.getDecomposedExpansionLoc(FlagTok.getLocation()).first;
1578
    PresumedLoc PLoc = SM.getPresumedLoc(FlagTok.getLocation());
1579
    if (PLoc.isInvalid())
1580
      return true;
1581

1582
    // If there is no include loc (main file) or if the include loc is in a
1583
    // different physical file, then we aren't in a "1" line marker flag region.
1584
    SourceLocation IncLoc = PLoc.getIncludeLoc();
1585
    if (IncLoc.isInvalid() ||
1586
        SM.getDecomposedExpansionLoc(IncLoc).first != CurFileID) {
1587
      PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_pop);
1588
      PP.DiscardUntilEndOfDirective();
1589
      return true;
1590
    }
1591

1592
    PP.Lex(FlagTok);
1593
    if (FlagTok.is(tok::eod)) return false;
1594
    if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag,PP))
1595
      return true;
1596
  }
1597

1598
  // We must have 3 if there are still flags.
1599
  if (FlagVal != 3) {
1600
    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1601
    PP.DiscardUntilEndOfDirective();
1602
    return true;
1603
  }
1604

1605
  FileKind = SrcMgr::C_System;
1606

1607
  PP.Lex(FlagTok);
1608
  if (FlagTok.is(tok::eod)) return false;
1609
  if (GetLineValue(FlagTok, FlagVal, diag::err_pp_linemarker_invalid_flag, PP))
1610
    return true;
1611

1612
  // We must have 4 if there is yet another flag.
1613
  if (FlagVal != 4) {
1614
    PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1615
    PP.DiscardUntilEndOfDirective();
1616
    return true;
1617
  }
1618

1619
  FileKind = SrcMgr::C_ExternCSystem;
1620

1621
  PP.Lex(FlagTok);
1622
  if (FlagTok.is(tok::eod)) return false;
1623

1624
  // There are no more valid flags here.
1625
  PP.Diag(FlagTok, diag::err_pp_linemarker_invalid_flag);
1626
  PP.DiscardUntilEndOfDirective();
1627
  return true;
1628
}
1629

1630
/// HandleDigitDirective - Handle a GNU line marker directive, whose syntax is
1631
/// one of the following forms:
1632
///
1633
///     # 42
1634
///     # 42 "file" ('1' | '2')?
1635
///     # 42 "file" ('1' | '2')? '3' '4'?
1636
///
1637
void Preprocessor::HandleDigitDirective(Token &DigitTok) {
1638
  // Validate the number and convert it to an unsigned.  GNU does not have a
1639
  // line # limit other than it fit in 32-bits.
1640
  unsigned LineNo;
1641
  if (GetLineValue(DigitTok, LineNo, diag::err_pp_linemarker_requires_integer,
1642
                   *this, true))
1643
    return;
1644

1645
  Token StrTok;
1646
  Lex(StrTok);
1647

1648
  bool IsFileEntry = false, IsFileExit = false;
1649
  int FilenameID = -1;
1650
  SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User;
1651

1652
  // If the StrTok is "eod", then it wasn't present.  Otherwise, it must be a
1653
  // string followed by eod.
1654
  if (StrTok.is(tok::eod)) {
1655
    Diag(StrTok, diag::ext_pp_gnu_line_directive);
1656
    // Treat this like "#line NN", which doesn't change file characteristics.
1657
    FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation());
1658
  } else if (StrTok.isNot(tok::string_literal)) {
1659
    Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1660
    DiscardUntilEndOfDirective();
1661
    return;
1662
  } else if (StrTok.hasUDSuffix()) {
1663
    Diag(StrTok, diag::err_invalid_string_udl);
1664
    DiscardUntilEndOfDirective();
1665
    return;
1666
  } else {
1667
    // Parse and validate the string, converting it into a unique ID.
1668
    StringLiteralParser Literal(StrTok, *this);
1669
    assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1670
    if (Literal.hadError) {
1671
      DiscardUntilEndOfDirective();
1672
      return;
1673
    }
1674
    if (Literal.Pascal) {
1675
      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
1676
      DiscardUntilEndOfDirective();
1677
      return;
1678
    }
1679

1680
    // If a filename was present, read any flags that are present.
1681
    if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this))
1682
      return;
1683
    if (!SourceMgr.isWrittenInBuiltinFile(DigitTok.getLocation()) &&
1684
        !SourceMgr.isWrittenInCommandLineFile(DigitTok.getLocation()))
1685
      Diag(StrTok, diag::ext_pp_gnu_line_directive);
1686

1687
    // Exiting to an empty string means pop to the including file, so leave
1688
    // FilenameID as -1 in that case.
1689
    if (!(IsFileExit && Literal.GetString().empty()))
1690
      FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
1691
  }
1692

1693
  // Create a line note with this information.
1694
  SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry,
1695
                        IsFileExit, FileKind);
1696

1697
  // If the preprocessor has callbacks installed, notify them of the #line
1698
  // change.  This is used so that the line marker comes out in -E mode for
1699
  // example.
1700
  if (Callbacks) {
1701
    PPCallbacks::FileChangeReason Reason = PPCallbacks::RenameFile;
1702
    if (IsFileEntry)
1703
      Reason = PPCallbacks::EnterFile;
1704
    else if (IsFileExit)
1705
      Reason = PPCallbacks::ExitFile;
1706

1707
    Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind);
1708
  }
1709
}
1710

1711
/// HandleUserDiagnosticDirective - Handle a #warning or #error directive.
1712
///
1713
void Preprocessor::HandleUserDiagnosticDirective(Token &Tok,
1714
                                                 bool isWarning) {
1715
  // Read the rest of the line raw.  We do this because we don't want macros
1716
  // to be expanded and we don't require that the tokens be valid preprocessing
1717
  // tokens.  For example, this is allowed: "#warning `   'foo".  GCC does
1718
  // collapse multiple consecutive white space between tokens, but this isn't
1719
  // specified by the standard.
1720
  SmallString<128> Message;
1721
  CurLexer->ReadToEndOfLine(&Message);
1722

1723
  // Find the first non-whitespace character, so that we can make the
1724
  // diagnostic more succinct.
1725
  StringRef Msg = Message.str().ltrim(' ');
1726

1727
  if (isWarning)
1728
    Diag(Tok, diag::pp_hash_warning) << Msg;
1729
  else
1730
    Diag(Tok, diag::err_pp_hash_error) << Msg;
1731
}
1732

1733
/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
1734
///
1735
void Preprocessor::HandleIdentSCCSDirective(Token &Tok) {
1736
  // Yes, this directive is an extension.
1737
  Diag(Tok, diag::ext_pp_ident_directive);
1738

1739
  // Read the string argument.
1740
  Token StrTok;
1741
  Lex(StrTok);
1742

1743
  // If the token kind isn't a string, it's a malformed directive.
1744
  if (StrTok.isNot(tok::string_literal) &&
1745
      StrTok.isNot(tok::wide_string_literal)) {
1746
    Diag(StrTok, diag::err_pp_malformed_ident);
1747
    if (StrTok.isNot(tok::eod))
1748
      DiscardUntilEndOfDirective();
1749
    return;
1750
  }
1751

1752
  if (StrTok.hasUDSuffix()) {
1753
    Diag(StrTok, diag::err_invalid_string_udl);
1754
    DiscardUntilEndOfDirective();
1755
    return;
1756
  }
1757

1758
  // Verify that there is nothing after the string, other than EOD.
1759
  CheckEndOfDirective("ident");
1760

1761
  if (Callbacks) {
1762
    bool Invalid = false;
1763
    std::string Str = getSpelling(StrTok, &Invalid);
1764
    if (!Invalid)
1765
      Callbacks->Ident(Tok.getLocation(), Str);
1766
  }
1767
}
1768

1769
/// Handle a #public directive.
1770
void Preprocessor::HandleMacroPublicDirective(Token &Tok) {
1771
  Token MacroNameTok;
1772
  ReadMacroName(MacroNameTok, MU_Undef);
1773

1774
  // Error reading macro name?  If so, diagnostic already issued.
1775
  if (MacroNameTok.is(tok::eod))
1776
    return;
1777

1778
  // Check to see if this is the last token on the #__public_macro line.
1779
  CheckEndOfDirective("__public_macro");
1780

1781
  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1782
  // Okay, we finally have a valid identifier to undef.
1783
  MacroDirective *MD = getLocalMacroDirective(II);
1784

1785
  // If the macro is not defined, this is an error.
1786
  if (!MD) {
1787
    Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1788
    return;
1789
  }
1790

1791
  // Note that this macro has now been exported.
1792
  appendMacroDirective(II, AllocateVisibilityMacroDirective(
1793
                                MacroNameTok.getLocation(), /*isPublic=*/true));
1794
}
1795

1796
/// Handle a #private directive.
1797
void Preprocessor::HandleMacroPrivateDirective() {
1798
  Token MacroNameTok;
1799
  ReadMacroName(MacroNameTok, MU_Undef);
1800

1801
  // Error reading macro name?  If so, diagnostic already issued.
1802
  if (MacroNameTok.is(tok::eod))
1803
    return;
1804

1805
  // Check to see if this is the last token on the #__private_macro line.
1806
  CheckEndOfDirective("__private_macro");
1807

1808
  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
1809
  // Okay, we finally have a valid identifier to undef.
1810
  MacroDirective *MD = getLocalMacroDirective(II);
1811

1812
  // If the macro is not defined, this is an error.
1813
  if (!MD) {
1814
    Diag(MacroNameTok, diag::err_pp_visibility_non_macro) << II;
1815
    return;
1816
  }
1817

1818
  // Note that this macro has now been marked private.
1819
  appendMacroDirective(II, AllocateVisibilityMacroDirective(
1820
                               MacroNameTok.getLocation(), /*isPublic=*/false));
1821
}
1822

1823
//===----------------------------------------------------------------------===//
1824
// Preprocessor Include Directive Handling.
1825
//===----------------------------------------------------------------------===//
1826

1827
/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully
1828
/// checked and spelled filename, e.g. as an operand of \#include. This returns
1829
/// true if the input filename was in <>'s or false if it were in ""'s.  The
1830
/// caller is expected to provide a buffer that is large enough to hold the
1831
/// spelling of the filename, but is also expected to handle the case when
1832
/// this method decides to use a different buffer.
1833
bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
1834
                                              StringRef &Buffer) {
1835
  // Get the text form of the filename.
1836
  assert(!Buffer.empty() && "Can't have tokens with empty spellings!");
1837

1838
  // FIXME: Consider warning on some of the cases described in C11 6.4.7/3 and
1839
  // C++20 [lex.header]/2:
1840
  //
1841
  // If `"`, `'`, `\`, `/*`, or `//` appears in a header-name, then
1842
  //   in C: behavior is undefined
1843
  //   in C++: program is conditionally-supported with implementation-defined
1844
  //           semantics
1845

1846
  // Make sure the filename is <x> or "x".
1847
  bool isAngled;
1848
  if (Buffer[0] == '<') {
1849
    if (Buffer.back() != '>') {
1850
      Diag(Loc, diag::err_pp_expects_filename);
1851
      Buffer = StringRef();
1852
      return true;
1853
    }
1854
    isAngled = true;
1855
  } else if (Buffer[0] == '"') {
1856
    if (Buffer.back() != '"') {
1857
      Diag(Loc, diag::err_pp_expects_filename);
1858
      Buffer = StringRef();
1859
      return true;
1860
    }
1861
    isAngled = false;
1862
  } else {
1863
    Diag(Loc, diag::err_pp_expects_filename);
1864
    Buffer = StringRef();
1865
    return true;
1866
  }
1867

1868
  // Diagnose #include "" as invalid.
1869
  if (Buffer.size() <= 2) {
1870
    Diag(Loc, diag::err_pp_empty_filename);
1871
    Buffer = StringRef();
1872
    return true;
1873
  }
1874

1875
  // Skip the brackets.
1876
  Buffer = Buffer.substr(1, Buffer.size()-2);
1877
  return isAngled;
1878
}
1879

1880
/// Push a token onto the token stream containing an annotation.
1881
void Preprocessor::EnterAnnotationToken(SourceRange Range,
1882
                                        tok::TokenKind Kind,
1883
                                        void *AnnotationVal) {
1884
  // FIXME: Produce this as the current token directly, rather than
1885
  // allocating a new token for it.
1886
  auto Tok = std::make_unique<Token[]>(1);
1887
  Tok[0].startToken();
1888
  Tok[0].setKind(Kind);
1889
  Tok[0].setLocation(Range.getBegin());
1890
  Tok[0].setAnnotationEndLoc(Range.getEnd());
1891
  Tok[0].setAnnotationValue(AnnotationVal);
1892
  EnterTokenStream(std::move(Tok), 1, true, /*IsReinject*/ false);
1893
}
1894

1895
/// Produce a diagnostic informing the user that a #include or similar
1896
/// was implicitly treated as a module import.
1897
static void diagnoseAutoModuleImport(
1898
    Preprocessor &PP, SourceLocation HashLoc, Token &IncludeTok,
1899
    ArrayRef<std::pair<IdentifierInfo *, SourceLocation>> Path,
1900
    SourceLocation PathEnd) {
1901
  SmallString<128> PathString;
1902
  for (size_t I = 0, N = Path.size(); I != N; ++I) {
1903
    if (I)
1904
      PathString += '.';
1905
    PathString += Path[I].first->getName();
1906
  }
1907

1908
  int IncludeKind = 0;
1909
  switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
1910
  case tok::pp_include:
1911
    IncludeKind = 0;
1912
    break;
1913

1914
  case tok::pp_import:
1915
    IncludeKind = 1;
1916
    break;
1917

1918
  case tok::pp_include_next:
1919
    IncludeKind = 2;
1920
    break;
1921

1922
  case tok::pp___include_macros:
1923
    IncludeKind = 3;
1924
    break;
1925

1926
  default:
1927
    llvm_unreachable("unknown include directive kind");
1928
  }
1929

1930
  PP.Diag(HashLoc, diag::remark_pp_include_directive_modular_translation)
1931
      << IncludeKind << PathString;
1932
}
1933

1934
// Given a vector of path components and a string containing the real
1935
// path to the file, build a properly-cased replacement in the vector,
1936
// and return true if the replacement should be suggested.
1937
static bool trySimplifyPath(SmallVectorImpl<StringRef> &Components,
1938
                            StringRef RealPathName,
1939
                            llvm::sys::path::Style Separator) {
1940
  auto RealPathComponentIter = llvm::sys::path::rbegin(RealPathName);
1941
  auto RealPathComponentEnd = llvm::sys::path::rend(RealPathName);
1942
  int Cnt = 0;
1943
  bool SuggestReplacement = false;
1944

1945
  auto IsSep = [Separator](StringRef Component) {
1946
    return Component.size() == 1 &&
1947
           llvm::sys::path::is_separator(Component[0], Separator);
1948
  };
1949

1950
  // Below is a best-effort to handle ".." in paths. It is admittedly
1951
  // not 100% correct in the presence of symlinks.
1952
  for (auto &Component : llvm::reverse(Components)) {
1953
    if ("." == Component) {
1954
    } else if (".." == Component) {
1955
      ++Cnt;
1956
    } else if (Cnt) {
1957
      --Cnt;
1958
    } else if (RealPathComponentIter != RealPathComponentEnd) {
1959
      if (!IsSep(Component) && !IsSep(*RealPathComponentIter) &&
1960
          Component != *RealPathComponentIter) {
1961
        // If these non-separator path components differ by more than just case,
1962
        // then we may be looking at symlinked paths. Bail on this diagnostic to
1963
        // avoid noisy false positives.
1964
        SuggestReplacement =
1965
            RealPathComponentIter->equals_insensitive(Component);
1966
        if (!SuggestReplacement)
1967
          break;
1968
        Component = *RealPathComponentIter;
1969
      }
1970
      ++RealPathComponentIter;
1971
    }
1972
  }
1973
  return SuggestReplacement;
1974
}
1975

1976
bool Preprocessor::checkModuleIsAvailable(const LangOptions &LangOpts,
1977
                                          const TargetInfo &TargetInfo,
1978
                                          const Module &M,
1979
                                          DiagnosticsEngine &Diags) {
1980
  Module::Requirement Requirement;
1981
  Module::UnresolvedHeaderDirective MissingHeader;
1982
  Module *ShadowingModule = nullptr;
1983
  if (M.isAvailable(LangOpts, TargetInfo, Requirement, MissingHeader,
1984
                    ShadowingModule))
1985
    return false;
1986

1987
  if (MissingHeader.FileNameLoc.isValid()) {
1988
    Diags.Report(MissingHeader.FileNameLoc, diag::err_module_header_missing)
1989
        << MissingHeader.IsUmbrella << MissingHeader.FileName;
1990
  } else if (ShadowingModule) {
1991
    Diags.Report(M.DefinitionLoc, diag::err_module_shadowed) << M.Name;
1992
    Diags.Report(ShadowingModule->DefinitionLoc,
1993
                 diag::note_previous_definition);
1994
  } else {
1995
    // FIXME: Track the location at which the requirement was specified, and
1996
    // use it here.
1997
    Diags.Report(M.DefinitionLoc, diag::err_module_unavailable)
1998
        << M.getFullModuleName() << Requirement.RequiredState
1999
        << Requirement.FeatureName;
2000
  }
2001
  return true;
2002
}
2003

2004
std::pair<ConstSearchDirIterator, const FileEntry *>
2005
Preprocessor::getIncludeNextStart(const Token &IncludeNextTok) const {
2006
  // #include_next is like #include, except that we start searching after
2007
  // the current found directory.  If we can't do this, issue a
2008
  // diagnostic.
2009
  ConstSearchDirIterator Lookup = CurDirLookup;
2010
  const FileEntry *LookupFromFile = nullptr;
2011

2012
  if (isInPrimaryFile() && LangOpts.IsHeaderFile) {
2013
    // If the main file is a header, then it's either for PCH/AST generation,
2014
    // or libclang opened it. Either way, handle it as a normal include below
2015
    // and do not complain about include_next.
2016
  } else if (isInPrimaryFile()) {
2017
    Lookup = nullptr;
2018
    Diag(IncludeNextTok, diag::pp_include_next_in_primary);
2019
  } else if (CurLexerSubmodule) {
2020
    // Start looking up in the directory *after* the one in which the current
2021
    // file would be found, if any.
2022
    assert(CurPPLexer && "#include_next directive in macro?");
2023
    if (auto FE = CurPPLexer->getFileEntry())
2024
      LookupFromFile = *FE;
2025
    Lookup = nullptr;
2026
  } else if (!Lookup) {
2027
    // The current file was not found by walking the include path. Either it
2028
    // is the primary file (handled above), or it was found by absolute path,
2029
    // or it was found relative to such a file.
2030
    // FIXME: Track enough information so we know which case we're in.
2031
    Diag(IncludeNextTok, diag::pp_include_next_absolute_path);
2032
  } else {
2033
    // Start looking up in the next directory.
2034
    ++Lookup;
2035
  }
2036

2037
  return {Lookup, LookupFromFile};
2038
}
2039

2040
/// HandleIncludeDirective - The "\#include" tokens have just been read, read
2041
/// the file to be included from the lexer, then include it!  This is a common
2042
/// routine with functionality shared between \#include, \#include_next and
2043
/// \#import.  LookupFrom is set when this is a \#include_next directive, it
2044
/// specifies the file to start searching from.
2045
void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
2046
                                          Token &IncludeTok,
2047
                                          ConstSearchDirIterator LookupFrom,
2048
                                          const FileEntry *LookupFromFile) {
2049
  Token FilenameTok;
2050
  if (LexHeaderName(FilenameTok))
2051
    return;
2052

2053
  if (FilenameTok.isNot(tok::header_name)) {
2054
    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
2055
    if (FilenameTok.isNot(tok::eod))
2056
      DiscardUntilEndOfDirective();
2057
    return;
2058
  }
2059

2060
  // Verify that there is nothing after the filename, other than EOD.  Note
2061
  // that we allow macros that expand to nothing after the filename, because
2062
  // this falls into the category of "#include pp-tokens new-line" specified
2063
  // in C99 6.10.2p4.
2064
  SourceLocation EndLoc =
2065
      CheckEndOfDirective(IncludeTok.getIdentifierInfo()->getNameStart(), true);
2066

2067
  auto Action = HandleHeaderIncludeOrImport(HashLoc, IncludeTok, FilenameTok,
2068
                                            EndLoc, LookupFrom, LookupFromFile);
2069
  switch (Action.Kind) {
2070
  case ImportAction::None:
2071
  case ImportAction::SkippedModuleImport:
2072
    break;
2073
  case ImportAction::ModuleBegin:
2074
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2075
                         tok::annot_module_begin, Action.ModuleForHeader);
2076
    break;
2077
  case ImportAction::HeaderUnitImport:
2078
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
2079
                         Action.ModuleForHeader);
2080
    break;
2081
  case ImportAction::ModuleImport:
2082
    EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
2083
                         tok::annot_module_include, Action.ModuleForHeader);
2084
    break;
2085
  case ImportAction::Failure:
2086
    assert(TheModuleLoader.HadFatalFailure &&
2087
           "This should be an early exit only to a fatal error");
2088
    TheModuleLoader.HadFatalFailure = true;
2089
    IncludeTok.setKind(tok::eof);
2090
    CurLexer->cutOffLexing();
2091
    return;
2092
  }
2093
}
2094

2095
/// Look up the file named by a #include-like directive or header import, and
/// diagnose the failure if it cannot be found.
///
/// After the plain lookup fails (and neither the callbacks nor
/// SuppressIncludeNotFoundError silence the error), two recovery lookups are
/// tried: an angled include is retried as a quoted include, and, with spell
/// checking enabled, the name is retried with leading and trailing
/// non-alphanumeric characters removed.  A successful recovery emits a
/// non-fatal error carrying a fix-it and returns the file that was found.
///
/// \param Filename The filename as written; replaced by the corrected name
///        when typo correction succeeds.
/// \param LookupFilename The filename actually used for lookup; likewise
///        replaced when typo correction succeeds.
/// \returns the file that was found, or std::nullopt.
OptionalFileEntryRef Preprocessor::LookupHeaderIncludeOrImport(
    ConstSearchDirIterator *CurDir, StringRef &Filename,
    SourceLocation FilenameLoc, CharSourceRange FilenameRange,
    const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
    bool &IsMapped, ConstSearchDirIterator LookupFrom,
    const FileEntry *LookupFromFile, StringRef &LookupFilename,
    SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
    ModuleMap::KnownHeader &SuggestedModule, bool isAngled) {
  // Run the module map's header-inclusion check for a file that was found.
  // Skipped entirely when preprocessing assembler.
  auto DiagnoseHeaderInclusion = [&](FileEntryRef FE) {
    if (LangOpts.AsmPreprocessor)
      return;

    Module *RequestingModule = getModuleForLocation(
        FilenameLoc, LangOpts.ModulesValidateTextualHeaderIncludes);
    bool RequestingModuleIsModuleInterface =
        !SourceMgr.isInMainFile(FilenameLoc);

    HeaderInfo.getModuleMap().diagnoseHeaderInclusion(
        RequestingModule, RequestingModuleIsModuleInterface, FilenameLoc,
        Filename, FE);
  };

  // Every lookup below shares all arguments except the name, the angled-ness
  // and the framework-found out-parameter.  The search/relative path buffers
  // are only passed along when callbacks are installed.
  auto RunLookup = [&](StringRef Name, bool Angled, bool *FrameworkFound) {
    return LookupFile(FilenameLoc, Name, Angled, LookupFrom, LookupFromFile,
                      CurDir, Callbacks ? &SearchPath : nullptr,
                      Callbacks ? &RelativePath : nullptr, &SuggestedModule,
                      &IsMapped, FrameworkFound);
  };

  OptionalFileEntryRef File =
      RunLookup(LookupFilename, isAngled, &IsFrameworkFound);
  if (File) {
    DiagnoseHeaderInclusion(*File);
    return File;
  }

  // Give the clients a chance to silently skip this include.
  if (Callbacks && Callbacks->FileNotFound(Filename))
    return std::nullopt;

  if (SuppressIncludeNotFoundError)
    return std::nullopt;

  // An angled include that was not found is retried as though it had been
  // written with quotes, so that a fix-it can be offered.
  if (isAngled) {
    OptionalFileEntryRef QuotedFile =
        RunLookup(LookupFilename, false, /*IsFrameworkFound=*/nullptr);
    if (QuotedFile) {
      DiagnoseHeaderInclusion(*QuotedFile);
      Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal)
          << Filename << IsImportDecl
          << FixItHint::CreateReplacement(FilenameRange,
                                          "\"" + Filename.str() + "\"");
      return QuotedFile;
    }
  }

  // Look for likely typos caused by stray leading or trailing
  // non-alphanumeric characters.
  StringRef OriginalFilename = Filename;
  if (LangOpts.SpellChecking) {
    // Heuristic: trim non-alphanumeric characters from both ends of the name.
    auto CorrectTypoFilename = [](llvm::StringRef Name) {
      Name = Name.drop_until(isAlphanumeric);
      while (!Name.empty() && !isAlphanumeric(Name.back()))
        Name = Name.drop_back();
      return Name;
    };
    StringRef TypoCorrectionName = CorrectTypoFilename(Filename);
    StringRef TypoCorrectionLookupName = CorrectTypoFilename(LookupFilename);

    OptionalFileEntryRef CorrectedFile = RunLookup(
        TypoCorrectionLookupName, isAngled, /*IsFrameworkFound=*/nullptr);
    if (CorrectedFile) {
      DiagnoseHeaderInclusion(*CorrectedFile);

      // Re-wrap the corrected name in the delimiters the user wrote.
      const std::string Replacement =
          isAngled ? "<" + TypoCorrectionName.str() + ">"
                   : "\"" + TypoCorrectionName.str() + "\"";
      auto Hint = FixItHint::CreateReplacement(FilenameRange, Replacement);
      Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
          << OriginalFilename << TypoCorrectionName << Hint;

      // The file was found under the corrected name, so report that name
      // back to the caller.
      Filename = TypoCorrectionName;
      LookupFilename = TypoCorrectionLookupName;
      return CorrectedFile;
    }
  }

  // Nothing worked; fall back to the plain "file not found" diagnostic.
  assert(!File && "expected missing file");
  Diag(FilenameTok, diag::err_pp_file_not_found)
      << OriginalFilename << FilenameRange;

  // If the framework was located but the header inside it was not, add a note
  // naming the framework directory.
  if (IsFrameworkFound) {
    size_t SlashPos = OriginalFilename.find('/');
    assert(SlashPos != StringRef::npos &&
           "Include with framework name should have '/' in the filename");
    StringRef FrameworkName = OriginalFilename.substr(0, SlashPos);
    FrameworkCacheEntry &CacheEntry =
        HeaderInfo.LookupFrameworkCache(FrameworkName);
    assert(CacheEntry.Directory && "Found framework should be in cache");
    Diag(FilenameTok, diag::note_pp_framework_without_header)
        << OriginalFilename.substr(SlashPos + 1) << FrameworkName
        << CacheEntry.Directory->getName();
  }

  return std::nullopt;
}
2209

2210
/// Handle either a #include-like directive or an import declaration that names
2211
/// a header file.
2212
///
2213
/// \param HashLoc The location of the '#' token for an include, or
2214
///        SourceLocation() for an import declaration.
2215
/// \param IncludeTok The include / include_next / import token.
2216
/// \param FilenameTok The header-name token.
2217
/// \param EndLoc The location at which any imported macros become visible.
2218
/// \param LookupFrom For #include_next, the starting directory for the
2219
///        directory lookup.
2220
/// \param LookupFromFile For #include_next, the starting file for the directory
2221
///        lookup.
///
/// \returns An ImportAction describing the outcome: \c None on the bail-out
///          paths and whenever no module action is reported,
///          \c SkippedModuleImport or \c ModuleImport (carrying the module)
///          when the header is not entered textually but belongs to a module,
///          \c ModuleBegin when the entered file starts a submodule, and
///          \c Failure when no valid FileID could be created for the file.
2222
Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
2223
    SourceLocation HashLoc, Token &IncludeTok, Token &FilenameTok,
2224
    SourceLocation EndLoc, ConstSearchDirIterator LookupFrom,
2225
    const FileEntry *LookupFromFile) {
2226
  SmallString<128> FilenameBuffer;
2227
  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
2228
  SourceLocation CharEnd = FilenameTok.getEndLoc();
2229

2230
  CharSourceRange FilenameRange
2231
    = CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
2232
  // Keep the spelling with its delimiters still attached; Filename itself may
  // be modified by GetIncludeFilenameSpelling below.
  StringRef OriginalFilename = Filename;
2233
  bool isAngled =
2234
    GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
2235

2236
  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
2237
  // error.
2238
  if (Filename.empty())
2239
    return {ImportAction::None};
2240

2241
  // An import declaration is signalled by an invalid HashLoc (see \param
  // HashLoc above); diagnostics are then anchored at the import token instead.
  bool IsImportDecl = HashLoc.isInvalid();
2242
  SourceLocation StartLoc = IsImportDecl ? IncludeTok.getLocation() : HashLoc;
2243

2244
  // Complain about attempts to #include files in an audit pragma.
2245
  if (PragmaARCCFCodeAuditedInfo.second.isValid()) {
2246
    Diag(StartLoc, diag::err_pp_include_in_arc_cf_code_audited) << IsImportDecl;
2247
    Diag(PragmaARCCFCodeAuditedInfo.second, diag::note_pragma_entered_here);
2248

2249
    // Immediately leave the pragma.
2250
    PragmaARCCFCodeAuditedInfo = {nullptr, SourceLocation()};
2251
  }
2252

2253
  // Complain about attempts to #include files in an assume-nonnull pragma.
2254
  if (PragmaAssumeNonNullLoc.isValid()) {
2255
    Diag(StartLoc, diag::err_pp_include_in_assume_nonnull) << IsImportDecl;
2256
    Diag(PragmaAssumeNonNullLoc, diag::note_pragma_entered_here);
2257

2258
    // Immediately leave the pragma.
2259
    PragmaAssumeNonNullLoc = SourceLocation();
2260
  }
2261

2262
  if (HeaderInfo.HasIncludeAliasMap()) {
2263
    // Map the filename with the brackets still attached.  If the name doesn't
2264
    // map to anything, fall back on the filename we've already gotten the
2265
    // spelling for.
2266
    StringRef NewName = HeaderInfo.MapHeaderToIncludeAlias(OriginalFilename);
2267
    if (!NewName.empty())
2268
      Filename = NewName;
2269
  }
2270

2271
  // Search include directories.
2272
  bool IsMapped = false;
2273
  bool IsFrameworkFound = false;
2274
  ConstSearchDirIterator CurDir = nullptr;
2275
  SmallString<1024> SearchPath;
2276
  SmallString<1024> RelativePath;
2277
  // We get the raw path only if we have 'Callbacks' to which we later pass
2278
  // the path.
2279
  ModuleMap::KnownHeader SuggestedModule;
2280
  SourceLocation FilenameLoc = FilenameTok.getLocation();
2281
  StringRef LookupFilename = Filename;
2282

2283
  // Normalize slashes when compiling with -fms-extensions on non-Windows. This
2284
  // is unnecessary on Windows since the filesystem there handles backslashes.
2285
  SmallString<128> NormalizedPath;
2286
  llvm::sys::path::Style BackslashStyle = llvm::sys::path::Style::native;
2287
  if (is_style_posix(BackslashStyle) && LangOpts.MicrosoftExt) {
2288
    NormalizedPath = Filename.str();
2289
    llvm::sys::path::native(NormalizedPath);
2290
    LookupFilename = NormalizedPath;
2291
    BackslashStyle = llvm::sys::path::Style::windows;
2292
  }
2293

2294
  OptionalFileEntryRef File = LookupHeaderIncludeOrImport(
2295
      &CurDir, Filename, FilenameLoc, FilenameRange, FilenameTok,
2296
      IsFrameworkFound, IsImportDecl, IsMapped, LookupFrom, LookupFromFile,
2297
      LookupFilename, RelativePath, SearchPath, SuggestedModule, isAngled);
2298

2299
  // While still skipping up to the PCH through header, no include is entered;
  // skipping stops once this include resolves to the through header itself.
  if (usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) {
2300
    if (File && isPCHThroughHeader(&File->getFileEntry()))
2301
      SkippingUntilPCHThroughHeader = false;
2302
    return {ImportAction::None};
2303
  }
2304

2305
  // Should we enter the source file? Set to Skip if either the source file is
2306
  // known to have no effect beyond its effect on module visibility -- that is,
2307
  // if it's got an include guard that is already defined, set to Import if it
2308
  // is a modular header we've already built and should import.
2309

2310
  // For C++20 Modules
2311
  // [cpp.include]/7 If the header identified by the header-name denotes an
2312
  // importable header, it is implementation-defined whether the #include
2313
  // preprocessing directive is instead replaced by an import directive.
2314
  // For this implementation, the translation is permitted when we are parsing
2315
  // the Global Module Fragment, and not otherwise (the cases where it would be
2316
  // valid to replace an include with an import are highly constrained once in
2317
  // named module purview; this choice avoids considerable complexity in
2318
  // determining valid cases).
2319

2320
  enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
2321

2322
  // In single-file parse mode no include is entered: IncludeLimitReached makes
  // the switch further down return without entering the file.
  if (PPOpts->SingleFileParseMode)
2323
    Action = IncludeLimitReached;
2324

2325
  // If we've reached the max allowed include depth, it is usually due to an
2326
  // include cycle. Don't enter already processed files again as it can lead to
2327
  // reaching the max allowed include depth again.
2328
  if (Action == Enter && HasReachedMaxIncludeDepth && File &&
2329
      alreadyIncluded(*File))
2330
    Action = IncludeLimitReached;
2331

2332
  // FIXME: We do not have a good way to disambiguate C++ clang modules from
2333
  // C++ standard modules (other than use/non-use of Header Units).
2334

2335
  Module *ModuleToImport = SuggestedModule.getModule();
2336

2337
  bool MaybeTranslateInclude = Action == Enter && File && ModuleToImport &&
2338
                               !ModuleToImport->isForBuilding(getLangOpts());
2339

2340
  // Maybe a usable Header Unit
2341
  bool UsableHeaderUnit = false;
2342
  if (getLangOpts().CPlusPlusModules && ModuleToImport &&
2343
      ModuleToImport->isHeaderUnit()) {
2344
    if (TrackGMFState.inGMF() || IsImportDecl)
2345
      UsableHeaderUnit = true;
2346
    else if (!IsImportDecl) {
2347
      // This is a Header Unit that we do not include-translate
2348
      ModuleToImport = nullptr;
2349
    }
2350
  }
2351
  // Maybe a usable clang header module.
2352
  bool UsableClangHeaderModule =
2353
      (getLangOpts().CPlusPlusModules || getLangOpts().Modules) &&
2354
      ModuleToImport && !ModuleToImport->isHeaderUnit();
2355

2356
  // Determine whether we should try to import the module for this #include, if
2357
  // there is one. Don't do so if precompiled module support is disabled or we
2358
  // are processing this module textually (because we're building the module).
2359
  if (MaybeTranslateInclude && (UsableHeaderUnit || UsableClangHeaderModule)) {
2360
    // If this include corresponds to a module but that module is
2361
    // unavailable, diagnose the situation and bail out.
2362
    // FIXME: Remove this; loadModule does the same check (but produces
2363
    // slightly worse diagnostics).
2364
    if (checkModuleIsAvailable(getLangOpts(), getTargetInfo(), *ModuleToImport,
2365
                               getDiagnostics())) {
2366
      Diag(FilenameTok.getLocation(),
2367
           diag::note_implicit_top_level_module_import_here)
2368
          << ModuleToImport->getTopLevelModuleName();
2369
      return {ImportAction::None};
2370
    }
2371

2372
    // Compute the module access path corresponding to this module.
2373
    // FIXME: Should we have a second loadModule() overload to avoid this
2374
    // extra lookup step?
2375
    SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2376
    for (Module *Mod = ModuleToImport; Mod; Mod = Mod->Parent)
2377
      Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
2378
                                    FilenameTok.getLocation()));
2379
    std::reverse(Path.begin(), Path.end());
2380

2381
    // Warn that we're replacing the include/import with a module import.
2382
    if (!IsImportDecl)
2383
      diagnoseAutoModuleImport(*this, StartLoc, IncludeTok, Path, CharEnd);
2384

2385
    // Load the module to import its macros. We'll make the declarations
2386
    // visible when the parser gets here.
2387
    // FIXME: Pass ModuleToImport in here rather than converting it to a path
2388
    // and making the module loader convert it back again.
2389
    ModuleLoadResult Imported = TheModuleLoader.loadModule(
2390
        IncludeTok.getLocation(), Path, Module::Hidden,
2391
        /*IsInclusionDirective=*/true);
2392
    assert((Imported == nullptr || Imported == ModuleToImport) &&
2393
           "the imported module is different than the suggested one");
2394

2395
    if (Imported) {
2396
      Action = Import;
2397
    } else if (Imported.isMissingExpected()) {
2398
      markClangModuleAsAffecting(
2399
          static_cast<Module *>(Imported)->getTopLevelModule());
2400
      // We failed to find a submodule that we assumed would exist (because it
2401
      // was in the directory of an umbrella header, for instance), but no
2402
      // actual module containing it exists (because the umbrella header is
2403
      // incomplete).  Treat this as a textual inclusion.
2404
      ModuleToImport = nullptr;
2405
    } else if (Imported.isConfigMismatch()) {
2406
      // On a configuration mismatch, enter the header textually. We still know
2407
      // that it's part of the corresponding module.
2408
    } else {
2409
      // We hit an error processing the import. Bail out.
2410
      if (hadModuleLoaderFatalFailure()) {
2411
        // With a fatal failure in the module loader, we abort parsing.
2412
        Token &Result = IncludeTok;
2413
        assert(CurLexer && "#include but no current lexer set!");
2414
        Result.startToken();
2415
        CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd, tok::eof);
2416
        CurLexer->cutOffLexing();
2417
      }
2418
      return {ImportAction::None};
2419
    }
2420
  }
2421

2422
  // The #included file will be considered to be a system header if either it is
2423
  // in a system include directory, or if the #includer is a system include
2424
  // header.
2425
  SrcMgr::CharacteristicKind FileCharacter =
2426
      SourceMgr.getFileCharacteristic(FilenameTok.getLocation());
2427
  if (File)
2428
    FileCharacter = std::max(HeaderInfo.getFileDirFlavor(*File), FileCharacter);
2429

2430
  // If this is a '#import' or an import-declaration, don't re-enter the file.
2431
  //
2432
  // FIXME: If we have a suggested module for a '#include', and we've already
2433
  // visited this file, don't bother entering it again. We know it has no
2434
  // further effect.
2435
  bool EnterOnce =
2436
      IsImportDecl ||
2437
      IncludeTok.getIdentifierInfo()->getPPKeywordID() == tok::pp_import;
2438

2439
  bool IsFirstIncludeOfFile = false;
2440

2441
  // Ask HeaderInfo if we should enter this #include file.  If not, #including
2442
  // this file will have no effect.
2443
  if (Action == Enter && File &&
2444
      !HeaderInfo.ShouldEnterIncludeFile(*this, *File, EnterOnce,
2445
                                         getLangOpts().Modules, ModuleToImport,
2446
                                         IsFirstIncludeOfFile)) {
2447
    // C++ standard modules:
2448
    // If we are not in the GMF, then we textually include only
2449
    // clang modules:
2450
    // Even if we've already preprocessed this header once and know that we
2451
    // don't need to see its contents again, we still need to import it if it's
2452
    // modular because we might not have imported it from this submodule before.
2453
    //
2454
    // FIXME: We don't do this when compiling a PCH because the AST
2455
    // serialization layer can't cope with it. This means we get local
2456
    // submodule visibility semantics wrong in that case.
2457
    if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2458
      Action = TrackGMFState.inGMF() ? Import : Skip;
2459
    else
2460
      Action = (ModuleToImport && !getLangOpts().CompilingPCH) ? Import : Skip;
2461
  }
2462

2463
  // Check for circular inclusion of the main file.
2464
  // We can't generate a consistent preamble with regard to the conditional
2465
  // stack if the main file is included again as due to the preamble bounds
2466
  // some directives (e.g. #endif of a header guard) will never be seen.
2467
  // Since this will lead to confusing errors, avoid the inclusion.
2468
  if (Action == Enter && File && PreambleConditionalStack.isRecording() &&
2469
      SourceMgr.isMainFile(File->getFileEntry())) {
2470
    Diag(FilenameTok.getLocation(),
2471
         diag::err_pp_including_mainfile_in_preamble);
2472
    return {ImportAction::None};
2473
  }
2474

2475
  if (Callbacks && !IsImportDecl) {
2476
    // Notify the callback object that we've seen an inclusion directive.
2477
    // FIXME: Use a different callback for a pp-import?
2478
    Callbacks->InclusionDirective(HashLoc, IncludeTok, LookupFilename, isAngled,
2479
                                  FilenameRange, File, SearchPath, RelativePath,
2480
                                  SuggestedModule.getModule(), Action == Import,
2481
                                  FileCharacter);
2482
    if (Action == Skip && File)
2483
      Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
2484
  }
2485

2486
  // Nothing more can be done for a header that was not found. Note that the
  // InclusionDirective callback above has already run, even without a file.
  if (!File)
2487
    return {ImportAction::None};
2488

2489
  // If this is a C++20 pp-import declaration, diagnose if we didn't find any
2490
  // module corresponding to the named header.
2491
  if (IsImportDecl && !ModuleToImport) {
2492
    Diag(FilenameTok, diag::err_header_import_not_header_unit)
2493
      << OriginalFilename << File->getName();
2494
    return {ImportAction::None};
2495
  }
2496

2497
  // Issue a diagnostic if the name of the file on disk has a different case
2498
  // than the one we're about to open.
2499
  const bool CheckIncludePathPortability =
2500
      !IsMapped && !File->getFileEntry().tryGetRealPathName().empty();
2501

2502
  if (CheckIncludePathPortability) {
2503
    StringRef Name = LookupFilename;
2504
    StringRef NameWithoriginalSlashes = Filename;
2505
#if defined(_WIN32)
2506
    // Skip UNC prefix if present. (tryGetRealPathName() always
2507
    // returns a path with the prefix skipped.)
2508
    bool NameWasUNC = Name.consume_front("\\\\?\\");
2509
    NameWithoriginalSlashes.consume_front("\\\\?\\");
2510
#endif
2511
    StringRef RealPathName = File->getFileEntry().tryGetRealPathName();
2512
    SmallVector<StringRef, 16> Components(llvm::sys::path::begin(Name),
2513
                                          llvm::sys::path::end(Name));
2514
#if defined(_WIN32)
2515
    // -Wnonportable-include-path is designed to diagnose includes using
2516
    // case even on systems with a case-insensitive file system.
2517
    // On Windows, RealPathName always starts with an upper-case drive
2518
    // letter for absolute paths, but Name might start with either
2519
    // case depending on if `cd c:\foo` or `cd C:\foo` was used in the shell.
2520
    // ("foo" will always have on-disk case, no matter which case was
2521
    // used in the cd command). To not emit this warning solely for
2522
    // the drive letter, whose case is dependent on if `cd` is used
2523
    // with upper- or lower-case drive letters, always consider the
2524
    // given drive letter case as correct for the purpose of this warning.
2525
    SmallString<128> FixedDriveRealPath;
2526
    if (llvm::sys::path::is_absolute(Name) &&
2527
        llvm::sys::path::is_absolute(RealPathName) &&
2528
        toLowercase(Name[0]) == toLowercase(RealPathName[0]) &&
2529
        isLowercase(Name[0]) != isLowercase(RealPathName[0])) {
2530
      assert(Components.size() >= 3 && "should have drive, backslash, name");
2531
      assert(Components[0].size() == 2 && "should start with drive");
2532
      assert(Components[0][1] == ':' && "should have colon");
2533
      FixedDriveRealPath = (Name.substr(0, 1) + RealPathName.substr(1)).str();
2534
      RealPathName = FixedDriveRealPath;
2535
    }
2536
#endif
2537

2538
    if (trySimplifyPath(Components, RealPathName, BackslashStyle)) {
2539
      SmallString<128> Path;
2540
      Path.reserve(Name.size()+2);
2541
      Path.push_back(isAngled ? '<' : '"');
2542

2543
      const auto IsSep = [BackslashStyle](char c) {
2544
        return llvm::sys::path::is_separator(c, BackslashStyle);
2545
      };
2546

2547
      for (auto Component : Components) {
2548
        // On POSIX, Components will contain a single '/' as first element
2549
        // exactly if Name is an absolute path.
2550
        // On Windows, it will contain "C:" followed by '\' for absolute paths.
2551
        // The drive letter is optional for absolute paths on Windows, but
2552
        // clang currently cannot process absolute paths in #include lines that
2553
        // don't have a drive.
2554
        // If the first entry in Components is a directory separator,
2555
        // then the code at the bottom of this loop that keeps the original
2556
        // directory separator style copies it. If the second entry is
2557
        // a directory separator (the C:\ case), then that separator already
2558
        // got copied when the C: was processed and we want to skip that entry.
2559
        if (!(Component.size() == 1 && IsSep(Component[0])))
2560
          Path.append(Component);
2561
        else if (Path.size() != 1)
2562
          continue;
2563

2564
        // Append the separator(s) the user used, or the close quote
2565
        if (Path.size() > NameWithoriginalSlashes.size()) {
2566
          Path.push_back(isAngled ? '>' : '"');
2567
          continue;
2568
        }
2569
        assert(IsSep(NameWithoriginalSlashes[Path.size()-1]));
2570
        do
2571
          Path.push_back(NameWithoriginalSlashes[Path.size()-1]);
2572
        while (Path.size() <= NameWithoriginalSlashes.size() &&
2573
               IsSep(NameWithoriginalSlashes[Path.size()-1]));
2574
      }
2575

2576
#if defined(_WIN32)
2577
      // Restore UNC prefix if it was there.
2578
      if (NameWasUNC)
2579
        Path = (Path.substr(0, 1) + "\\\\?\\" + Path.substr(1)).str();
2580
#endif
2581

2582
      // For user files and known standard headers, issue a diagnostic.
2583
      // For other system headers, don't. They can be controlled separately.
2584
      auto DiagId =
2585
          (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name))
2586
              ? diag::pp_nonportable_path
2587
              : diag::pp_nonportable_system_path;
2588
      Diag(FilenameTok, DiagId) << Path <<
2589
        FixItHint::CreateReplacement(FilenameRange, Path);
2590
    }
2591
  }
2592

2593
  switch (Action) {
2594
  case Skip:
2595
    // If we don't need to enter the file, stop now.
2596
    if (ModuleToImport)
2597
      return {ImportAction::SkippedModuleImport, ModuleToImport};
2598
    return {ImportAction::None};
2599

2600
  case IncludeLimitReached:
2601
    // If we reached our include limit and don't want to enter any more files,
2602
    // don't go any further.
2603
    return {ImportAction::None};
2604

2605
  case Import: {
2606
    // If this is a module import, make it visible if needed.
2607
    assert(ModuleToImport && "no module to import");
2608

2609
    makeModuleVisible(ModuleToImport, EndLoc);
2610

2611
    if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
2612
        tok::pp___include_macros)
2613
      return {ImportAction::None};
2614

2615
    return {ImportAction::ModuleImport, ModuleToImport};
2616
  }
2617

2618
  case Enter:
2619
    break;
2620
  }
2621

2622
  // Check that we don't have infinite #include recursion.
2623
  if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) {
2624
    Diag(FilenameTok, diag::err_pp_include_too_deep);
2625
    HasReachedMaxIncludeDepth = true;
2626
    return {ImportAction::None};
2627
  }
2628

2629
  if (isAngled && isInNamedModule())
2630
    Diag(FilenameTok, diag::warn_pp_include_angled_in_module_purview)
2631
        << getNamedModuleName();
2632

2633
  // Look up the file, create a File ID for it.
2634
  SourceLocation IncludePos = FilenameTok.getLocation();
2635
  // If the filename string was the result of macro expansions, set the include
2636
  // position on the file where it will be included and after the expansions.
2637
  if (IncludePos.isMacroID())
2638
    IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
2639
  FileID FID = SourceMgr.createFileID(*File, IncludePos, FileCharacter);
2640
  // No valid FileID could be created: record a fatal module-loader failure and
  // report Failure to the caller.
  if (!FID.isValid()) {
2641
    TheModuleLoader.HadFatalFailure = true;
2642
    return ImportAction::Failure;
2643
  }
2644

2645
  // If all is good, enter the new file!
  // NOTE(review): a true result from EnterSourceFile ends processing here with
  // no module action; presumably it signals that entering failed -- confirm.
2646
  if (EnterSourceFile(FID, CurDir, FilenameTok.getLocation(),
2647
                      IsFirstIncludeOfFile))
2648
    return {ImportAction::None};
2649

2650
  // Determine if we're switching to building a new submodule, and which one.
2651
  // This does not apply for C++20 modules header units.
2652
  if (ModuleToImport && !ModuleToImport->isHeaderUnit()) {
2653
    if (ModuleToImport->getTopLevelModule()->ShadowingModule) {
2654
      // We are building a submodule that belongs to a shadowed module. This
2655
      // means we find header files in the shadowed module.
2656
      Diag(ModuleToImport->DefinitionLoc,
2657
           diag::err_module_build_shadowed_submodule)
2658
          << ModuleToImport->getFullModuleName();
2659
      Diag(ModuleToImport->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2660
           diag::note_previous_definition);
2661
      return {ImportAction::None};
2662
    }
2663
    // When building a pch, -fmodule-name tells the compiler to textually
2664
    // include headers in the specified module. We are not building the
2665
    // specified module.
2666
    //
2667
    // FIXME: This is the wrong way to handle this. We should produce a PCH
2668
    // that behaves the same as the header would behave in a compilation using
2669
    // that PCH, which means we should enter the submodule. We need to teach
2670
    // the AST serialization layer to deal with the resulting AST.
2671
    if (getLangOpts().CompilingPCH &&
2672
        ModuleToImport->isForBuilding(getLangOpts()))
2673
      return {ImportAction::None};
2674

2675
    assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2676
    CurLexerSubmodule = ModuleToImport;
2677

2678
    // Let the macro handling code know that any future macros are within
2679
    // the new submodule.
2680
    EnterSubmodule(ModuleToImport, EndLoc, /*ForPragma*/ false);
2681

2682
    // Let the parser know that any future declarations are within the new
2683
    // submodule.
2684
    // FIXME: There's no point doing this if we're handling a #__include_macros
2685
    // directive.
2686
    return {ImportAction::ModuleBegin, ModuleToImport};
2687
  }
2688

2689
  assert(!IsImportDecl && "failed to diagnose missing module for import decl");
2690
  return {ImportAction::None};
2691
}
2692

2693
/// HandleIncludeNextDirective - Implements \#include_next.
2694
///
2695
void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc,
2696
                                              Token &IncludeNextTok) {
2697
  Diag(IncludeNextTok, diag::ext_pp_include_next_directive);
2698

2699
  ConstSearchDirIterator Lookup = nullptr;
2700
  const FileEntry *LookupFromFile;
2701
  std::tie(Lookup, LookupFromFile) = getIncludeNextStart(IncludeNextTok);
2702

2703
  return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup,
2704
                                LookupFromFile);
2705
}
2706

2707
/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode
2708
void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) {
2709
  // The Microsoft #import directive takes a type library and generates header
2710
  // files from it, and includes those.  This is beyond the scope of what clang
2711
  // does, so we ignore it and error out.  However, #import can optionally have
2712
  // trailing attributes that span multiple lines.  We're going to eat those
2713
  // so we can continue processing from there.
2714
  Diag(Tok, diag::err_pp_import_directive_ms );
2715

2716
  // Read tokens until we get to the end of the directive.  Note that the
2717
  // directive can be split over multiple lines using the backslash character.
2718
  DiscardUntilEndOfDirective();
2719
}
2720

2721
/// HandleImportDirective - Implements \#import.
2722
///
2723
void Preprocessor::HandleImportDirective(SourceLocation HashLoc,
2724
                                         Token &ImportTok) {
2725
  if (!LangOpts.ObjC) {  // #import is standard for ObjC.
2726
    if (LangOpts.MSVCCompat)
2727
      return HandleMicrosoftImportDirective(ImportTok);
2728
    Diag(ImportTok, diag::ext_pp_import_directive);
2729
  }
2730
  return HandleIncludeDirective(HashLoc, ImportTok);
2731
}
2732

2733
/// HandleIncludeMacrosDirective - The -imacros command line option turns into a
2734
/// pseudo directive in the predefines buffer.  This handles it by sucking all
2735
/// tokens through the preprocessor and discarding them (only keeping the side
2736
/// effects on the preprocessor).
2737
void Preprocessor::HandleIncludeMacrosDirective(SourceLocation HashLoc,
2738
                                                Token &IncludeMacrosTok) {
2739
  // This directive should only occur in the predefines buffer.  If not, emit an
2740
  // error and reject it.
2741
  SourceLocation Loc = IncludeMacrosTok.getLocation();
2742
  if (SourceMgr.getBufferName(Loc) != "<built-in>") {
2743
    Diag(IncludeMacrosTok.getLocation(),
2744
         diag::pp_include_macros_out_of_predefines);
2745
    DiscardUntilEndOfDirective();
2746
    return;
2747
  }
2748

2749
  // Treat this as a normal #include for checking purposes.  If this is
2750
  // successful, it will push a new lexer onto the include stack.
2751
  HandleIncludeDirective(HashLoc, IncludeMacrosTok);
2752

2753
  Token TmpTok;
2754
  do {
2755
    Lex(TmpTok);
2756
    assert(TmpTok.isNot(tok::eof) && "Didn't find end of -imacros!");
2757
  } while (TmpTok.isNot(tok::hashhash));
2758
}
2759

2760
//===----------------------------------------------------------------------===//
2761
// Preprocessor Macro Directive Handling.
2762
//===----------------------------------------------------------------------===//
2763

2764
/// ReadMacroParameterList - The ( starting a parameter list of a macro
2765
/// definition has just been read.  Lex the rest of the parameters and the
2766
/// closing ), updating MI with what we learn.  Return true if an error occurs
2767
/// parsing the param list.
2768
bool Preprocessor::ReadMacroParameterList(MacroInfo *MI, Token &Tok) {
2769
  SmallVector<IdentifierInfo*, 32> Parameters;
2770

2771
  while (true) {
2772
    LexUnexpandedNonComment(Tok);
2773
    switch (Tok.getKind()) {
2774
    case tok::r_paren:
2775
      // Found the end of the parameter list.
2776
      if (Parameters.empty())  // #define FOO()
2777
        return false;
2778
      // Otherwise we have #define FOO(A,)
2779
      Diag(Tok, diag::err_pp_expected_ident_in_arg_list);
2780
      return true;
2781
    case tok::ellipsis:  // #define X(... -> C99 varargs
2782
      if (!LangOpts.C99)
2783
        Diag(Tok, LangOpts.CPlusPlus11 ?
2784
             diag::warn_cxx98_compat_variadic_macro :
2785
             diag::ext_variadic_macro);
2786

2787
      // OpenCL v1.2 s6.9.e: variadic macros are not supported.
2788
      if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus) {
2789
        Diag(Tok, diag::ext_pp_opencl_variadic_macros);
2790
      }
2791

2792
      // Lex the token after the identifier.
2793
      LexUnexpandedNonComment(Tok);
2794
      if (Tok.isNot(tok::r_paren)) {
2795
        Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2796
        return true;
2797
      }
2798
      // Add the __VA_ARGS__ identifier as a parameter.
2799
      Parameters.push_back(Ident__VA_ARGS__);
2800
      MI->setIsC99Varargs();
2801
      MI->setParameterList(Parameters, BP);
2802
      return false;
2803
    case tok::eod:  // #define X(
2804
      Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2805
      return true;
2806
    default:
2807
      // Handle keywords and identifiers here to accept things like
2808
      // #define Foo(for) for.
2809
      IdentifierInfo *II = Tok.getIdentifierInfo();
2810
      if (!II) {
2811
        // #define X(1
2812
        Diag(Tok, diag::err_pp_invalid_tok_in_arg_list);
2813
        return true;
2814
      }
2815

2816
      // If this is already used as a parameter, it is used multiple times (e.g.
2817
      // #define X(A,A.
2818
      if (llvm::is_contained(Parameters, II)) { // C99 6.10.3p6
2819
        Diag(Tok, diag::err_pp_duplicate_name_in_arg_list) << II;
2820
        return true;
2821
      }
2822

2823
      // Add the parameter to the macro info.
2824
      Parameters.push_back(II);
2825

2826
      // Lex the token after the identifier.
2827
      LexUnexpandedNonComment(Tok);
2828

2829
      switch (Tok.getKind()) {
2830
      default:          // #define X(A B
2831
        Diag(Tok, diag::err_pp_expected_comma_in_arg_list);
2832
        return true;
2833
      case tok::r_paren: // #define X(A)
2834
        MI->setParameterList(Parameters, BP);
2835
        return false;
2836
      case tok::comma:  // #define X(A,
2837
        break;
2838
      case tok::ellipsis:  // #define X(A... -> GCC extension
2839
        // Diagnose extension.
2840
        Diag(Tok, diag::ext_named_variadic_macro);
2841

2842
        // Lex the token after the identifier.
2843
        LexUnexpandedNonComment(Tok);
2844
        if (Tok.isNot(tok::r_paren)) {
2845
          Diag(Tok, diag::err_pp_missing_rparen_in_macro_def);
2846
          return true;
2847
        }
2848

2849
        MI->setIsGNUVarargs();
2850
        MI->setParameterList(Parameters, BP);
2851
        return false;
2852
      }
2853
    }
2854
  }
2855
}
2856

2857
static bool isConfigurationPattern(Token &MacroName, MacroInfo *MI,
2858
                                   const LangOptions &LOptions) {
2859
  if (MI->getNumTokens() == 1) {
2860
    const Token &Value = MI->getReplacementToken(0);
2861

2862
    // Macro that is identity, like '#define inline inline' is a valid pattern.
2863
    if (MacroName.getKind() == Value.getKind())
2864
      return true;
2865

2866
    // Macro that maps a keyword to the same keyword decorated with leading/
2867
    // trailing underscores is a valid pattern:
2868
    //    #define inline __inline
2869
    //    #define inline __inline__
2870
    //    #define inline _inline (in MS compatibility mode)
2871
    StringRef MacroText = MacroName.getIdentifierInfo()->getName();
2872
    if (IdentifierInfo *II = Value.getIdentifierInfo()) {
2873
      if (!II->isKeyword(LOptions))
2874
        return false;
2875
      StringRef ValueText = II->getName();
2876
      StringRef TrimmedValue = ValueText;
2877
      if (!ValueText.starts_with("__")) {
2878
        if (ValueText.starts_with("_"))
2879
          TrimmedValue = TrimmedValue.drop_front(1);
2880
        else
2881
          return false;
2882
      } else {
2883
        TrimmedValue = TrimmedValue.drop_front(2);
2884
        if (TrimmedValue.ends_with("__"))
2885
          TrimmedValue = TrimmedValue.drop_back(2);
2886
      }
2887
      return TrimmedValue == MacroText;
2888
    } else {
2889
      return false;
2890
    }
2891
  }
2892

2893
  // #define inline
2894
  return MacroName.isOneOf(tok::kw_extern, tok::kw_inline, tok::kw_static,
2895
                           tok::kw_const) &&
2896
         MI->getNumTokens() == 0;
2897
}
2898

2899
// ReadOptionalMacroParameterListAndBody - Consumes the remainder of a macro
// definition line (the optional parameter list followed by the replacement
2900
// list) and stores the tokens in a newly allocated MacroInfo. While doing so
2901
// it performs certain validity checks including (but not limited to):
2902
//   - # (stringization) is followed by a macro parameter or __VA_OPT__
//   - __VA_OPT__ is not nested, is followed by '(' and is closed before the
//     end of the directive
//   - ## is not the first or last token inside the parentheses of __VA_OPT__
2903
//
//  MacroNameTok is the token naming the macro; its location is passed to
//  AllocateMacroInfo. When ImmediatelyAfterHeaderGuard is set and the macro
//  has no tokens after its name, the macro is recorded through
//  MIOpt.SetDefinedMacro as a possible part of a header guard.
//
2904
//  Returns a nullptr if an invalid sequence of tokens is encountered or returns
2905
//  a pointer to a MacroInfo object.
2906

2907
MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
2908
    const Token &MacroNameTok, const bool ImmediatelyAfterHeaderGuard) {
2909

2910
  // LastTok tracks the most recently consumed token of the definition; its
  // location becomes the definition end location at the bottom of this
  // function.
  Token LastTok = MacroNameTok;
2911
  // Create the new macro.
2912
  MacroInfo *const MI = AllocateMacroInfo(MacroNameTok.getLocation());
2913

2914
  Token Tok;
2915
  LexUnexpandedToken(Tok);
2916

2917
  // Ensure we consume the rest of the macro body if errors occur.
2918
  auto _ = llvm::make_scope_exit([&]() {
2919
    // The flag indicates if we are still waiting for 'eod'.
2920
    if (CurLexer->ParsingPreprocessorDirective)
2921
      DiscardUntilEndOfDirective();
2922
  });
2923

2924
  // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
2925
  // within their appropriate context.
2926
  VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
2927

2928
  // If this is a function-like macro definition, parse the argument list,
2929
  // marking each of the identifiers as being used as macro arguments.  Also,
2930
  // check other constraints on the first token of the macro body.
2931
  if (Tok.is(tok::eod)) {
2932
    if (ImmediatelyAfterHeaderGuard) {
2933
      // Save this macro information since it may be part of a header guard.
2934
      CurPPLexer->MIOpt.SetDefinedMacro(MacroNameTok.getIdentifierInfo(),
2935
                                        MacroNameTok.getLocation());
2936
    }
2937
    // If there is no body to this macro, we have no special handling here.
2938
  } else if (Tok.hasLeadingSpace()) {
2939
    // This is a normal token with leading space.  Clear the leading space
2940
    // marker on the first token to get proper expansion.
2941
    Tok.clearFlag(Token::LeadingSpace);
2942
  } else if (Tok.is(tok::l_paren)) {
2943
    // This is a function-like macro definition.  Read the argument list.
2944
    MI->setIsFunctionLike();
2945
    if (ReadMacroParameterList(MI, LastTok))
2946
      return nullptr;
2947

2948
    // If this is a definition of an ISO C/C++ variadic function-like macro (not
2949
    // using the GNU named varargs extension) inform our variadic scope guard
2950
    // which un-poisons and re-poisons certain identifiers (e.g. __VA_ARGS__)
2951
    // allowed only within the definition of a variadic macro.
2952

2953
    if (MI->isC99Varargs()) {
2954
      VariadicMacroScopeGuard.enterScope();
2955
    }
2956

2957
    // Read the first token after the arg list for down below.
2958
    LexUnexpandedToken(Tok);
2959
  } else if (LangOpts.C99 || LangOpts.CPlusPlus11) {
2960
    // C99 requires whitespace between the macro definition and the body.  Emit
2961
    // a diagnostic for something like "#define X+".
2962
    Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name);
2963
  } else {
2964
    // C90 6.8 TC1 says: "In the definition of an object-like macro, if the
2965
    // first character of a replacement list is not a character required by
2966
    // subclause 5.2.1, then there shall be white-space separation between the
2967
    // identifier and the replacement list.".  5.2.1 lists this set:
2968
    //   "A-Za-z0-9!"#%&'()*+,_./:;<=>?[\]^_{|}~" as well as whitespace, which
2969
    // is irrelevant here.
2970
    bool isInvalid = false;
2971
    if (Tok.is(tok::at)) // @ is not in the list above.
2972
      isInvalid = true;
2973
    else if (Tok.is(tok::unknown)) {
2974
      // If we have an unknown token, it is something strange like "`".  Since
2975
      // all of valid characters would have lexed into a single character
2976
      // token of some sort, we know this is not a valid case.
2977
      isInvalid = true;
2978
    }
2979
    if (isInvalid)
2980
      Diag(Tok, diag::ext_missing_whitespace_after_macro_name);
2981
    else
2982
      Diag(Tok, diag::warn_missing_whitespace_after_macro_name);
2983
  }
2984

2985
  // Never let the end-of-directive marker itself become the last token of the
  // definition.
  if (!Tok.is(tok::eod))
2986
    LastTok = Tok;
2987

2988
  // Replacement list collected so far; handed to the MacroInfo at the end.
  SmallVector<Token, 16> Tokens;
2989

2990
  // Read the rest of the macro body.
2991
  if (MI->isObjectLike()) {
2992
    // Object-like macros are very simple, just read their body.
2993
    while (Tok.isNot(tok::eod)) {
2994
      LastTok = Tok;
2995
      Tokens.push_back(Tok);
2996
      // Get the next token of the macro.
2997
      LexUnexpandedToken(Tok);
2998
    }
2999
  } else {
3000
    // Otherwise, read the body of a function-like macro.  While we are at it,
3001
    // check C99 6.10.3.2p1: ensure that # operators are followed by macro
3002
    // parameters in function-like macro expansions.
3003

3004
    VAOptDefinitionContext VAOCtx(*this);
3005

3006
    while (Tok.isNot(tok::eod)) {
3007
      LastTok = Tok;
3008

3009
      // Everything except '#', '#@' and '##' is appended as-is; only
      // __VA_OPT__ and the parentheses belonging to it need extra tracking.
      if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
3010
        Tokens.push_back(Tok);
3011

3012
        if (VAOCtx.isVAOptToken(Tok)) {
3013
          // If we're already within a VAOPT, emit an error.
3014
          if (VAOCtx.isInVAOpt()) {
3015
            Diag(Tok, diag::err_pp_vaopt_nested_use);
3016
            return nullptr;
3017
          }
3018
          // Ensure VAOPT is followed by a '(' .
3019
          LexUnexpandedToken(Tok);
3020
          if (Tok.isNot(tok::l_paren)) {
3021
            Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
3022
            return nullptr;
3023
          }
3024
          Tokens.push_back(Tok);
3025
          VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
3026
          LexUnexpandedToken(Tok);
3027
          if (Tok.is(tok::hashhash)) {
3028
            Diag(Tok, diag::err_vaopt_paste_at_start);
3029
            return nullptr;
3030
          }
3031
          // Tok already holds the token following '('; let the next iteration
          // process it.
          continue;
3032
        } else if (VAOCtx.isInVAOpt()) {
3033
          if (Tok.is(tok::r_paren)) {
3034
            if (VAOCtx.sawClosingParen()) {
3035
              assert(Tokens.size() >= 3 &&
3036
                     "Must have seen at least __VA_OPT__( "
3037
                     "and a subsequent tok::r_paren");
3038
              // The ')' was just appended, so the token before it sits at
              // index size() - 2.
              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
3039
                Diag(Tok, diag::err_vaopt_paste_at_end);
3040
                return nullptr;
3041
              }
3042
            }
3043
          } else if (Tok.is(tok::l_paren)) {
3044
            VAOCtx.sawOpeningParen(Tok.getLocation());
3045
          }
3046
        }
3047
        // Get the next token of the macro.
3048
        LexUnexpandedToken(Tok);
3049
        continue;
3050
      }
3051

3052
      // If we're in -traditional mode, then we should ignore stringification
3053
      // and token pasting. Mark the tokens as unknown so as not to confuse
3054
      // things.
3055
      if (getLangOpts().TraditionalCPP) {
3056
        Tok.setKind(tok::unknown);
3057
        Tokens.push_back(Tok);
3058

3059
        // Get the next token of the macro.
3060
        LexUnexpandedToken(Tok);
3061
        continue;
3062
      }
3063

3064
      if (Tok.is(tok::hashhash)) {
3065
        // If we see token pasting, check if it looks like the gcc comma
3066
        // pasting extension.  We'll use this information to suppress
3067
        // diagnostics later on.
3068

3069
        // Get the next token of the macro.
3070
        LexUnexpandedToken(Tok);
3071

3072
        if (Tok.is(tok::eod)) {
3073
          // The '##' (still held in LastTok) is the final token of the line:
          // keep it in the body and stop reading.
          Tokens.push_back(LastTok);
3074
          break;
3075
        }
3076

3077
        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
3078
            Tokens[Tokens.size() - 1].is(tok::comma))
3079
          MI->setHasCommaPasting();
3080

3081
        // Things look ok, add the '##' token to the macro.
3082
        Tokens.push_back(LastTok);
3083
        continue;
3084
      }
3085

3086
      // Our Token is a stringization operator.
3087
      // Get the next token of the macro.
3088
      LexUnexpandedToken(Tok);
3089

3090
      // Check for a valid macro arg identifier or __VA_OPT__.
3091
      if (!VAOCtx.isVAOptToken(Tok) &&
3092
          (Tok.getIdentifierInfo() == nullptr ||
3093
           MI->getParameterNum(Tok.getIdentifierInfo()) == -1)) {
3094

3095
        // If this is assembler-with-cpp mode, we accept random gibberish after
3096
        // the '#' because '#' is often a comment character.  However, change
3097
        // the kind of the token to tok::unknown so that the preprocessor isn't
3098
        // confused.
3099
        if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
3100
          LastTok.setKind(tok::unknown);
3101
          Tokens.push_back(LastTok);
3102
          continue;
3103
        } else {
3104
          // The streamed flag is true when the operator was '#@' rather
          // than '#'.
          Diag(Tok, diag::err_pp_stringize_not_parameter)
3105
            << LastTok.is(tok::hashat);
3106
          return nullptr;
3107
        }
3108
      }
3109

3110
      // Things look ok, add the '#' and param name tokens to the macro.
3111
      Tokens.push_back(LastTok);
3112

3113
      // If the token following '#' is VAOPT, let the next iteration handle it
3114
      // and check it for correctness, otherwise add the token and prime the
3115
      // loop with the next one.
3116
      if (!VAOCtx.isVAOptToken(Tok)) {
3117
        Tokens.push_back(Tok);
3118
        LastTok = Tok;
3119

3120
        // Get the next token of the macro.
3121
        LexUnexpandedToken(Tok);
3122
      }
3123
    }
3124
    // Reaching the end of the directive while still inside __VA_OPT__( means
    // its closing ')' is missing.
    if (VAOCtx.isInVAOpt()) {
3125
      assert(Tok.is(tok::eod) && "Must be at End Of preprocessing Directive");
3126
      Diag(Tok, diag::err_pp_expected_after)
3127
        << LastTok.getKind() << tok::r_paren;
3128
      Diag(VAOCtx.getUnmatchedOpeningParenLoc(), diag::note_matching) << tok::l_paren;
3129
      return nullptr;
3130
    }
3131
  }
3132
  MI->setDefinitionEndLoc(LastTok.getLocation());
3133

3134
  MI->setTokens(Tokens, BP);
3135
  return MI;
3136
}
3137

3138
static bool isObjCProtectedMacro(const IdentifierInfo *II) {
3139
  return II->isStr("__strong") || II->isStr("__weak") ||
3140
         II->isStr("__unsafe_unretained") || II->isStr("__autoreleasing");
3141
}
3142

3143
/// HandleDefineDirective - Implements \#define.  This consumes the entire macro
3144
/// line then lets the caller lex the next real token.
3145
void Preprocessor::HandleDefineDirective(
3146
    Token &DefineTok, const bool ImmediatelyAfterHeaderGuard) {
3147
  ++NumDefined;
3148

3149
  Token MacroNameTok;
3150
  bool MacroShadowsKeyword;
3151
  ReadMacroName(MacroNameTok, MU_Define, &MacroShadowsKeyword);
3152

3153
  // Error reading macro name?  If so, diagnostic already issued.
3154
  if (MacroNameTok.is(tok::eod))
3155
    return;
3156

3157
  IdentifierInfo *II = MacroNameTok.getIdentifierInfo();
3158
  // Issue a final pragma warning if we're defining a macro that was has been
3159
  // undefined and is being redefined.
3160
  if (!II->hasMacroDefinition() && II->hadMacroDefinition() && II->isFinal())
3161
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3162

3163
  // If we are supposed to keep comments in #defines, reenable comment saving
3164
  // mode.
3165
  if (CurLexer) CurLexer->SetCommentRetentionState(KeepMacroComments);
3166

3167
  MacroInfo *const MI = ReadOptionalMacroParameterListAndBody(
3168
      MacroNameTok, ImmediatelyAfterHeaderGuard);
3169

3170
  if (!MI) return;
3171

3172
  if (MacroShadowsKeyword &&
3173
      !isConfigurationPattern(MacroNameTok, MI, getLangOpts())) {
3174
    Diag(MacroNameTok, diag::warn_pp_macro_hides_keyword);
3175
  }
3176
  // Check that there is no paste (##) operator at the beginning or end of the
3177
  // replacement list.
3178
  unsigned NumTokens = MI->getNumTokens();
3179
  if (NumTokens != 0) {
3180
    if (MI->getReplacementToken(0).is(tok::hashhash)) {
3181
      Diag(MI->getReplacementToken(0), diag::err_paste_at_start);
3182
      return;
3183
    }
3184
    if (MI->getReplacementToken(NumTokens-1).is(tok::hashhash)) {
3185
      Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end);
3186
      return;
3187
    }
3188
  }
3189

3190
  // When skipping just warn about macros that do not match.
3191
  if (SkippingUntilPCHThroughHeader) {
3192
    const MacroInfo *OtherMI = getMacroInfo(MacroNameTok.getIdentifierInfo());
3193
    if (!OtherMI || !MI->isIdenticalTo(*OtherMI, *this,
3194
                             /*Syntactic=*/LangOpts.MicrosoftExt))
3195
      Diag(MI->getDefinitionLoc(), diag::warn_pp_macro_def_mismatch_with_pch)
3196
          << MacroNameTok.getIdentifierInfo();
3197
    // Issue the diagnostic but allow the change if msvc extensions are enabled
3198
    if (!LangOpts.MicrosoftExt)
3199
      return;
3200
  }
3201

3202
  // Finally, if this identifier already had a macro defined for it, verify that
3203
  // the macro bodies are identical, and issue diagnostics if they are not.
3204
  if (const MacroInfo *OtherMI=getMacroInfo(MacroNameTok.getIdentifierInfo())) {
3205
    // Final macros are hard-mode: they always warn. Even if the bodies are
3206
    // identical. Even if they are in system headers. Even if they are things we
3207
    // would silently allow in the past.
3208
    if (MacroNameTok.getIdentifierInfo()->isFinal())
3209
      emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/false);
3210

3211
    // In Objective-C, ignore attempts to directly redefine the builtin
3212
    // definitions of the ownership qualifiers.  It's still possible to
3213
    // #undef them.
3214
    if (getLangOpts().ObjC &&
3215
        SourceMgr.getFileID(OtherMI->getDefinitionLoc()) ==
3216
            getPredefinesFileID() &&
3217
        isObjCProtectedMacro(MacroNameTok.getIdentifierInfo())) {
3218
      // Warn if it changes the tokens.
3219
      if ((!getDiagnostics().getSuppressSystemWarnings() ||
3220
           !SourceMgr.isInSystemHeader(DefineTok.getLocation())) &&
3221
          !MI->isIdenticalTo(*OtherMI, *this,
3222
                             /*Syntactic=*/LangOpts.MicrosoftExt)) {
3223
        Diag(MI->getDefinitionLoc(), diag::warn_pp_objc_macro_redef_ignored);
3224
      }
3225
      assert(!OtherMI->isWarnIfUnused());
3226
      return;
3227
    }
3228

3229
    // It is very common for system headers to have tons of macro redefinitions
3230
    // and for warnings to be disabled in system headers.  If this is the case,
3231
    // then don't bother calling MacroInfo::isIdenticalTo.
3232
    if (!getDiagnostics().getSuppressSystemWarnings() ||
3233
        !SourceMgr.isInSystemHeader(DefineTok.getLocation())) {
3234

3235
      if (!OtherMI->isUsed() && OtherMI->isWarnIfUnused())
3236
        Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used);
3237

3238
      // Warn if defining "__LINE__" and other builtins, per C99 6.10.8/4 and
3239
      // C++ [cpp.predefined]p4, but allow it as an extension.
3240
      if (isLanguageDefinedBuiltin(SourceMgr, OtherMI, II->getName()))
3241
        Diag(MacroNameTok, diag::ext_pp_redef_builtin_macro);
3242
      // Macros must be identical.  This means all tokens and whitespace
3243
      // separation must be the same.  C99 6.10.3p2.
3244
      else if (!OtherMI->isAllowRedefinitionsWithoutWarning() &&
3245
               !MI->isIdenticalTo(*OtherMI, *this, /*Syntactic=*/LangOpts.MicrosoftExt)) {
3246
        Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef)
3247
          << MacroNameTok.getIdentifierInfo();
3248
        Diag(OtherMI->getDefinitionLoc(), diag::note_previous_definition);
3249
      }
3250
    }
3251
    if (OtherMI->isWarnIfUnused())
3252
      WarnUnusedMacroLocs.erase(OtherMI->getDefinitionLoc());
3253
  }
3254

3255
  DefMacroDirective *MD =
3256
      appendDefMacroDirective(MacroNameTok.getIdentifierInfo(), MI);
3257

3258
  assert(!MI->isUsed());
3259
  // If we need warning for not using the macro, add its location in the
3260
  // warn-because-unused-macro set. If it gets used it will be removed from set.
3261
  if (getSourceManager().isInMainFile(MI->getDefinitionLoc()) &&
3262
      !Diags->isIgnored(diag::pp_macro_not_used, MI->getDefinitionLoc()) &&
3263
      !MacroExpansionInDirectivesOverride &&
3264
      getSourceManager().getFileID(MI->getDefinitionLoc()) !=
3265
          getPredefinesFileID()) {
3266
    MI->setIsWarnIfUnused(true);
3267
    WarnUnusedMacroLocs.insert(MI->getDefinitionLoc());
3268
  }
3269

3270
  // If the callbacks want to know, tell them about the macro definition.
3271
  if (Callbacks)
3272
    Callbacks->MacroDefined(MacroNameTok, MD);
3273

3274
  // If we're in MS compatibility mode and the macro being defined is the
3275
  // assert macro, implicitly add a macro definition for static_assert to work
3276
  // around their broken assert.h header file in C. Only do so if there isn't
3277
  // already a static_assert macro defined.
3278
  if (!getLangOpts().CPlusPlus && getLangOpts().MSVCCompat &&
3279
      MacroNameTok.getIdentifierInfo()->isStr("assert") &&
3280
      !isMacroDefined("static_assert")) {
3281
    MacroInfo *MI = AllocateMacroInfo(SourceLocation());
3282

3283
    Token Tok;
3284
    Tok.startToken();
3285
    Tok.setKind(tok::kw__Static_assert);
3286
    Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
3287
    MI->setTokens({Tok}, BP);
3288
    (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
3289
  }
3290
}
3291

3292
/// HandleUndefDirective - Implements \#undef.
3293
///
3294
void Preprocessor::HandleUndefDirective() {
3295
  ++NumUndefined;
3296

3297
  Token MacroNameTok;
3298
  ReadMacroName(MacroNameTok, MU_Undef);
3299

3300
  // Error reading macro name?  If so, diagnostic already issued.
3301
  if (MacroNameTok.is(tok::eod))
3302
    return;
3303

3304
  // Check to see if this is the last token on the #undef line.
3305
  CheckEndOfDirective("undef");
3306

3307
  // Okay, we have a valid identifier to undef.
3308
  auto *II = MacroNameTok.getIdentifierInfo();
3309
  auto MD = getMacroDefinition(II);
3310
  UndefMacroDirective *Undef = nullptr;
3311

3312
  if (II->isFinal())
3313
    emitFinalMacroWarning(MacroNameTok, /*IsUndef=*/true);
3314

3315
  // If the macro is not defined, this is a noop undef.
3316
  if (const MacroInfo *MI = MD.getMacroInfo()) {
3317
    if (!MI->isUsed() && MI->isWarnIfUnused())
3318
      Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used);
3319

3320
    // Warn if undefining "__LINE__" and other builtins, per C99 6.10.8/4 and
3321
    // C++ [cpp.predefined]p4, but allow it as an extension.
3322
    if (isLanguageDefinedBuiltin(SourceMgr, MI, II->getName()))
3323
      Diag(MacroNameTok, diag::ext_pp_undef_builtin_macro);
3324

3325
    if (MI->isWarnIfUnused())
3326
      WarnUnusedMacroLocs.erase(MI->getDefinitionLoc());
3327

3328
    Undef = AllocateUndefMacroDirective(MacroNameTok.getLocation());
3329
  }
3330

3331
  // If the callbacks want to know, tell them about the macro #undef.
3332
  // Note: no matter if the macro was defined or not.
3333
  if (Callbacks)
3334
    Callbacks->MacroUndefined(MacroNameTok, MD, Undef);
3335

3336
  if (Undef)
3337
    appendMacroDirective(II, Undef);
3338
}
3339

3340
//===----------------------------------------------------------------------===//
3341
// Preprocessor Conditional Directive Handling.
3342
//===----------------------------------------------------------------------===//
3343

3344
/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive.  isIfndef
3345
/// is true when this is a \#ifndef directive.  ReadAnyTokensBeforeDirective is
3346
/// true if any tokens have been returned or pp-directives activated before this
3347
/// \#ifndef has been lexed.
3348
///
3349
void Preprocessor::HandleIfdefDirective(Token &Result,
3350
                                        const Token &HashToken,
3351
                                        bool isIfndef,
3352
                                        bool ReadAnyTokensBeforeDirective) {
3353
  ++NumIf;
3354
  Token DirectiveTok = Result;
3355

3356
  Token MacroNameTok;
3357
  ReadMacroName(MacroNameTok);
3358

3359
  // Error reading macro name?  If so, diagnostic already issued.
3360
  if (MacroNameTok.is(tok::eod)) {
3361
    // Skip code until we get to #endif.  This helps with recovery by not
3362
    // emitting an error when the #endif is reached.
3363
    SkipExcludedConditionalBlock(HashToken.getLocation(),
3364
                                 DirectiveTok.getLocation(),
3365
                                 /*Foundnonskip*/ false, /*FoundElse*/ false);
3366
    return;
3367
  }
3368

3369
  emitMacroExpansionWarnings(MacroNameTok, /*IsIfnDef=*/true);
3370

3371
  // Check to see if this is the last token on the #if[n]def line.
3372
  CheckEndOfDirective(isIfndef ? "ifndef" : "ifdef");
3373

3374
  IdentifierInfo *MII = MacroNameTok.getIdentifierInfo();
3375
  auto MD = getMacroDefinition(MII);
3376
  MacroInfo *MI = MD.getMacroInfo();
3377

3378
  if (CurPPLexer->getConditionalStackDepth() == 0) {
3379
    // If the start of a top-level #ifdef and if the macro is not defined,
3380
    // inform MIOpt that this might be the start of a proper include guard.
3381
    // Otherwise it is some other form of unknown conditional which we can't
3382
    // handle.
3383
    if (!ReadAnyTokensBeforeDirective && !MI) {
3384
      assert(isIfndef && "#ifdef shouldn't reach here");
3385
      CurPPLexer->MIOpt.EnterTopLevelIfndef(MII, MacroNameTok.getLocation());
3386
    } else
3387
      CurPPLexer->MIOpt.EnterTopLevelConditional();
3388
  }
3389

3390
  // If there is a macro, process it.
3391
  if (MI)  // Mark it used.
3392
    markMacroAsUsed(MI);
3393

3394
  if (Callbacks) {
3395
    if (isIfndef)
3396
      Callbacks->Ifndef(DirectiveTok.getLocation(), MacroNameTok, MD);
3397
    else
3398
      Callbacks->Ifdef(DirectiveTok.getLocation(), MacroNameTok, MD);
3399
  }
3400

3401
  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3402
    getSourceManager().isInMainFile(DirectiveTok.getLocation());
3403

3404
  // Should we include the stuff contained by this directive?
3405
  if (PPOpts->SingleFileParseMode && !MI) {
3406
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3407
    // the directive blocks.
3408
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3409
                                     /*wasskip*/false, /*foundnonskip*/false,
3410
                                     /*foundelse*/false);
3411
  } else if (!MI == isIfndef || RetainExcludedCB) {
3412
    // Yes, remember that we are inside a conditional, then lex the next token.
3413
    CurPPLexer->pushConditionalLevel(DirectiveTok.getLocation(),
3414
                                     /*wasskip*/false, /*foundnonskip*/true,
3415
                                     /*foundelse*/false);
3416
  } else {
3417
    // No, skip the contents of this block.
3418
    SkipExcludedConditionalBlock(HashToken.getLocation(),
3419
                                 DirectiveTok.getLocation(),
3420
                                 /*Foundnonskip*/ false,
3421
                                 /*FoundElse*/ false);
3422
  }
3423
}
3424

3425
/// HandleIfDirective - Implements the \#if directive.
3426
///
3427
void Preprocessor::HandleIfDirective(Token &IfToken,
3428
                                     const Token &HashToken,
3429
                                     bool ReadAnyTokensBeforeDirective) {
3430
  ++NumIf;
3431

3432
  // Parse and evaluate the conditional expression.
3433
  IdentifierInfo *IfNDefMacro = nullptr;
3434
  const DirectiveEvalResult DER = EvaluateDirectiveExpression(IfNDefMacro);
3435
  const bool ConditionalTrue = DER.Conditional;
3436
  // Lexer might become invalid if we hit code completion point while evaluating
3437
  // expression.
3438
  if (!CurPPLexer)
3439
    return;
3440

3441
  // If this condition is equivalent to #ifndef X, and if this is the first
3442
  // directive seen, handle it for the multiple-include optimization.
3443
  if (CurPPLexer->getConditionalStackDepth() == 0) {
3444
    if (!ReadAnyTokensBeforeDirective && IfNDefMacro && ConditionalTrue)
3445
      // FIXME: Pass in the location of the macro name, not the 'if' token.
3446
      CurPPLexer->MIOpt.EnterTopLevelIfndef(IfNDefMacro, IfToken.getLocation());
3447
    else
3448
      CurPPLexer->MIOpt.EnterTopLevelConditional();
3449
  }
3450

3451
  if (Callbacks)
3452
    Callbacks->If(
3453
        IfToken.getLocation(), DER.ExprRange,
3454
        (ConditionalTrue ? PPCallbacks::CVK_True : PPCallbacks::CVK_False));
3455

3456
  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3457
    getSourceManager().isInMainFile(IfToken.getLocation());
3458

3459
  // Should we include the stuff contained by this directive?
3460
  if (PPOpts->SingleFileParseMode && DER.IncludedUndefinedIds) {
3461
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3462
    // the directive blocks.
3463
    CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3464
                                     /*foundnonskip*/false, /*foundelse*/false);
3465
  } else if (ConditionalTrue || RetainExcludedCB) {
3466
    // Yes, remember that we are inside a conditional, then lex the next token.
3467
    CurPPLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false,
3468
                                   /*foundnonskip*/true, /*foundelse*/false);
3469
  } else {
3470
    // No, skip the contents of this block.
3471
    SkipExcludedConditionalBlock(HashToken.getLocation(), IfToken.getLocation(),
3472
                                 /*Foundnonskip*/ false,
3473
                                 /*FoundElse*/ false);
3474
  }
3475
}
3476

3477
/// HandleEndifDirective - Implements the \#endif directive.
3478
///
3479
void Preprocessor::HandleEndifDirective(Token &EndifToken) {
3480
  ++NumEndif;
3481

3482
  // Check that this is the whole directive.
3483
  CheckEndOfDirective("endif");
3484

3485
  PPConditionalInfo CondInfo;
3486
  if (CurPPLexer->popConditionalLevel(CondInfo)) {
3487
    // No conditionals on the stack: this is an #endif without an #if.
3488
    Diag(EndifToken, diag::err_pp_endif_without_if);
3489
    return;
3490
  }
3491

3492
  // If this the end of a top-level #endif, inform MIOpt.
3493
  if (CurPPLexer->getConditionalStackDepth() == 0)
3494
    CurPPLexer->MIOpt.ExitTopLevelConditional();
3495

3496
  assert(!CondInfo.WasSkipping && !CurPPLexer->LexingRawMode &&
3497
         "This code should only be reachable in the non-skipping case!");
3498

3499
  if (Callbacks)
3500
    Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc);
3501
}
3502

3503
/// HandleElseDirective - Implements the \#else directive.
3504
///
3505
void Preprocessor::HandleElseDirective(Token &Result, const Token &HashToken) {
3506
  ++NumElse;
3507

3508
  // #else directive in a non-skipping conditional... start skipping.
3509
  CheckEndOfDirective("else");
3510

3511
  PPConditionalInfo CI;
3512
  if (CurPPLexer->popConditionalLevel(CI)) {
3513
    Diag(Result, diag::pp_err_else_without_if);
3514
    return;
3515
  }
3516

3517
  // If this is a top-level #else, inform the MIOpt.
3518
  if (CurPPLexer->getConditionalStackDepth() == 0)
3519
    CurPPLexer->MIOpt.EnterTopLevelConditional();
3520

3521
  // If this is a #else with a #else before it, report the error.
3522
  if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
3523

3524
  if (Callbacks)
3525
    Callbacks->Else(Result.getLocation(), CI.IfLoc);
3526

3527
  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3528
    getSourceManager().isInMainFile(Result.getLocation());
3529

3530
  if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3531
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3532
    // the directive blocks.
3533
    CurPPLexer->pushConditionalLevel(CI.IfLoc, /*wasskip*/false,
3534
                                     /*foundnonskip*/false, /*foundelse*/true);
3535
    return;
3536
  }
3537

3538
  // Finally, skip the rest of the contents of this block.
3539
  SkipExcludedConditionalBlock(HashToken.getLocation(), CI.IfLoc,
3540
                               /*Foundnonskip*/ true,
3541
                               /*FoundElse*/ true, Result.getLocation());
3542
}
3543

3544
/// Implements the \#elif, \#elifdef, and \#elifndef directives.
3545
void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
3546
                                             const Token &HashToken,
3547
                                             tok::PPKeywordKind Kind) {
3548
  PPElifDiag DirKind = Kind == tok::pp_elif      ? PED_Elif
3549
                       : Kind == tok::pp_elifdef ? PED_Elifdef
3550
                                                 : PED_Elifndef;
3551
  ++NumElse;
3552

3553
  // Warn if using `#elifdef` & `#elifndef` in not C23 & C++23 mode.
3554
  switch (DirKind) {
3555
  case PED_Elifdef:
3556
  case PED_Elifndef:
3557
    unsigned DiagID;
3558
    if (LangOpts.CPlusPlus)
3559
      DiagID = LangOpts.CPlusPlus23 ? diag::warn_cxx23_compat_pp_directive
3560
                                    : diag::ext_cxx23_pp_directive;
3561
    else
3562
      DiagID = LangOpts.C23 ? diag::warn_c23_compat_pp_directive
3563
                            : diag::ext_c23_pp_directive;
3564
    Diag(ElifToken, DiagID) << DirKind;
3565
    break;
3566
  default:
3567
    break;
3568
  }
3569

3570
  // #elif directive in a non-skipping conditional... start skipping.
3571
  // We don't care what the condition is, because we will always skip it (since
3572
  // the block immediately before it was included).
3573
  SourceRange ConditionRange = DiscardUntilEndOfDirective();
3574

3575
  PPConditionalInfo CI;
3576
  if (CurPPLexer->popConditionalLevel(CI)) {
3577
    Diag(ElifToken, diag::pp_err_elif_without_if) << DirKind;
3578
    return;
3579
  }
3580

3581
  // If this is a top-level #elif, inform the MIOpt.
3582
  if (CurPPLexer->getConditionalStackDepth() == 0)
3583
    CurPPLexer->MIOpt.EnterTopLevelConditional();
3584

3585
  // If this is a #elif with a #else before it, report the error.
3586
  if (CI.FoundElse)
3587
    Diag(ElifToken, diag::pp_err_elif_after_else) << DirKind;
3588

3589
  if (Callbacks) {
3590
    switch (Kind) {
3591
    case tok::pp_elif:
3592
      Callbacks->Elif(ElifToken.getLocation(), ConditionRange,
3593
                      PPCallbacks::CVK_NotEvaluated, CI.IfLoc);
3594
      break;
3595
    case tok::pp_elifdef:
3596
      Callbacks->Elifdef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3597
      break;
3598
    case tok::pp_elifndef:
3599
      Callbacks->Elifndef(ElifToken.getLocation(), ConditionRange, CI.IfLoc);
3600
      break;
3601
    default:
3602
      assert(false && "unexpected directive kind");
3603
      break;
3604
    }
3605
  }
3606

3607
  bool RetainExcludedCB = PPOpts->RetainExcludedConditionalBlocks &&
3608
    getSourceManager().isInMainFile(ElifToken.getLocation());
3609

3610
  if ((PPOpts->SingleFileParseMode && !CI.FoundNonSkip) || RetainExcludedCB) {
3611
    // In 'single-file-parse mode' undefined identifiers trigger parsing of all
3612
    // the directive blocks.
3613
    CurPPLexer->pushConditionalLevel(ElifToken.getLocation(), /*wasskip*/false,
3614
                                     /*foundnonskip*/false, /*foundelse*/false);
3615
    return;
3616
  }
3617

3618
  // Finally, skip the rest of the contents of this block.
3619
  SkipExcludedConditionalBlock(
3620
      HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
3621
      /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
3622
}
3623

3624
std::optional<LexEmbedParametersResult>
3625
Preprocessor::LexEmbedParameters(Token &CurTok, bool ForHasEmbed) {
3626
  LexEmbedParametersResult Result{};
3627
  SmallVector<Token, 2> ParameterTokens;
3628
  tok::TokenKind EndTokenKind = ForHasEmbed ? tok::r_paren : tok::eod;
3629

3630
  auto DiagMismatchedBracesAndSkipToEOD =
3631
      [&](tok::TokenKind Expected,
3632
          std::pair<tok::TokenKind, SourceLocation> Matches) {
3633
        Diag(CurTok, diag::err_expected) << Expected;
3634
        Diag(Matches.second, diag::note_matching) << Matches.first;
3635
        if (CurTok.isNot(tok::eod))
3636
          DiscardUntilEndOfDirective(CurTok);
3637
      };
3638

3639
  auto ExpectOrDiagAndSkipToEOD = [&](tok::TokenKind Kind) {
3640
    if (CurTok.isNot(Kind)) {
3641
      Diag(CurTok, diag::err_expected) << Kind;
3642
      if (CurTok.isNot(tok::eod))
3643
        DiscardUntilEndOfDirective(CurTok);
3644
      return false;
3645
    }
3646
    return true;
3647
  };
3648

3649
  // C23 6.10:
3650
  // pp-parameter-name:
3651
  //   pp-standard-parameter
3652
  //   pp-prefixed-parameter
3653
  //
3654
  // pp-standard-parameter:
3655
  //   identifier
3656
  //
3657
  // pp-prefixed-parameter:
3658
  //   identifier :: identifier
3659
  auto LexPPParameterName = [&]() -> std::optional<std::string> {
3660
    // We expect the current token to be an identifier; if it's not, things
3661
    // have gone wrong.
3662
    if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3663
      return std::nullopt;
3664

3665
    const IdentifierInfo *Prefix = CurTok.getIdentifierInfo();
3666

3667
    // Lex another token; it is either a :: or we're done with the parameter
3668
    // name.
3669
    LexNonComment(CurTok);
3670
    if (CurTok.is(tok::coloncolon)) {
3671
      // We found a ::, so lex another identifier token.
3672
      LexNonComment(CurTok);
3673
      if (!ExpectOrDiagAndSkipToEOD(tok::identifier))
3674
        return std::nullopt;
3675

3676
      const IdentifierInfo *Suffix = CurTok.getIdentifierInfo();
3677

3678
      // Lex another token so we're past the name.
3679
      LexNonComment(CurTok);
3680
      return (llvm::Twine(Prefix->getName()) + "::" + Suffix->getName()).str();
3681
    }
3682
    return Prefix->getName().str();
3683
  };
3684

3685
  // C23 6.10p5: In all aspects, a preprocessor standard parameter specified by
3686
  // this document as an identifier pp_param and an identifier of the form
3687
  // __pp_param__ shall behave the same when used as a preprocessor parameter,
3688
  // except for the spelling.
3689
  auto NormalizeParameterName = [](StringRef Name) {
3690
    if (Name.size() > 4 && Name.starts_with("__") && Name.ends_with("__"))
3691
      return Name.substr(2, Name.size() - 4);
3692
    return Name;
3693
  };
3694

3695
  auto LexParenthesizedIntegerExpr = [&]() -> std::optional<size_t> {
3696
    // we have a limit parameter and its internals are processed using
3697
    // evaluation rules from #if.
3698
    if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3699
      return std::nullopt;
3700

3701
    // We do not consume the ( because EvaluateDirectiveExpression will lex
3702
    // the next token for us.
3703
    IdentifierInfo *ParameterIfNDef = nullptr;
3704
    bool EvaluatedDefined;
3705
    DirectiveEvalResult LimitEvalResult = EvaluateDirectiveExpression(
3706
        ParameterIfNDef, CurTok, EvaluatedDefined, /*CheckForEOD=*/false);
3707

3708
    if (!LimitEvalResult.Value) {
3709
      // If there was an error evaluating the directive expression, we expect
3710
      // to be at the end of directive token.
3711
      assert(CurTok.is(tok::eod) && "expect to be at the end of directive");
3712
      return std::nullopt;
3713
    }
3714

3715
    if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3716
      return std::nullopt;
3717

3718
    // Eat the ).
3719
    LexNonComment(CurTok);
3720

3721
    // C23 6.10.3.2p2: The token defined shall not appear within the constant
3722
    // expression.
3723
    if (EvaluatedDefined) {
3724
      Diag(CurTok, diag::err_defined_in_pp_embed);
3725
      return std::nullopt;
3726
    }
3727

3728
    if (LimitEvalResult.Value) {
3729
      const llvm::APSInt &Result = *LimitEvalResult.Value;
3730
      if (Result.isNegative()) {
3731
        Diag(CurTok, diag::err_requires_positive_value)
3732
            << toString(Result, 10) << /*positive*/ 0;
3733
        return std::nullopt;
3734
      }
3735
      return Result.getLimitedValue();
3736
    }
3737
    return std::nullopt;
3738
  };
3739

3740
  auto GetMatchingCloseBracket = [](tok::TokenKind Kind) {
3741
    switch (Kind) {
3742
    case tok::l_paren:
3743
      return tok::r_paren;
3744
    case tok::l_brace:
3745
      return tok::r_brace;
3746
    case tok::l_square:
3747
      return tok::r_square;
3748
    default:
3749
      llvm_unreachable("should not get here");
3750
    }
3751
  };
3752

3753
  auto LexParenthesizedBalancedTokenSoup =
3754
      [&](llvm::SmallVectorImpl<Token> &Tokens) {
3755
        std::vector<std::pair<tok::TokenKind, SourceLocation>> BracketStack;
3756

3757
        // We expect the current token to be a left paren.
3758
        if (!ExpectOrDiagAndSkipToEOD(tok::l_paren))
3759
          return false;
3760
        LexNonComment(CurTok); // Eat the (
3761

3762
        bool WaitingForInnerCloseParen = false;
3763
        while (CurTok.isNot(tok::eod) &&
3764
               (WaitingForInnerCloseParen || CurTok.isNot(tok::r_paren))) {
3765
          switch (CurTok.getKind()) {
3766
          default: // Shutting up diagnostics about not fully-covered switch.
3767
            break;
3768
          case tok::l_paren:
3769
            WaitingForInnerCloseParen = true;
3770
            [[fallthrough]];
3771
          case tok::l_brace:
3772
          case tok::l_square:
3773
            BracketStack.push_back({CurTok.getKind(), CurTok.getLocation()});
3774
            break;
3775
          case tok::r_paren:
3776
            WaitingForInnerCloseParen = false;
3777
            [[fallthrough]];
3778
          case tok::r_brace:
3779
          case tok::r_square: {
3780
            tok::TokenKind Matching =
3781
                GetMatchingCloseBracket(BracketStack.back().first);
3782
            if (BracketStack.empty() || CurTok.getKind() != Matching) {
3783
              DiagMismatchedBracesAndSkipToEOD(Matching, BracketStack.back());
3784
              return false;
3785
            }
3786
            BracketStack.pop_back();
3787
          } break;
3788
          }
3789
          Tokens.push_back(CurTok);
3790
          LexNonComment(CurTok);
3791
        }
3792

3793
        // When we're done, we want to eat the closing paren.
3794
        if (!ExpectOrDiagAndSkipToEOD(tok::r_paren))
3795
          return false;
3796

3797
        LexNonComment(CurTok); // Eat the )
3798
        return true;
3799
      };
3800

3801
  LexNonComment(CurTok); // Prime the pump.
3802
  while (!CurTok.isOneOf(EndTokenKind, tok::eod)) {
3803
    SourceLocation ParamStartLoc = CurTok.getLocation();
3804
    std::optional<std::string> ParamName = LexPPParameterName();
3805
    if (!ParamName)
3806
      return std::nullopt;
3807
    StringRef Parameter = NormalizeParameterName(*ParamName);
3808

3809
    // Lex the parameters (dependent on the parameter type we want!).
3810
    //
3811
    // C23 6.10.3.Xp1: The X standard embed parameter may appear zero times or
3812
    // one time in the embed parameter sequence.
3813
    if (Parameter == "limit") {
3814
      if (Result.MaybeLimitParam)
3815
        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3816

3817
      std::optional<size_t> Limit = LexParenthesizedIntegerExpr();
3818
      if (!Limit)
3819
        return std::nullopt;
3820
      Result.MaybeLimitParam =
3821
          PPEmbedParameterLimit{*Limit, {ParamStartLoc, CurTok.getLocation()}};
3822
    } else if (Parameter == "clang::offset") {
3823
      if (Result.MaybeOffsetParam)
3824
        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3825

3826
      std::optional<size_t> Offset = LexParenthesizedIntegerExpr();
3827
      if (!Offset)
3828
        return std::nullopt;
3829
      Result.MaybeOffsetParam = PPEmbedParameterOffset{
3830
          *Offset, {ParamStartLoc, CurTok.getLocation()}};
3831
    } else if (Parameter == "prefix") {
3832
      if (Result.MaybePrefixParam)
3833
        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3834

3835
      SmallVector<Token, 4> Soup;
3836
      if (!LexParenthesizedBalancedTokenSoup(Soup))
3837
        return std::nullopt;
3838
      Result.MaybePrefixParam = PPEmbedParameterPrefix{
3839
          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3840
    } else if (Parameter == "suffix") {
3841
      if (Result.MaybeSuffixParam)
3842
        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3843

3844
      SmallVector<Token, 4> Soup;
3845
      if (!LexParenthesizedBalancedTokenSoup(Soup))
3846
        return std::nullopt;
3847
      Result.MaybeSuffixParam = PPEmbedParameterSuffix{
3848
          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3849
    } else if (Parameter == "if_empty") {
3850
      if (Result.MaybeIfEmptyParam)
3851
        Diag(CurTok, diag::err_pp_embed_dup_params) << Parameter;
3852

3853
      SmallVector<Token, 4> Soup;
3854
      if (!LexParenthesizedBalancedTokenSoup(Soup))
3855
        return std::nullopt;
3856
      Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{
3857
          std::move(Soup), {ParamStartLoc, CurTok.getLocation()}};
3858
    } else {
3859
      ++Result.UnrecognizedParams;
3860

3861
      // If there's a left paren, we need to parse a balanced token sequence
3862
      // and just eat those tokens.
3863
      if (CurTok.is(tok::l_paren)) {
3864
        SmallVector<Token, 4> Soup;
3865
        if (!LexParenthesizedBalancedTokenSoup(Soup))
3866
          return std::nullopt;
3867
      }
3868
      if (!ForHasEmbed) {
3869
        Diag(CurTok, diag::err_pp_unknown_parameter) << 1 << Parameter;
3870
        return std::nullopt;
3871
      }
3872
    }
3873
  }
3874
  return Result;
3875
}
3876

3877
void Preprocessor::HandleEmbedDirectiveImpl(
3878
    SourceLocation HashLoc, const LexEmbedParametersResult &Params,
3879
    StringRef BinaryContents) {
3880
  if (BinaryContents.empty()) {
3881
    // If we have no binary contents, the only thing we need to emit are the
3882
    // if_empty tokens, if any.
3883
    // FIXME: this loses AST fidelity; nothing in the compiler will see that
3884
    // these tokens came from #embed. We have to hack around this when printing
3885
    // preprocessed output. The same is true for prefix and suffix tokens.
3886
    if (Params.MaybeIfEmptyParam) {
3887
      ArrayRef<Token> Toks = Params.MaybeIfEmptyParam->Tokens;
3888
      size_t TokCount = Toks.size();
3889
      auto NewToks = std::make_unique<Token[]>(TokCount);
3890
      llvm::copy(Toks, NewToks.get());
3891
      EnterTokenStream(std::move(NewToks), TokCount, true, true);
3892
    }
3893
    return;
3894
  }
3895

3896
  size_t NumPrefixToks = Params.PrefixTokenCount(),
3897
         NumSuffixToks = Params.SuffixTokenCount();
3898
  size_t TotalNumToks = 1 + NumPrefixToks + NumSuffixToks;
3899
  size_t CurIdx = 0;
3900
  auto Toks = std::make_unique<Token[]>(TotalNumToks);
3901

3902
  // Add the prefix tokens, if any.
3903
  if (Params.MaybePrefixParam) {
3904
    llvm::copy(Params.MaybePrefixParam->Tokens, &Toks[CurIdx]);
3905
    CurIdx += NumPrefixToks;
3906
  }
3907

3908
  EmbedAnnotationData *Data = new (BP) EmbedAnnotationData;
3909
  Data->BinaryData = BinaryContents;
3910

3911
  Toks[CurIdx].startToken();
3912
  Toks[CurIdx].setKind(tok::annot_embed);
3913
  Toks[CurIdx].setAnnotationRange(HashLoc);
3914
  Toks[CurIdx++].setAnnotationValue(Data);
3915

3916
  // Now add the suffix tokens, if any.
3917
  if (Params.MaybeSuffixParam) {
3918
    llvm::copy(Params.MaybeSuffixParam->Tokens, &Toks[CurIdx]);
3919
    CurIdx += NumSuffixToks;
3920
  }
3921

3922
  assert(CurIdx == TotalNumToks && "Calculated the incorrect number of tokens");
3923
  EnterTokenStream(std::move(Toks), TotalNumToks, true, true);
3924
}
3925

3926
void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
3927
                                        const FileEntry *LookupFromFile) {
3928
  // Give the usual extension/compatibility warnings.
3929
  if (LangOpts.C23)
3930
    Diag(EmbedTok, diag::warn_compat_pp_embed_directive);
3931
  else
3932
    Diag(EmbedTok, diag::ext_pp_embed_directive)
3933
        << (LangOpts.CPlusPlus ? /*Clang*/ 1 : /*C23*/ 0);
3934

3935
  // Parse the filename header
3936
  Token FilenameTok;
3937
  if (LexHeaderName(FilenameTok))
3938
    return;
3939

3940
  if (FilenameTok.isNot(tok::header_name)) {
3941
    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
3942
    if (FilenameTok.isNot(tok::eod))
3943
      DiscardUntilEndOfDirective();
3944
    return;
3945
  }
3946

3947
  // Parse the optional sequence of
3948
  // directive-parameters:
3949
  //     identifier parameter-name-list[opt] directive-argument-list[opt]
3950
  // directive-argument-list:
3951
  //    '(' balanced-token-sequence ')'
3952
  // parameter-name-list:
3953
  //    '::' identifier parameter-name-list[opt]
3954
  Token CurTok;
3955
  std::optional<LexEmbedParametersResult> Params =
3956
      LexEmbedParameters(CurTok, /*ForHasEmbed=*/false);
3957

3958
  assert((Params || CurTok.is(tok::eod)) &&
3959
         "expected success or to be at the end of the directive");
3960
  if (!Params)
3961
    return;
3962

3963
  // Now, splat the data out!
3964
  SmallString<128> FilenameBuffer;
3965
  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
3966
  StringRef OriginalFilename = Filename;
3967
  bool isAngled =
3968
      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
3969
  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
3970
  // error.
3971
  assert(!Filename.empty());
3972
  OptionalFileEntryRef MaybeFileRef =
3973
      this->LookupEmbedFile(Filename, isAngled, true, LookupFromFile);
3974
  if (!MaybeFileRef) {
3975
    // could not find file
3976
    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
3977
      return;
3978
    }
3979
    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
3980
    return;
3981
  }
3982
  std::optional<llvm::MemoryBufferRef> MaybeFile =
3983
      getSourceManager().getMemoryBufferForFileOrNone(*MaybeFileRef);
3984
  if (!MaybeFile) {
3985
    // could not find file
3986
    Diag(FilenameTok, diag::err_cannot_open_file)
3987
        << Filename << "a buffer to the contents could not be created";
3988
    return;
3989
  }
3990
  StringRef BinaryContents = MaybeFile->getBuffer();
3991

3992
  // The order is important between 'offset' and 'limit'; we want to offset
3993
  // first and then limit second; otherwise we may reduce the notional resource
3994
  // size to something too small to offset into.
3995
  if (Params->MaybeOffsetParam) {
3996
    // FIXME: just like with the limit() and if_empty() parameters, this loses
3997
    // source fidelity in the AST; it has no idea that there was an offset
3998
    // involved.
3999
    // offsets all the way to the end of the file make for an empty file.
4000
    BinaryContents = BinaryContents.substr(Params->MaybeOffsetParam->Offset);
4001
  }
4002

4003
  if (Params->MaybeLimitParam) {
4004
    // FIXME: just like with the clang::offset() and if_empty() parameters,
4005
    // this loses source fidelity in the AST; it has no idea there was a limit
4006
    // involved.
4007
    BinaryContents = BinaryContents.substr(0, Params->MaybeLimitParam->Limit);
4008
  }
4009

4010
  if (Callbacks)
4011
    Callbacks->EmbedDirective(HashLoc, Filename, isAngled, MaybeFileRef,
4012
                              *Params);
4013
  HandleEmbedDirectiveImpl(HashLoc, *Params, BinaryContents);
4014
}
4015

4016
Product

Resources

Company