Path: blob/main/contrib/llvm-project/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
35266 views
//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This code rewrites include invocations into their expansions. This gives you9// a file with all included files merged into it.10//11//===----------------------------------------------------------------------===//1213#include "clang/Rewrite/Frontend/Rewriters.h"14#include "clang/Basic/SourceManager.h"15#include "clang/Frontend/PreprocessorOutputOptions.h"16#include "clang/Lex/Pragma.h"17#include "clang/Lex/Preprocessor.h"18#include "llvm/ADT/SmallString.h"19#include "llvm/Support/raw_ostream.h"20#include <optional>2122using namespace clang;23using namespace llvm;2425namespace {2627class InclusionRewriter : public PPCallbacks {28/// Information about which #includes were actually performed,29/// created by preprocessor callbacks.30struct IncludedFile {31FileID Id;32SrcMgr::CharacteristicKind FileType;33IncludedFile(FileID Id, SrcMgr::CharacteristicKind FileType)34: Id(Id), FileType(FileType) {}35};36Preprocessor &PP; ///< Used to find inclusion directives.37SourceManager &SM; ///< Used to read and manage source files.38raw_ostream &OS; ///< The destination stream for rewritten contents.39StringRef MainEOL; ///< The line ending marker to use.40llvm::MemoryBufferRef PredefinesBuffer; ///< The preprocessor predefines.41bool ShowLineMarkers; ///< Show #line markers.42bool UseLineDirectives; ///< Use of line directives or line markers.43/// Tracks where inclusions that change the file are found.44std::map<SourceLocation, IncludedFile> FileIncludes;45/// Tracks where inclusions that import modules are found.46std::map<SourceLocation, const Module *> ModuleIncludes;47/// Tracks where inclusions that enter modules (in a module build) are found.48std::map<SourceLocation, const Module *> ModuleEntryIncludes;49/// Tracks where #if and #elif directives get evaluated and whether to true.50std::map<SourceLocation, bool> IfConditions;51/// Used transitively for building up the FileIncludes mapping over the52/// various \c PPCallbacks callbacks.53SourceLocation LastInclusionLocation;54public:55InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers,56bool UseLineDirectives);57void Process(FileID FileId, SrcMgr::CharacteristicKind FileType);58void setPredefinesBuffer(const llvm::MemoryBufferRef &Buf) {59PredefinesBuffer = Buf;60}61void detectMainFileEOL();62void handleModuleBegin(Token &Tok) {63assert(Tok.getKind() == tok::annot_module_begin);64ModuleEntryIncludes.insert(65{Tok.getLocation(), (Module *)Tok.getAnnotationValue()});66}67private:68void FileChanged(SourceLocation Loc, FileChangeReason Reason,69SrcMgr::CharacteristicKind FileType,70FileID PrevFID) override;71void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,72SrcMgr::CharacteristicKind FileType) override;73void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,74StringRef FileName, bool IsAngled,75CharSourceRange FilenameRange,76OptionalFileEntryRef File, StringRef SearchPath,77StringRef RelativePath, const Module *SuggestedModule,78bool ModuleImported,79SrcMgr::CharacteristicKind FileType) override;80void If(SourceLocation Loc, SourceRange ConditionRange,81ConditionValueKind ConditionValue) override;82void Elif(SourceLocation Loc, SourceRange ConditionRange,83ConditionValueKind ConditionValue, SourceLocation IfLoc) override;84void WriteLineInfo(StringRef Filename, int Line,85SrcMgr::CharacteristicKind FileType,86StringRef Extra = StringRef());87void WriteImplicitModuleImport(const Module *Mod);88void OutputContentUpTo(const MemoryBufferRef &FromFile, unsigned &WriteFrom,89unsigned WriteTo, StringRef EOL, int &lines,90bool EnsureNewline);91void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,92const MemoryBufferRef &FromFile, StringRef EOL,93unsigned &NextToWrite, int &Lines,94const IncludedFile *Inc = nullptr);95const IncludedFile *FindIncludeAtLocation(SourceLocation Loc) const;96StringRef getIncludedFileName(const IncludedFile *Inc) const;97const Module *FindModuleAtLocation(SourceLocation Loc) const;98const Module *FindEnteredModule(SourceLocation Loc) const;99bool IsIfAtLocationTrue(SourceLocation Loc) const;100StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);101};102103} // end anonymous namespace104105/// Initializes an InclusionRewriter with a \p PP source and \p OS destination.106InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,107bool ShowLineMarkers,108bool UseLineDirectives)109: PP(PP), SM(PP.getSourceManager()), OS(OS), MainEOL("\n"),110ShowLineMarkers(ShowLineMarkers), UseLineDirectives(UseLineDirectives),111LastInclusionLocation(SourceLocation()) {}112113/// Write appropriate line information as either #line directives or GNU line114/// markers depending on what mode we're in, including the \p Filename and115/// \p Line we are located at, using the specified \p EOL line separator, and116/// any \p Extra context specifiers in GNU line directives.117void InclusionRewriter::WriteLineInfo(StringRef Filename, int Line,118SrcMgr::CharacteristicKind FileType,119StringRef Extra) {120if (!ShowLineMarkers)121return;122if (UseLineDirectives) {123OS << "#line" << ' ' << Line << ' ' << '"';124OS.write_escaped(Filename);125OS << '"';126} else {127// Use GNU linemarkers as described here:128// http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html129OS << '#' << ' ' << Line << ' ' << '"';130OS.write_escaped(Filename);131OS << '"';132if (!Extra.empty())133OS << Extra;134if (FileType == SrcMgr::C_System)135// "`3' This indicates that the following text comes from a system header136// file, so certain warnings should be suppressed."137OS << " 3";138else if (FileType == SrcMgr::C_ExternCSystem)139// as above for `3', plus "`4' This indicates that the following text140// should be treated as being wrapped in an implicit extern "C" block."141OS << " 3 4";142}143OS << MainEOL;144}145146void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod) {147OS << "#pragma clang module import " << Mod->getFullModuleName(true)148<< " /* clang -frewrite-includes: implicit import */" << MainEOL;149}150151/// FileChanged - Whenever the preprocessor enters or exits a #include file152/// it invokes this handler.153void InclusionRewriter::FileChanged(SourceLocation Loc,154FileChangeReason Reason,155SrcMgr::CharacteristicKind NewFileType,156FileID) {157if (Reason != EnterFile)158return;159if (LastInclusionLocation.isInvalid())160// we didn't reach this file (eg: the main file) via an inclusion directive161return;162FileID Id = FullSourceLoc(Loc, SM).getFileID();163auto P = FileIncludes.insert(164std::make_pair(LastInclusionLocation, IncludedFile(Id, NewFileType)));165(void)P;166assert(P.second && "Unexpected revisitation of the same include directive");167LastInclusionLocation = SourceLocation();168}169170/// Called whenever an inclusion is skipped due to canonical header protection171/// macros.172void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,173const Token & /*FilenameTok*/,174SrcMgr::CharacteristicKind /*FileType*/) {175assert(LastInclusionLocation.isValid() &&176"A file, that wasn't found via an inclusion directive, was skipped");177LastInclusionLocation = SourceLocation();178}179180/// This should be called whenever the preprocessor encounters include181/// directives. It does not say whether the file has been included, but it182/// provides more information about the directive (hash location instead183/// of location inside the included file). It is assumed that the matching184/// FileChanged() or FileSkipped() is called after this (or neither is185/// called if this #include results in an error or does not textually include186/// anything).187void InclusionRewriter::InclusionDirective(188SourceLocation HashLoc, const Token & /*IncludeTok*/,189StringRef /*FileName*/, bool /*IsAngled*/,190CharSourceRange /*FilenameRange*/, OptionalFileEntryRef /*File*/,191StringRef /*SearchPath*/, StringRef /*RelativePath*/,192const Module *SuggestedModule, bool ModuleImported,193SrcMgr::CharacteristicKind FileType) {194if (ModuleImported) {195auto P = ModuleIncludes.insert(std::make_pair(HashLoc, SuggestedModule));196(void)P;197assert(P.second && "Unexpected revisitation of the same include directive");198} else199LastInclusionLocation = HashLoc;200}201202void InclusionRewriter::If(SourceLocation Loc, SourceRange ConditionRange,203ConditionValueKind ConditionValue) {204auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));205(void)P;206assert(P.second && "Unexpected revisitation of the same if directive");207}208209void InclusionRewriter::Elif(SourceLocation Loc, SourceRange ConditionRange,210ConditionValueKind ConditionValue,211SourceLocation IfLoc) {212auto P = IfConditions.insert(std::make_pair(Loc, ConditionValue == CVK_True));213(void)P;214assert(P.second && "Unexpected revisitation of the same elif directive");215}216217/// Simple lookup for a SourceLocation (specifically one denoting the hash in218/// an inclusion directive) in the map of inclusion information, FileChanges.219const InclusionRewriter::IncludedFile *220InclusionRewriter::FindIncludeAtLocation(SourceLocation Loc) const {221const auto I = FileIncludes.find(Loc);222if (I != FileIncludes.end())223return &I->second;224return nullptr;225}226227/// Simple lookup for a SourceLocation (specifically one denoting the hash in228/// an inclusion directive) in the map of module inclusion information.229const Module *230InclusionRewriter::FindModuleAtLocation(SourceLocation Loc) const {231const auto I = ModuleIncludes.find(Loc);232if (I != ModuleIncludes.end())233return I->second;234return nullptr;235}236237/// Simple lookup for a SourceLocation (specifically one denoting the hash in238/// an inclusion directive) in the map of module entry information.239const Module *240InclusionRewriter::FindEnteredModule(SourceLocation Loc) const {241const auto I = ModuleEntryIncludes.find(Loc);242if (I != ModuleEntryIncludes.end())243return I->second;244return nullptr;245}246247bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {248const auto I = IfConditions.find(Loc);249if (I != IfConditions.end())250return I->second;251return false;252}253254void InclusionRewriter::detectMainFileEOL() {255std::optional<MemoryBufferRef> FromFile =256*SM.getBufferOrNone(SM.getMainFileID());257assert(FromFile);258if (!FromFile)259return; // Should never happen, but whatever.260MainEOL = FromFile->getBuffer().detectEOL();261}262263/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at264/// \p WriteTo - 1.265void InclusionRewriter::OutputContentUpTo(const MemoryBufferRef &FromFile,266unsigned &WriteFrom, unsigned WriteTo,267StringRef LocalEOL, int &Line,268bool EnsureNewline) {269if (WriteTo <= WriteFrom)270return;271if (FromFile == PredefinesBuffer) {272// Ignore the #defines of the predefines buffer.273WriteFrom = WriteTo;274return;275}276277// If we would output half of a line ending, advance one character to output278// the whole line ending. All buffers are null terminated, so looking ahead279// one byte is safe.280if (LocalEOL.size() == 2 &&281LocalEOL[0] == (FromFile.getBufferStart() + WriteTo)[-1] &&282LocalEOL[1] == (FromFile.getBufferStart() + WriteTo)[0])283WriteTo++;284285StringRef TextToWrite(FromFile.getBufferStart() + WriteFrom,286WriteTo - WriteFrom);287// count lines manually, it's faster than getPresumedLoc()288Line += TextToWrite.count(LocalEOL);289290if (MainEOL == LocalEOL) {291OS << TextToWrite;292} else {293// Output the file one line at a time, rewriting the line endings as we go.294StringRef Rest = TextToWrite;295while (!Rest.empty()) {296// Identify and output the next line excluding an EOL sequence if present.297size_t Idx = Rest.find(LocalEOL);298StringRef LineText = Rest.substr(0, Idx);299OS << LineText;300if (Idx != StringRef::npos) {301// An EOL sequence was present, output the EOL sequence for the302// main source file and skip past the local EOL sequence.303OS << MainEOL;304Idx += LocalEOL.size();305}306// Strip the line just handled. If Idx is npos or matches the end of the307// text, Rest will be set to an empty string and the loop will terminate.308Rest = Rest.substr(Idx);309}310}311if (EnsureNewline && !TextToWrite.ends_with(LocalEOL))312OS << MainEOL;313314WriteFrom = WriteTo;315}316317StringRef318InclusionRewriter::getIncludedFileName(const IncludedFile *Inc) const {319if (Inc) {320auto B = SM.getBufferOrNone(Inc->Id);321assert(B && "Attempting to process invalid inclusion");322if (B)323return llvm::sys::path::filename(B->getBufferIdentifier());324}325return StringRef();326}327328/// Print characters from \p FromFile starting at \p NextToWrite up until the329/// inclusion directive at \p StartToken, then print out the inclusion330/// inclusion directive disabled by a #if directive, updating \p NextToWrite331/// and \p Line to track the number of source lines visited and the progress332/// through the \p FromFile buffer.333void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,334const Token &StartToken,335const MemoryBufferRef &FromFile,336StringRef LocalEOL,337unsigned &NextToWrite, int &Line,338const IncludedFile *Inc) {339OutputContentUpTo(FromFile, NextToWrite,340SM.getFileOffset(StartToken.getLocation()), LocalEOL, Line,341false);342Token DirectiveToken;343do {344DirectiveLex.LexFromRawLexer(DirectiveToken);345} while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));346if (FromFile == PredefinesBuffer) {347// OutputContentUpTo() would not output anything anyway.348return;349}350if (Inc) {351OS << "#if defined(__CLANG_REWRITTEN_INCLUDES) ";352if (isSystem(Inc->FileType))353OS << "|| defined(__CLANG_REWRITTEN_SYSTEM_INCLUDES) ";354OS << "/* " << getIncludedFileName(Inc);355} else {356OS << "#if 0 /*";357}358OS << " expanded by -frewrite-includes */" << MainEOL;359OutputContentUpTo(FromFile, NextToWrite,360SM.getFileOffset(DirectiveToken.getLocation()) +361DirectiveToken.getLength(),362LocalEOL, Line, true);363OS << (Inc ? "#else /* " : "#endif /*") << getIncludedFileName(Inc)364<< " expanded by -frewrite-includes */" << MainEOL;365}366367/// Find the next identifier in the pragma directive specified by \p RawToken.368StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,369Token &RawToken) {370RawLex.LexFromRawLexer(RawToken);371if (RawToken.is(tok::raw_identifier))372PP.LookUpIdentifierInfo(RawToken);373if (RawToken.is(tok::identifier))374return RawToken.getIdentifierInfo()->getName();375return StringRef();376}377378/// Use a raw lexer to analyze \p FileId, incrementally copying parts of it379/// and including content of included files recursively.380void InclusionRewriter::Process(FileID FileId,381SrcMgr::CharacteristicKind FileType) {382MemoryBufferRef FromFile;383{384auto B = SM.getBufferOrNone(FileId);385assert(B && "Attempting to process invalid inclusion");386if (B)387FromFile = *B;388}389StringRef FileName = FromFile.getBufferIdentifier();390Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());391RawLex.SetCommentRetentionState(false);392393StringRef LocalEOL = FromFile.getBuffer().detectEOL();394395// Per the GNU docs: "1" indicates entering a new file.396if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())397WriteLineInfo(FileName, 1, FileType, "");398else399WriteLineInfo(FileName, 1, FileType, " 1");400401if (SM.getFileIDSize(FileId) == 0)402return;403404// The next byte to be copied from the source file, which may be non-zero if405// the lexer handled a BOM.406unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());407assert(SM.getLineNumber(FileId, NextToWrite) == 1);408int Line = 1; // The current input file line number.409410Token RawToken;411RawLex.LexFromRawLexer(RawToken);412413// TODO: Consider adding a switch that strips possibly unimportant content,414// such as comments, to reduce the size of repro files.415while (RawToken.isNot(tok::eof)) {416if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {417RawLex.setParsingPreprocessorDirective(true);418Token HashToken = RawToken;419RawLex.LexFromRawLexer(RawToken);420if (RawToken.is(tok::raw_identifier))421PP.LookUpIdentifierInfo(RawToken);422if (RawToken.getIdentifierInfo() != nullptr) {423switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {424case tok::pp_include:425case tok::pp_include_next:426case tok::pp_import: {427SourceLocation Loc = HashToken.getLocation();428const IncludedFile *Inc = FindIncludeAtLocation(Loc);429CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,430NextToWrite, Line, Inc);431if (FileId != PP.getPredefinesFileID())432WriteLineInfo(FileName, Line - 1, FileType, "");433StringRef LineInfoExtra;434if (const Module *Mod = FindModuleAtLocation(Loc))435WriteImplicitModuleImport(Mod);436else if (Inc) {437const Module *Mod = FindEnteredModule(Loc);438if (Mod)439OS << "#pragma clang module begin "440<< Mod->getFullModuleName(true) << "\n";441442// Include and recursively process the file.443Process(Inc->Id, Inc->FileType);444445if (Mod)446OS << "#pragma clang module end /*"447<< Mod->getFullModuleName(true) << "*/\n";448// There's no #include, therefore no #if, for -include files.449if (FromFile != PredefinesBuffer) {450OS << "#endif /* " << getIncludedFileName(Inc)451<< " expanded by -frewrite-includes */" << LocalEOL;452}453454// Add line marker to indicate we're returning from an included455// file.456LineInfoExtra = " 2";457}458// fix up lineinfo (since commented out directive changed line459// numbers) for inclusions that were skipped due to header guards460WriteLineInfo(FileName, Line, FileType, LineInfoExtra);461break;462}463case tok::pp_pragma: {464StringRef Identifier = NextIdentifierName(RawLex, RawToken);465if (Identifier == "clang" || Identifier == "GCC") {466if (NextIdentifierName(RawLex, RawToken) == "system_header") {467// keep the directive in, commented out468CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,469NextToWrite, Line);470// update our own type471FileType = SM.getFileCharacteristic(RawToken.getLocation());472WriteLineInfo(FileName, Line, FileType);473}474} else if (Identifier == "once") {475// keep the directive in, commented out476CommentOutDirective(RawLex, HashToken, FromFile, LocalEOL,477NextToWrite, Line);478WriteLineInfo(FileName, Line, FileType);479}480break;481}482case tok::pp_if:483case tok::pp_elif: {484bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==485tok::pp_elif);486bool isTrue = IsIfAtLocationTrue(RawToken.getLocation());487OutputContentUpTo(FromFile, NextToWrite,488SM.getFileOffset(HashToken.getLocation()),489LocalEOL, Line, /*EnsureNewline=*/true);490do {491RawLex.LexFromRawLexer(RawToken);492} while (!RawToken.is(tok::eod) && RawToken.isNot(tok::eof));493// We need to disable the old condition, but that is tricky.494// Trying to comment it out can easily lead to comment nesting.495// So instead make the condition harmless by making it enclose496// and empty block. Moreover, put it itself inside an #if 0 block497// to disable it from getting evaluated (e.g. __has_include_next498// warns if used from the primary source file).499OS << "#if 0 /* disabled by -frewrite-includes */" << MainEOL;500if (elif) {501OS << "#if 0" << MainEOL;502}503OutputContentUpTo(FromFile, NextToWrite,504SM.getFileOffset(RawToken.getLocation()) +505RawToken.getLength(),506LocalEOL, Line, /*EnsureNewline=*/true);507// Close the empty block and the disabling block.508OS << "#endif" << MainEOL;509OS << "#endif /* disabled by -frewrite-includes */" << MainEOL;510OS << (elif ? "#elif " : "#if ") << (isTrue ? "1" : "0")511<< " /* evaluated by -frewrite-includes */" << MainEOL;512WriteLineInfo(FileName, Line, FileType);513break;514}515case tok::pp_endif:516case tok::pp_else: {517// We surround every #include by #if 0 to comment it out, but that518// changes line numbers. These are fixed up right after that, but519// the whole #include could be inside a preprocessor conditional520// that is not processed. So it is necessary to fix the line521// numbers one the next line after each #else/#endif as well.522RawLex.SetKeepWhitespaceMode(true);523do {524RawLex.LexFromRawLexer(RawToken);525} while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));526OutputContentUpTo(FromFile, NextToWrite,527SM.getFileOffset(RawToken.getLocation()) +528RawToken.getLength(),529LocalEOL, Line, /*EnsureNewline=*/ true);530WriteLineInfo(FileName, Line, FileType);531RawLex.SetKeepWhitespaceMode(false);532break;533}534default:535break;536}537}538RawLex.setParsingPreprocessorDirective(false);539}540RawLex.LexFromRawLexer(RawToken);541}542OutputContentUpTo(FromFile, NextToWrite,543SM.getFileOffset(SM.getLocForEndOfFile(FileId)), LocalEOL,544Line, /*EnsureNewline=*/true);545}546547/// InclusionRewriterInInput - Implement -frewrite-includes mode.548void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,549const PreprocessorOutputOptions &Opts) {550SourceManager &SM = PP.getSourceManager();551InclusionRewriter *Rewrite = new InclusionRewriter(552PP, *OS, Opts.ShowLineMarkers, Opts.UseLineDirectives);553Rewrite->detectMainFileEOL();554555PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Rewrite));556PP.IgnorePragmas();557558// First let the preprocessor process the entire file and call callbacks.559// Callbacks will record which #include's were actually performed.560PP.EnterMainSourceFile();561Token Tok;562// Only preprocessor directives matter here, so disable macro expansion563// everywhere else as an optimization.564// TODO: It would be even faster if the preprocessor could be switched565// to a mode where it would parse only preprocessor directives and comments,566// nothing else matters for parsing or processing.567PP.SetMacroExpansionOnlyInDirectives();568do {569PP.Lex(Tok);570if (Tok.is(tok::annot_module_begin))571Rewrite->handleModuleBegin(Tok);572} while (Tok.isNot(tok::eof));573Rewrite->setPredefinesBuffer(SM.getBufferOrFake(PP.getPredefinesFileID()));574Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);575Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);576OS->flush();577}578579580