Path: blob/main/contrib/llvm-project/llvm/tools/llvm-extract/llvm-extract.cpp
35258 views
//===- llvm-extract.cpp - LLVM function extraction utility ----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This utility changes the input module to only contain a single function,9// which is primarily used for debugging transformations.10//11//===----------------------------------------------------------------------===//1213#include "llvm/ADT/SetVector.h"14#include "llvm/ADT/SmallPtrSet.h"15#include "llvm/Bitcode/BitcodeWriterPass.h"16#include "llvm/IR/DataLayout.h"17#include "llvm/IR/IRPrintingPasses.h"18#include "llvm/IR/Instructions.h"19#include "llvm/IR/LLVMContext.h"20#include "llvm/IR/Module.h"21#include "llvm/IRPrinter/IRPrintingPasses.h"22#include "llvm/IRReader/IRReader.h"23#include "llvm/Passes/PassBuilder.h"24#include "llvm/Support/CommandLine.h"25#include "llvm/Support/Error.h"26#include "llvm/Support/FileSystem.h"27#include "llvm/Support/InitLLVM.h"28#include "llvm/Support/Regex.h"29#include "llvm/Support/SourceMgr.h"30#include "llvm/Support/SystemUtils.h"31#include "llvm/Support/ToolOutputFile.h"32#include "llvm/Transforms/IPO.h"33#include "llvm/Transforms/IPO/BlockExtractor.h"34#include "llvm/Transforms/IPO/ExtractGV.h"35#include "llvm/Transforms/IPO/GlobalDCE.h"36#include "llvm/Transforms/IPO/StripDeadPrototypes.h"37#include "llvm/Transforms/IPO/StripSymbols.h"38#include <memory>39#include <utility>4041using namespace llvm;4243cl::OptionCategory ExtractCat("llvm-extract Options");4445// InputFilename - The filename to read from.46static cl::opt<std::string> InputFilename(cl::Positional,47cl::desc("<input bitcode file>"),48cl::init("-"),49cl::value_desc("filename"));5051static cl::opt<std::string> OutputFilename("o",52cl::desc("Specify output filename"),53cl::value_desc("filename"),54cl::init("-"), cl::cat(ExtractCat));5556static cl::opt<bool> Force("f", cl::desc("Enable binary output on terminals"),57cl::cat(ExtractCat));5859static cl::opt<bool> DeleteFn("delete",60cl::desc("Delete specified Globals from Module"),61cl::cat(ExtractCat));6263static cl::opt<bool> KeepConstInit("keep-const-init",64cl::desc("Keep initializers of constants"),65cl::cat(ExtractCat));6667static cl::opt<bool>68Recursive("recursive", cl::desc("Recursively extract all called functions"),69cl::cat(ExtractCat));7071// ExtractFuncs - The functions to extract from the module.72static cl::list<std::string>73ExtractFuncs("func", cl::desc("Specify function to extract"),74cl::value_desc("function"), cl::cat(ExtractCat));7576// ExtractRegExpFuncs - The functions, matched via regular expression, to77// extract from the module.78static cl::list<std::string>79ExtractRegExpFuncs("rfunc",80cl::desc("Specify function(s) to extract using a "81"regular expression"),82cl::value_desc("rfunction"), cl::cat(ExtractCat));8384// ExtractBlocks - The blocks to extract from the module.85static cl::list<std::string> ExtractBlocks(86"bb",87cl::desc(88"Specify <function, basic block1[;basic block2...]> pairs to extract.\n"89"Each pair will create a function.\n"90"If multiple basic blocks are specified in one pair,\n"91"the first block in the sequence should dominate the rest.\n"92"eg:\n"93" --bb=f:bb1;bb2 will extract one function with both bb1 and bb2;\n"94" --bb=f:bb1 --bb=f:bb2 will extract two functions, one with bb1, one "95"with bb2."),96cl::value_desc("function:bb1[;bb2...]"), cl::cat(ExtractCat));9798// ExtractAlias - The alias to extract from the module.99static cl::list<std::string>100ExtractAliases("alias", cl::desc("Specify alias to extract"),101cl::value_desc("alias"), cl::cat(ExtractCat));102103// ExtractRegExpAliases - The aliases, matched via regular expression, to104// extract from the module.105static cl::list<std::string>106ExtractRegExpAliases("ralias",107cl::desc("Specify alias(es) to extract using a "108"regular expression"),109cl::value_desc("ralias"), cl::cat(ExtractCat));110111// ExtractGlobals - The globals to extract from the module.112static cl::list<std::string>113ExtractGlobals("glob", cl::desc("Specify global to extract"),114cl::value_desc("global"), cl::cat(ExtractCat));115116// ExtractRegExpGlobals - The globals, matched via regular expression, to117// extract from the module...118static cl::list<std::string>119ExtractRegExpGlobals("rglob",120cl::desc("Specify global(s) to extract using a "121"regular expression"),122cl::value_desc("rglobal"), cl::cat(ExtractCat));123124static cl::opt<bool> OutputAssembly("S",125cl::desc("Write output as LLVM assembly"),126cl::Hidden, cl::cat(ExtractCat));127128static cl::opt<bool> PreserveBitcodeUseListOrder(129"preserve-bc-uselistorder",130cl::desc("Preserve use-list order when writing LLVM bitcode."),131cl::init(true), cl::Hidden, cl::cat(ExtractCat));132133static cl::opt<bool> PreserveAssemblyUseListOrder(134"preserve-ll-uselistorder",135cl::desc("Preserve use-list order when writing LLVM assembly."),136cl::init(false), cl::Hidden, cl::cat(ExtractCat));137138int main(int argc, char **argv) {139InitLLVM X(argc, argv);140141LLVMContext Context;142cl::HideUnrelatedOptions(ExtractCat);143cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n");144145// Use lazy loading, since we only care about selected global values.146SMDiagnostic Err;147std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context);148149if (!M) {150Err.print(argv[0], errs());151return 1;152}153154// Use SetVector to avoid duplicates.155SetVector<GlobalValue *> GVs;156157// Figure out which aliases we should extract.158for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) {159GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]);160if (!GA) {161errs() << argv[0] << ": program doesn't contain alias named '"162<< ExtractAliases[i] << "'!\n";163return 1;164}165GVs.insert(GA);166}167168// Extract aliases via regular expression matching.169for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) {170std::string Error;171Regex RegEx(ExtractRegExpAliases[i]);172if (!RegEx.isValid(Error)) {173errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' "174"invalid regex: " << Error;175}176bool match = false;177for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end();178GA != E; GA++) {179if (RegEx.match(GA->getName())) {180GVs.insert(&*GA);181match = true;182}183}184if (!match) {185errs() << argv[0] << ": program doesn't contain global named '"186<< ExtractRegExpAliases[i] << "'!\n";187return 1;188}189}190191// Figure out which globals we should extract.192for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) {193GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]);194if (!GV) {195errs() << argv[0] << ": program doesn't contain global named '"196<< ExtractGlobals[i] << "'!\n";197return 1;198}199GVs.insert(GV);200}201202// Extract globals via regular expression matching.203for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) {204std::string Error;205Regex RegEx(ExtractRegExpGlobals[i]);206if (!RegEx.isValid(Error)) {207errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' "208"invalid regex: " << Error;209}210bool match = false;211for (auto &GV : M->globals()) {212if (RegEx.match(GV.getName())) {213GVs.insert(&GV);214match = true;215}216}217if (!match) {218errs() << argv[0] << ": program doesn't contain global named '"219<< ExtractRegExpGlobals[i] << "'!\n";220return 1;221}222}223224// Figure out which functions we should extract.225for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) {226GlobalValue *GV = M->getFunction(ExtractFuncs[i]);227if (!GV) {228errs() << argv[0] << ": program doesn't contain function named '"229<< ExtractFuncs[i] << "'!\n";230return 1;231}232GVs.insert(GV);233}234// Extract functions via regular expression matching.235for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) {236std::string Error;237StringRef RegExStr = ExtractRegExpFuncs[i];238Regex RegEx(RegExStr);239if (!RegEx.isValid(Error)) {240errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' "241"invalid regex: " << Error;242}243bool match = false;244for (Module::iterator F = M->begin(), E = M->end(); F != E;245F++) {246if (RegEx.match(F->getName())) {247GVs.insert(&*F);248match = true;249}250}251if (!match) {252errs() << argv[0] << ": program doesn't contain global named '"253<< ExtractRegExpFuncs[i] << "'!\n";254return 1;255}256}257258// Figure out which BasicBlocks we should extract.259SmallVector<std::pair<Function *, SmallVector<StringRef, 16>>, 2> BBMap;260for (StringRef StrPair : ExtractBlocks) {261SmallVector<StringRef, 16> BBNames;262auto BBInfo = StrPair.split(':');263// Get the function.264Function *F = M->getFunction(BBInfo.first);265if (!F) {266errs() << argv[0] << ": program doesn't contain a function named '"267<< BBInfo.first << "'!\n";268return 1;269}270// Add the function to the materialize list, and store the basic block names271// to check after materialization.272GVs.insert(F);273BBInfo.second.split(BBNames, ';', /*MaxSplit=*/-1, /*KeepEmpty=*/false);274BBMap.push_back({F, std::move(BBNames)});275}276277// Use *argv instead of argv[0] to work around a wrong GCC warning.278ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: ");279280if (Recursive) {281std::vector<llvm::Function *> Workqueue;282for (GlobalValue *GV : GVs) {283if (auto *F = dyn_cast<Function>(GV)) {284Workqueue.push_back(F);285}286}287while (!Workqueue.empty()) {288Function *F = &*Workqueue.back();289Workqueue.pop_back();290ExitOnErr(F->materialize());291for (auto &BB : *F) {292for (auto &I : BB) {293CallBase *CB = dyn_cast<CallBase>(&I);294if (!CB)295continue;296Function *CF = CB->getCalledFunction();297if (!CF)298continue;299if (CF->isDeclaration() || GVs.count(CF))300continue;301GVs.insert(CF);302Workqueue.push_back(CF);303}304}305}306}307308auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); };309310// Materialize requisite global values.311if (!DeleteFn) {312for (size_t i = 0, e = GVs.size(); i != e; ++i)313Materialize(*GVs[i]);314} else {315// Deleting. Materialize every GV that's *not* in GVs.316SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end());317for (auto &F : *M) {318if (!GVSet.count(&F))319Materialize(F);320}321}322323{324std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end());325LoopAnalysisManager LAM;326FunctionAnalysisManager FAM;327CGSCCAnalysisManager CGAM;328ModuleAnalysisManager MAM;329330PassBuilder PB;331332PB.registerModuleAnalyses(MAM);333PB.registerCGSCCAnalyses(CGAM);334PB.registerFunctionAnalyses(FAM);335PB.registerLoopAnalyses(LAM);336PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);337338ModulePassManager PM;339PM.addPass(ExtractGVPass(Gvs, DeleteFn, KeepConstInit));340PM.run(*M, MAM);341342// Now that we have all the GVs we want, mark the module as fully343// materialized.344// FIXME: should the GVExtractionPass handle this?345ExitOnErr(M->materializeAll());346}347348// Extract the specified basic blocks from the module and erase the existing349// functions.350if (!ExtractBlocks.empty()) {351// Figure out which BasicBlocks we should extract.352std::vector<std::vector<BasicBlock *>> GroupOfBBs;353for (auto &P : BBMap) {354std::vector<BasicBlock *> BBs;355for (StringRef BBName : P.second) {356// The function has been materialized, so add its matching basic blocks357// to the block extractor list, or fail if a name is not found.358auto Res = llvm::find_if(*P.first, [&](const BasicBlock &BB) {359return BB.getName() == BBName;360});361if (Res == P.first->end()) {362errs() << argv[0] << ": function " << P.first->getName()363<< " doesn't contain a basic block named '" << BBName364<< "'!\n";365return 1;366}367BBs.push_back(&*Res);368}369GroupOfBBs.push_back(BBs);370}371372LoopAnalysisManager LAM;373FunctionAnalysisManager FAM;374CGSCCAnalysisManager CGAM;375ModuleAnalysisManager MAM;376377PassBuilder PB;378379PB.registerModuleAnalyses(MAM);380PB.registerCGSCCAnalyses(CGAM);381PB.registerFunctionAnalyses(FAM);382PB.registerLoopAnalyses(LAM);383PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);384385ModulePassManager PM;386PM.addPass(BlockExtractorPass(std::move(GroupOfBBs), true));387PM.run(*M, MAM);388}389390// In addition to deleting all other functions, we also want to spiff it391// up a little bit. Do this now.392393LoopAnalysisManager LAM;394FunctionAnalysisManager FAM;395CGSCCAnalysisManager CGAM;396ModuleAnalysisManager MAM;397398PassBuilder PB;399400PB.registerModuleAnalyses(MAM);401PB.registerCGSCCAnalyses(CGAM);402PB.registerFunctionAnalyses(FAM);403PB.registerLoopAnalyses(LAM);404PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);405406ModulePassManager PM;407if (!DeleteFn)408PM.addPass(GlobalDCEPass());409PM.addPass(StripDeadDebugInfoPass());410PM.addPass(StripDeadPrototypesPass());411412std::error_code EC;413ToolOutputFile Out(OutputFilename, EC, sys::fs::OF_None);414if (EC) {415errs() << EC.message() << '\n';416return 1;417}418419if (OutputAssembly)420PM.addPass(PrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder));421else if (Force || !CheckBitcodeOutputToConsole(Out.os()))422PM.addPass(BitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder));423424PM.run(*M, MAM);425426// Declare success.427Out.keep();428429return 0;430}431432433