// Path: blob/main/contrib/llvm-project/compiler-rt/lib/fuzzer/FuzzerFork.cpp
// 35262 views
//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7// Spawn and orchestrate separate fuzzing processes.8//===----------------------------------------------------------------------===//910#include "FuzzerCommand.h"11#include "FuzzerFork.h"12#include "FuzzerIO.h"13#include "FuzzerInternal.h"14#include "FuzzerMerge.h"15#include "FuzzerSHA1.h"16#include "FuzzerTracePC.h"17#include "FuzzerUtil.h"1819#include <atomic>20#include <chrono>21#include <condition_variable>22#include <fstream>23#include <memory>24#include <mutex>25#include <queue>26#include <sstream>27#include <thread>2829namespace fuzzer {3031struct Stats {32size_t number_of_executed_units = 0;33size_t peak_rss_mb = 0;34size_t average_exec_per_sec = 0;35};3637static Stats ParseFinalStatsFromLog(const std::string &LogPath) {38std::ifstream In(LogPath);39std::string Line;40Stats Res;41struct {42const char *Name;43size_t *Var;44} NameVarPairs[] = {45{"stat::number_of_executed_units:", &Res.number_of_executed_units},46{"stat::peak_rss_mb:", &Res.peak_rss_mb},47{"stat::average_exec_per_sec:", &Res.average_exec_per_sec},48{nullptr, nullptr},49};50while (std::getline(In, Line, '\n')) {51if (Line.find("stat::") != 0) continue;52std::istringstream ISS(Line);53std::string Name;54size_t Val;55ISS >> Name >> Val;56for (size_t i = 0; NameVarPairs[i].Name; i++)57if (Name == NameVarPairs[i].Name)58*NameVarPairs[i].Var = Val;59}60return Res;61}6263struct FuzzJob {64// Inputs.65Command Cmd;66std::string CorpusDir;67std::string FeaturesDir;68std::string LogPath;69std::string SeedListPath;70std::string CFPath;71size_t JobId;7273int DftTimeInSeconds = 0;7475// Fuzzing Outputs.76int ExitCode;7778~FuzzJob() 
{79RemoveFile(CFPath);80RemoveFile(LogPath);81RemoveFile(SeedListPath);82RmDirRecursive(CorpusDir);83RmDirRecursive(FeaturesDir);84}85};8687struct GlobalEnv {88std::vector<std::string> Args;89std::vector<std::string> CorpusDirs;90std::string MainCorpusDir;91std::string TempDir;92std::string DFTDir;93std::string DataFlowBinary;94std::set<uint32_t> Features, Cov;95std::set<std::string> FilesWithDFT;96std::vector<std::string> Files;97std::vector<std::size_t> FilesSizes;98Random *Rand;99std::chrono::system_clock::time_point ProcessStartTime;100int Verbosity = 0;101int Group = 0;102int NumCorpuses = 8;103104size_t NumTimeouts = 0;105size_t NumOOMs = 0;106size_t NumCrashes = 0;107108109size_t NumRuns = 0;110111std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }112113size_t secondsSinceProcessStartUp() const {114return std::chrono::duration_cast<std::chrono::seconds>(115std::chrono::system_clock::now() - ProcessStartTime)116.count();117}118119FuzzJob *CreateNewJob(size_t JobId) {120Command Cmd(Args);121Cmd.removeFlag("fork");122Cmd.removeFlag("runs");123Cmd.removeFlag("collect_data_flow");124for (auto &C : CorpusDirs) // Remove all corpora from the args.125Cmd.removeArgument(C);126Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload.127Cmd.addFlag("print_final_stats", "1");128Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing.129Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));130Cmd.addFlag("stop_file", StopFile());131if (!DataFlowBinary.empty()) {132Cmd.addFlag("data_flow_trace", DFTDir);133if (!Cmd.hasFlag("focus_function"))134Cmd.addFlag("focus_function", "auto");135}136auto Job = new FuzzJob;137std::string Seeds;138if (size_t CorpusSubsetSize =139std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {140auto Time1 = std::chrono::system_clock::now();141if (Group) { // whether to group the corpus.142size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;143size_t StartIndex = ((JobId - 1) % 
NumCorpuses) * AverageCorpusSize;144for (size_t i = 0; i < CorpusSubsetSize; i++) {145size_t RandNum = (*Rand)(AverageCorpusSize);146size_t Index = RandNum + StartIndex;147Index = Index < Files.size() ? Index148: Rand->SkewTowardsLast(Files.size());149auto &SF = Files[Index];150Seeds += (Seeds.empty() ? "" : ",") + SF;151CollectDFT(SF);152}153} else {154for (size_t i = 0; i < CorpusSubsetSize; i++) {155auto &SF = Files[Rand->SkewTowardsLast(Files.size())];156Seeds += (Seeds.empty() ? "" : ",") + SF;157CollectDFT(SF);158}159}160auto Time2 = std::chrono::system_clock::now();161auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();162assert(DftTimeInSeconds < std::numeric_limits<int>::max());163Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds);164}165if (!Seeds.empty()) {166Job->SeedListPath =167DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");168WriteToFile(Seeds, Job->SeedListPath);169Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);170}171Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");172Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));173Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));174Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");175Job->JobId = JobId;176177178Cmd.addArgument(Job->CorpusDir);179Cmd.addFlag("features_dir", Job->FeaturesDir);180181for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {182RmDirRecursive(D);183MkDir(D);184}185186Cmd.setOutputFile(Job->LogPath);187Cmd.combineOutAndErr();188189Job->Cmd = Cmd;190191if (Verbosity >= 2)192Printf("Job %zd/%p Created: %s\n", JobId, Job,193Job->Cmd.toString().c_str());194// Start from very short runs and gradually increase them.195return Job;196}197198void RunOneMergeJob(FuzzJob *Job) {199auto Stats = ParseFinalStatsFromLog(Job->LogPath);200NumRuns += Stats.number_of_executed_units;201202std::vector<SizedFile> TempFiles, MergeCandidates;203// Read all newly created inputs and their feature 
sets.204// Choose only those inputs that have new features.205GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);206std::sort(TempFiles.begin(), TempFiles.end());207for (auto &F : TempFiles) {208auto FeatureFile = F.File;209FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);210auto FeatureBytes = FileToVector(FeatureFile, 0, false);211assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);212std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));213memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());214for (auto Ft : NewFeatures) {215if (!Features.count(Ft)) {216MergeCandidates.push_back(F);217break;218}219}220}221// if (!FilesToAdd.empty() || Job->ExitCode != 0)222Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd "223"oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",224NumRuns, Cov.size(), Features.size(), Files.size(),225Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,226secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);227228if (MergeCandidates.empty()) return;229230std::vector<std::string> FilesToAdd;231std::set<uint32_t> NewFeatures, NewCov;232bool IsSetCoverMerge =233!Job->Cmd.getFlagValue("set_cover_merge").compare("1");234CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,235&NewFeatures, Cov, &NewCov, Job->CFPath, false,236IsSetCoverMerge);237for (auto &Path : FilesToAdd) {238auto U = FileToVector(Path);239auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));240WriteToFile(U, NewPath);241if (Group) { // Insert the queue according to the size of the seed.242size_t UnitSize = U.size();243auto Idx =244std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -245FilesSizes.begin();246FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);247Files.insert(Files.begin() + Idx, NewPath);248} else {249Files.push_back(NewPath);250}251}252Features.insert(NewFeatures.begin(), NewFeatures.end());253Cov.insert(NewCov.begin(), NewCov.end());254for (auto 
Idx : NewCov)255if (auto *TE = TPC.PCTableEntryByIdx(Idx))256if (TPC.PcIsFuncEntry(TE))257PrintPC(" NEW_FUNC: %p %F %L\n", "",258TPC.GetNextInstructionPc(TE->PC));259}260261void CollectDFT(const std::string &InputPath) {262if (DataFlowBinary.empty()) return;263if (!FilesWithDFT.insert(InputPath).second) return;264Command Cmd(Args);265Cmd.removeFlag("fork");266Cmd.removeFlag("runs");267Cmd.addFlag("data_flow_trace", DFTDir);268Cmd.addArgument(InputPath);269for (auto &C : CorpusDirs) // Remove all corpora from the args.270Cmd.removeArgument(C);271Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));272Cmd.combineOutAndErr();273// Printf("CollectDFT: %s\n", Cmd.toString().c_str());274ExecuteCommand(Cmd);275}276277};278279struct JobQueue {280std::queue<FuzzJob *> Qu;281std::mutex Mu;282std::condition_variable Cv;283284void Push(FuzzJob *Job) {285{286std::lock_guard<std::mutex> Lock(Mu);287Qu.push(Job);288}289Cv.notify_one();290}291FuzzJob *Pop() {292std::unique_lock<std::mutex> Lk(Mu);293// std::lock_guard<std::mutex> Lock(Mu);294Cv.wait(Lk, [&]{return !Qu.empty();});295assert(!Qu.empty());296auto Job = Qu.front();297Qu.pop();298return Job;299}300};301302void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {303while (auto Job = FuzzQ->Pop()) {304// Printf("WorkerThread: job %p\n", Job);305Job->ExitCode = ExecuteCommand(Job->Cmd);306MergeQ->Push(Job);307}308}309310// This is just a skeleton of an experimental -fork=1 feature.311void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,312const std::vector<std::string> &Args,313const std::vector<std::string> &CorpusDirs, int NumJobs) {314Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);315316GlobalEnv Env;317Env.Args = Args;318Env.CorpusDirs = CorpusDirs;319Env.Rand = &Rand;320Env.Verbosity = Options.Verbosity;321Env.ProcessStartTime = std::chrono::system_clock::now();322Env.DataFlowBinary = Options.CollectDataFlow;323Env.Group = Options.ForkCorpusGroups;324325std::vector<SizedFile> SeedFiles;326for 
(auto &Dir : CorpusDirs)327GetSizedFilesFromDir(Dir, &SeedFiles);328std::sort(SeedFiles.begin(), SeedFiles.end());329Env.TempDir = TempPath("FuzzWithFork", ".dir");330Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");331RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs.332MkDir(Env.TempDir);333MkDir(Env.DFTDir);334335336if (CorpusDirs.empty())337MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));338else339Env.MainCorpusDir = CorpusDirs[0];340341if (Options.KeepSeed) {342for (auto &File : SeedFiles)343Env.Files.push_back(File.File);344} else {345auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");346std::set<uint32_t> NewFeatures, NewCov;347CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,348&NewFeatures, Env.Cov, &NewCov, CFPath,349/*Verbose=*/false, /*IsSetCoverMerge=*/false);350Env.Features.insert(NewFeatures.begin(), NewFeatures.end());351Env.Cov.insert(NewCov.begin(), NewCov.end());352RemoveFile(CFPath);353}354355if (Env.Group) {356for (auto &path : Env.Files)357Env.FilesSizes.push_back(FileSize(path));358}359360Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,361Env.Files.size(), Env.TempDir.c_str());362363int ExitCode = 0;364365JobQueue FuzzQ, MergeQ;366367auto StopJobs = [&]() {368for (int i = 0; i < NumJobs; i++)369FuzzQ.Push(nullptr);370MergeQ.Push(nullptr);371WriteToFile(Unit({1}), Env.StopFile());372};373374size_t MergeCycle = 20;375size_t JobExecuted = 0;376size_t JobId = 1;377std::vector<std::thread> Threads;378for (int t = 0; t < NumJobs; t++) {379Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));380FuzzQ.Push(Env.CreateNewJob(JobId++));381}382383while (true) {384std::unique_ptr<FuzzJob> Job(MergeQ.Pop());385if (!Job)386break;387ExitCode = Job->ExitCode;388if (ExitCode == Options.InterruptExitCode) {389Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", 
GetPid());390StopJobs();391break;392}393Fuzzer::MaybeExitGracefully();394395Env.RunOneMergeJob(Job.get());396397// merge the corpus .398JobExecuted++;399if (Env.Group && JobExecuted >= MergeCycle) {400std::vector<SizedFile> CurrentSeedFiles;401for (auto &Dir : CorpusDirs)402GetSizedFilesFromDir(Dir, &CurrentSeedFiles);403std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());404405auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");406std::set<uint32_t> TmpNewFeatures, TmpNewCov;407std::set<uint32_t> TmpFeatures, TmpCov;408Env.Files.clear();409Env.FilesSizes.clear();410CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,411TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,412CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false);413for (auto &path : Env.Files)414Env.FilesSizes.push_back(FileSize(path));415RemoveFile(CFPath);416JobExecuted = 0;417MergeCycle += 5;418}419420// Since the number of corpus seeds will gradually increase, in order to421// control the number in each group to be about three times the number of422// seeds selected each time, the number of groups is dynamically adjusted.423if (Env.Files.size() < 2000)424Env.NumCorpuses = 12;425else if (Env.Files.size() < 6000)426Env.NumCorpuses = 20;427else if (Env.Files.size() < 12000)428Env.NumCorpuses = 32;429else if (Env.Files.size() < 16000)430Env.NumCorpuses = 40;431else if (Env.Files.size() < 24000)432Env.NumCorpuses = 60;433else434Env.NumCorpuses = 80;435436// Continue if our crash is one of the ignored ones.437if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)438Env.NumTimeouts++;439else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)440Env.NumOOMs++;441else if (ExitCode != 0) {442Env.NumCrashes++;443if (Options.IgnoreCrashes) {444std::ifstream In(Job->LogPath);445std::string Line;446while (std::getline(In, Line, '\n'))447if (Line.find("ERROR:") != Line.npos ||448Line.find("runtime error:") != Line.npos)449Printf("%s\n", Line.c_str());450} else {451// And exit 
if we don't ignore this crash.452Printf("INFO: log from the inner process:\n%s",453FileToString(Job->LogPath).c_str());454StopJobs();455break;456}457}458459// Stop if we are over the time budget.460// This is not precise, since other threads are still running461// and we will wait while joining them.462// We also don't stop instantly: other jobs need to finish.463if (Options.MaxTotalTimeSec > 0 &&464Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {465Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",466Env.secondsSinceProcessStartUp());467StopJobs();468break;469}470if (Env.NumRuns >= Options.MaxNumberOfRuns) {471Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",472Env.NumRuns);473StopJobs();474break;475}476477FuzzQ.Push(Env.CreateNewJob(JobId++));478}479480for (auto &T : Threads)481T.join();482483// The workers have terminated. Don't try to remove the directory before they484// terminate to avoid a race condition preventing cleanup on Windows.485RmDirRecursive(Env.TempDir);486487// Use the exit code from the last child process.488Printf("INFO: exiting: %d time: %zds\n", ExitCode,489Env.secondsSinceProcessStartUp());490exit(ExitCode);491}492493} // namespace fuzzer494495496