Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/compiler-rt/lib/fuzzer/FuzzerFork.cpp
35262 views
1
//===- FuzzerFork.cpp - run fuzzing in separate subprocesses --------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// Spawn and orchestrate separate fuzzing processes.
9
//===----------------------------------------------------------------------===//
10
11
#include "FuzzerCommand.h"
12
#include "FuzzerFork.h"
13
#include "FuzzerIO.h"
14
#include "FuzzerInternal.h"
15
#include "FuzzerMerge.h"
16
#include "FuzzerSHA1.h"
17
#include "FuzzerTracePC.h"
18
#include "FuzzerUtil.h"
19
20
#include <atomic>
21
#include <chrono>
22
#include <condition_variable>
23
#include <fstream>
24
#include <memory>
25
#include <mutex>
26
#include <queue>
27
#include <sstream>
28
#include <thread>
29
30
namespace fuzzer {
31
32
// Counters collected from the "stat::" lines of a child process log.
// A counter that never appears in the log keeps its default of zero.
struct Stats {
  size_t number_of_executed_units = 0;
  size_t peak_rss_mb = 0;
  size_t average_exec_per_sec = 0;
};

// Reads the log file at LogPath and returns the statistics found in it.
//
// Only lines that start with "stat::" are inspected. Each such line is
// expected to look like "<name> <unsigned value>"; the three names listed in
// the table below are recognized and every other name is ignored. When a name
// occurs several times the last well-formed occurrence wins.
//
// Lines whose value is missing or not a number are skipped. Without that
// check a line that ends right after the name would leave the value variable
// unwritten (the stream is already at EOF, so the extraction never runs) and
// an indeterminate number would be stored in the result.
//
// A log that cannot be opened yields an all-zero Stats.
static Stats ParseFinalStatsFromLog(const std::string &LogPath) {
  std::ifstream In(LogPath);
  Stats Res;
  const struct {
    const char *Name;
    size_t *Var;
  } NameVarPairs[] = {
      {"stat::number_of_executed_units:", &Res.number_of_executed_units},
      {"stat::peak_rss_mb:", &Res.peak_rss_mb},
      {"stat::average_exec_per_sec:", &Res.average_exec_per_sec},
  };
  std::string Line;
  while (std::getline(In, Line, '\n')) {
    // Prefix test only; no need to scan the whole line for the marker.
    if (Line.compare(0, 6, "stat::") != 0)
      continue;
    std::istringstream ISS(Line);
    std::string Name;
    size_t Val = 0;
    if (!(ISS >> Name >> Val))
      continue; // Truncated or malformed line: keep what we already have.
    for (const auto &Pair : NameVarPairs)
      if (Name == Pair.Name)
        *Pair.Var = Val;
  }
  return Res;
}
63
64
struct FuzzJob {
65
// Inputs.
66
Command Cmd;
67
std::string CorpusDir;
68
std::string FeaturesDir;
69
std::string LogPath;
70
std::string SeedListPath;
71
std::string CFPath;
72
size_t JobId;
73
74
int DftTimeInSeconds = 0;
75
76
// Fuzzing Outputs.
77
int ExitCode;
78
79
~FuzzJob() {
80
RemoveFile(CFPath);
81
RemoveFile(LogPath);
82
RemoveFile(SeedListPath);
83
RmDirRecursive(CorpusDir);
84
RmDirRecursive(FeaturesDir);
85
}
86
};
87
88
struct GlobalEnv {
89
std::vector<std::string> Args;
90
std::vector<std::string> CorpusDirs;
91
std::string MainCorpusDir;
92
std::string TempDir;
93
std::string DFTDir;
94
std::string DataFlowBinary;
95
std::set<uint32_t> Features, Cov;
96
std::set<std::string> FilesWithDFT;
97
std::vector<std::string> Files;
98
std::vector<std::size_t> FilesSizes;
99
Random *Rand;
100
std::chrono::system_clock::time_point ProcessStartTime;
101
int Verbosity = 0;
102
int Group = 0;
103
int NumCorpuses = 8;
104
105
size_t NumTimeouts = 0;
106
size_t NumOOMs = 0;
107
size_t NumCrashes = 0;
108
109
110
size_t NumRuns = 0;
111
112
std::string StopFile() { return DirPlusFile(TempDir, "STOP"); }
113
114
size_t secondsSinceProcessStartUp() const {
115
return std::chrono::duration_cast<std::chrono::seconds>(
116
std::chrono::system_clock::now() - ProcessStartTime)
117
.count();
118
}
119
120
FuzzJob *CreateNewJob(size_t JobId) {
121
Command Cmd(Args);
122
Cmd.removeFlag("fork");
123
Cmd.removeFlag("runs");
124
Cmd.removeFlag("collect_data_flow");
125
for (auto &C : CorpusDirs) // Remove all corpora from the args.
126
Cmd.removeArgument(C);
127
Cmd.addFlag("reload", "0"); // working in an isolated dir, no reload.
128
Cmd.addFlag("print_final_stats", "1");
129
Cmd.addFlag("print_funcs", "0"); // no need to spend time symbolizing.
130
Cmd.addFlag("max_total_time", std::to_string(std::min((size_t)300, JobId)));
131
Cmd.addFlag("stop_file", StopFile());
132
if (!DataFlowBinary.empty()) {
133
Cmd.addFlag("data_flow_trace", DFTDir);
134
if (!Cmd.hasFlag("focus_function"))
135
Cmd.addFlag("focus_function", "auto");
136
}
137
auto Job = new FuzzJob;
138
std::string Seeds;
139
if (size_t CorpusSubsetSize =
140
std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
141
auto Time1 = std::chrono::system_clock::now();
142
if (Group) { // whether to group the corpus.
143
size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
144
size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
145
for (size_t i = 0; i < CorpusSubsetSize; i++) {
146
size_t RandNum = (*Rand)(AverageCorpusSize);
147
size_t Index = RandNum + StartIndex;
148
Index = Index < Files.size() ? Index
149
: Rand->SkewTowardsLast(Files.size());
150
auto &SF = Files[Index];
151
Seeds += (Seeds.empty() ? "" : ",") + SF;
152
CollectDFT(SF);
153
}
154
} else {
155
for (size_t i = 0; i < CorpusSubsetSize; i++) {
156
auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
157
Seeds += (Seeds.empty() ? "" : ",") + SF;
158
CollectDFT(SF);
159
}
160
}
161
auto Time2 = std::chrono::system_clock::now();
162
auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
163
assert(DftTimeInSeconds < std::numeric_limits<int>::max());
164
Job->DftTimeInSeconds = static_cast<int>(DftTimeInSeconds);
165
}
166
if (!Seeds.empty()) {
167
Job->SeedListPath =
168
DirPlusFile(TempDir, std::to_string(JobId) + ".seeds");
169
WriteToFile(Seeds, Job->SeedListPath);
170
Cmd.addFlag("seed_inputs", "@" + Job->SeedListPath);
171
}
172
Job->LogPath = DirPlusFile(TempDir, std::to_string(JobId) + ".log");
173
Job->CorpusDir = DirPlusFile(TempDir, "C" + std::to_string(JobId));
174
Job->FeaturesDir = DirPlusFile(TempDir, "F" + std::to_string(JobId));
175
Job->CFPath = DirPlusFile(TempDir, std::to_string(JobId) + ".merge");
176
Job->JobId = JobId;
177
178
179
Cmd.addArgument(Job->CorpusDir);
180
Cmd.addFlag("features_dir", Job->FeaturesDir);
181
182
for (auto &D : {Job->CorpusDir, Job->FeaturesDir}) {
183
RmDirRecursive(D);
184
MkDir(D);
185
}
186
187
Cmd.setOutputFile(Job->LogPath);
188
Cmd.combineOutAndErr();
189
190
Job->Cmd = Cmd;
191
192
if (Verbosity >= 2)
193
Printf("Job %zd/%p Created: %s\n", JobId, Job,
194
Job->Cmd.toString().c_str());
195
// Start from very short runs and gradually increase them.
196
return Job;
197
}
198
199
void RunOneMergeJob(FuzzJob *Job) {
200
auto Stats = ParseFinalStatsFromLog(Job->LogPath);
201
NumRuns += Stats.number_of_executed_units;
202
203
std::vector<SizedFile> TempFiles, MergeCandidates;
204
// Read all newly created inputs and their feature sets.
205
// Choose only those inputs that have new features.
206
GetSizedFilesFromDir(Job->CorpusDir, &TempFiles);
207
std::sort(TempFiles.begin(), TempFiles.end());
208
for (auto &F : TempFiles) {
209
auto FeatureFile = F.File;
210
FeatureFile.replace(0, Job->CorpusDir.size(), Job->FeaturesDir);
211
auto FeatureBytes = FileToVector(FeatureFile, 0, false);
212
assert((FeatureBytes.size() % sizeof(uint32_t)) == 0);
213
std::vector<uint32_t> NewFeatures(FeatureBytes.size() / sizeof(uint32_t));
214
memcpy(NewFeatures.data(), FeatureBytes.data(), FeatureBytes.size());
215
for (auto Ft : NewFeatures) {
216
if (!Features.count(Ft)) {
217
MergeCandidates.push_back(F);
218
break;
219
}
220
}
221
}
222
// if (!FilesToAdd.empty() || Job->ExitCode != 0)
223
Printf("#%zd: cov: %zd ft: %zd corp: %zd exec/s: %zd "
224
"oom/timeout/crash: %zd/%zd/%zd time: %zds job: %zd dft_time: %d\n",
225
NumRuns, Cov.size(), Features.size(), Files.size(),
226
Stats.average_exec_per_sec, NumOOMs, NumTimeouts, NumCrashes,
227
secondsSinceProcessStartUp(), Job->JobId, Job->DftTimeInSeconds);
228
229
if (MergeCandidates.empty()) return;
230
231
std::vector<std::string> FilesToAdd;
232
std::set<uint32_t> NewFeatures, NewCov;
233
bool IsSetCoverMerge =
234
!Job->Cmd.getFlagValue("set_cover_merge").compare("1");
235
CrashResistantMerge(Args, {}, MergeCandidates, &FilesToAdd, Features,
236
&NewFeatures, Cov, &NewCov, Job->CFPath, false,
237
IsSetCoverMerge);
238
for (auto &Path : FilesToAdd) {
239
auto U = FileToVector(Path);
240
auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
241
WriteToFile(U, NewPath);
242
if (Group) { // Insert the queue according to the size of the seed.
243
size_t UnitSize = U.size();
244
auto Idx =
245
std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
246
FilesSizes.begin();
247
FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
248
Files.insert(Files.begin() + Idx, NewPath);
249
} else {
250
Files.push_back(NewPath);
251
}
252
}
253
Features.insert(NewFeatures.begin(), NewFeatures.end());
254
Cov.insert(NewCov.begin(), NewCov.end());
255
for (auto Idx : NewCov)
256
if (auto *TE = TPC.PCTableEntryByIdx(Idx))
257
if (TPC.PcIsFuncEntry(TE))
258
PrintPC(" NEW_FUNC: %p %F %L\n", "",
259
TPC.GetNextInstructionPc(TE->PC));
260
}
261
262
void CollectDFT(const std::string &InputPath) {
263
if (DataFlowBinary.empty()) return;
264
if (!FilesWithDFT.insert(InputPath).second) return;
265
Command Cmd(Args);
266
Cmd.removeFlag("fork");
267
Cmd.removeFlag("runs");
268
Cmd.addFlag("data_flow_trace", DFTDir);
269
Cmd.addArgument(InputPath);
270
for (auto &C : CorpusDirs) // Remove all corpora from the args.
271
Cmd.removeArgument(C);
272
Cmd.setOutputFile(DirPlusFile(TempDir, "dft.log"));
273
Cmd.combineOutAndErr();
274
// Printf("CollectDFT: %s\n", Cmd.toString().c_str());
275
ExecuteCommand(Cmd);
276
}
277
278
};
279
280
struct JobQueue {
281
std::queue<FuzzJob *> Qu;
282
std::mutex Mu;
283
std::condition_variable Cv;
284
285
void Push(FuzzJob *Job) {
286
{
287
std::lock_guard<std::mutex> Lock(Mu);
288
Qu.push(Job);
289
}
290
Cv.notify_one();
291
}
292
FuzzJob *Pop() {
293
std::unique_lock<std::mutex> Lk(Mu);
294
// std::lock_guard<std::mutex> Lock(Mu);
295
Cv.wait(Lk, [&]{return !Qu.empty();});
296
assert(!Qu.empty());
297
auto Job = Qu.front();
298
Qu.pop();
299
return Job;
300
}
301
};
302
303
// Body of a worker thread. Repeatedly takes a job from FuzzQ, executes its
// command, records the exit code in the job and forwards the job to MergeQ.
// A null job ends the loop.
void WorkerThread(JobQueue *FuzzQ, JobQueue *MergeQ) {
  for (;;) {
    FuzzJob *Next = FuzzQ->Pop();
    if (!Next)
      return;
    Next->ExitCode = ExecuteCommand(Next->Cmd);
    MergeQ->Push(Next);
  }
}
310
311
// This is just a skeleton of an experimental -fork=1 feature.
312
void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
313
const std::vector<std::string> &Args,
314
const std::vector<std::string> &CorpusDirs, int NumJobs) {
315
Printf("INFO: -fork=%d: fuzzing in separate process(s)\n", NumJobs);
316
317
GlobalEnv Env;
318
Env.Args = Args;
319
Env.CorpusDirs = CorpusDirs;
320
Env.Rand = &Rand;
321
Env.Verbosity = Options.Verbosity;
322
Env.ProcessStartTime = std::chrono::system_clock::now();
323
Env.DataFlowBinary = Options.CollectDataFlow;
324
Env.Group = Options.ForkCorpusGroups;
325
326
std::vector<SizedFile> SeedFiles;
327
for (auto &Dir : CorpusDirs)
328
GetSizedFilesFromDir(Dir, &SeedFiles);
329
std::sort(SeedFiles.begin(), SeedFiles.end());
330
Env.TempDir = TempPath("FuzzWithFork", ".dir");
331
Env.DFTDir = DirPlusFile(Env.TempDir, "DFT");
332
RmDirRecursive(Env.TempDir); // in case there is a leftover from old runs.
333
MkDir(Env.TempDir);
334
MkDir(Env.DFTDir);
335
336
337
if (CorpusDirs.empty())
338
MkDir(Env.MainCorpusDir = DirPlusFile(Env.TempDir, "C"));
339
else
340
Env.MainCorpusDir = CorpusDirs[0];
341
342
if (Options.KeepSeed) {
343
for (auto &File : SeedFiles)
344
Env.Files.push_back(File.File);
345
} else {
346
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
347
std::set<uint32_t> NewFeatures, NewCov;
348
CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, Env.Features,
349
&NewFeatures, Env.Cov, &NewCov, CFPath,
350
/*Verbose=*/false, /*IsSetCoverMerge=*/false);
351
Env.Features.insert(NewFeatures.begin(), NewFeatures.end());
352
Env.Cov.insert(NewCov.begin(), NewCov.end());
353
RemoveFile(CFPath);
354
}
355
356
if (Env.Group) {
357
for (auto &path : Env.Files)
358
Env.FilesSizes.push_back(FileSize(path));
359
}
360
361
Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
362
Env.Files.size(), Env.TempDir.c_str());
363
364
int ExitCode = 0;
365
366
JobQueue FuzzQ, MergeQ;
367
368
auto StopJobs = [&]() {
369
for (int i = 0; i < NumJobs; i++)
370
FuzzQ.Push(nullptr);
371
MergeQ.Push(nullptr);
372
WriteToFile(Unit({1}), Env.StopFile());
373
};
374
375
size_t MergeCycle = 20;
376
size_t JobExecuted = 0;
377
size_t JobId = 1;
378
std::vector<std::thread> Threads;
379
for (int t = 0; t < NumJobs; t++) {
380
Threads.push_back(std::thread(WorkerThread, &FuzzQ, &MergeQ));
381
FuzzQ.Push(Env.CreateNewJob(JobId++));
382
}
383
384
while (true) {
385
std::unique_ptr<FuzzJob> Job(MergeQ.Pop());
386
if (!Job)
387
break;
388
ExitCode = Job->ExitCode;
389
if (ExitCode == Options.InterruptExitCode) {
390
Printf("==%lu== libFuzzer: a child was interrupted; exiting\n", GetPid());
391
StopJobs();
392
break;
393
}
394
Fuzzer::MaybeExitGracefully();
395
396
Env.RunOneMergeJob(Job.get());
397
398
// merge the corpus .
399
JobExecuted++;
400
if (Env.Group && JobExecuted >= MergeCycle) {
401
std::vector<SizedFile> CurrentSeedFiles;
402
for (auto &Dir : CorpusDirs)
403
GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
404
std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
405
406
auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
407
std::set<uint32_t> TmpNewFeatures, TmpNewCov;
408
std::set<uint32_t> TmpFeatures, TmpCov;
409
Env.Files.clear();
410
Env.FilesSizes.clear();
411
CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
412
TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
413
CFPath, /*Verbose=*/false, /*IsSetCoverMerge=*/false);
414
for (auto &path : Env.Files)
415
Env.FilesSizes.push_back(FileSize(path));
416
RemoveFile(CFPath);
417
JobExecuted = 0;
418
MergeCycle += 5;
419
}
420
421
// Since the number of corpus seeds will gradually increase, in order to
422
// control the number in each group to be about three times the number of
423
// seeds selected each time, the number of groups is dynamically adjusted.
424
if (Env.Files.size() < 2000)
425
Env.NumCorpuses = 12;
426
else if (Env.Files.size() < 6000)
427
Env.NumCorpuses = 20;
428
else if (Env.Files.size() < 12000)
429
Env.NumCorpuses = 32;
430
else if (Env.Files.size() < 16000)
431
Env.NumCorpuses = 40;
432
else if (Env.Files.size() < 24000)
433
Env.NumCorpuses = 60;
434
else
435
Env.NumCorpuses = 80;
436
437
// Continue if our crash is one of the ignored ones.
438
if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
439
Env.NumTimeouts++;
440
else if (Options.IgnoreOOMs && ExitCode == Options.OOMExitCode)
441
Env.NumOOMs++;
442
else if (ExitCode != 0) {
443
Env.NumCrashes++;
444
if (Options.IgnoreCrashes) {
445
std::ifstream In(Job->LogPath);
446
std::string Line;
447
while (std::getline(In, Line, '\n'))
448
if (Line.find("ERROR:") != Line.npos ||
449
Line.find("runtime error:") != Line.npos)
450
Printf("%s\n", Line.c_str());
451
} else {
452
// And exit if we don't ignore this crash.
453
Printf("INFO: log from the inner process:\n%s",
454
FileToString(Job->LogPath).c_str());
455
StopJobs();
456
break;
457
}
458
}
459
460
// Stop if we are over the time budget.
461
// This is not precise, since other threads are still running
462
// and we will wait while joining them.
463
// We also don't stop instantly: other jobs need to finish.
464
if (Options.MaxTotalTimeSec > 0 &&
465
Env.secondsSinceProcessStartUp() >= (size_t)Options.MaxTotalTimeSec) {
466
Printf("INFO: fuzzed for %zd seconds, wrapping up soon\n",
467
Env.secondsSinceProcessStartUp());
468
StopJobs();
469
break;
470
}
471
if (Env.NumRuns >= Options.MaxNumberOfRuns) {
472
Printf("INFO: fuzzed for %zd iterations, wrapping up soon\n",
473
Env.NumRuns);
474
StopJobs();
475
break;
476
}
477
478
FuzzQ.Push(Env.CreateNewJob(JobId++));
479
}
480
481
for (auto &T : Threads)
482
T.join();
483
484
// The workers have terminated. Don't try to remove the directory before they
485
// terminate to avoid a race condition preventing cleanup on Windows.
486
RmDirRecursive(Env.TempDir);
487
488
// Use the exit code from the last child process.
489
Printf("INFO: exiting: %d time: %zds\n", ExitCode,
490
Env.secondsSinceProcessStartUp());
491
exit(ExitCode);
492
}
493
494
} // namespace fuzzer
495
496