Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
35266 views
1
//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file is a part of SanitizerBinaryMetadata.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14
#include "llvm/ADT/SetVector.h"
15
#include "llvm/ADT/SmallVector.h"
16
#include "llvm/ADT/Statistic.h"
17
#include "llvm/ADT/StringExtras.h"
18
#include "llvm/ADT/StringRef.h"
19
#include "llvm/ADT/Twine.h"
20
#include "llvm/Analysis/CaptureTracking.h"
21
#include "llvm/Analysis/ValueTracking.h"
22
#include "llvm/IR/Constant.h"
23
#include "llvm/IR/DerivedTypes.h"
24
#include "llvm/IR/Function.h"
25
#include "llvm/IR/GlobalValue.h"
26
#include "llvm/IR/GlobalVariable.h"
27
#include "llvm/IR/IRBuilder.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/LLVMContext.h"
31
#include "llvm/IR/MDBuilder.h"
32
#include "llvm/IR/Metadata.h"
33
#include "llvm/IR/Module.h"
34
#include "llvm/IR/Type.h"
35
#include "llvm/IR/Value.h"
36
#include "llvm/ProfileData/InstrProf.h"
37
#include "llvm/Support/Allocator.h"
38
#include "llvm/Support/CommandLine.h"
39
#include "llvm/Support/Debug.h"
40
#include "llvm/Support/SpecialCaseList.h"
41
#include "llvm/Support/StringSaver.h"
42
#include "llvm/Support/VirtualFileSystem.h"
43
#include "llvm/TargetParser/Triple.h"
44
#include "llvm/Transforms/Utils/ModuleUtils.h"
45
46
#include <array>
47
#include <cstdint>
48
#include <memory>
49
50
using namespace llvm;
51
52
#define DEBUG_TYPE "sanmd"
53
54
namespace {
55
56
//===--- Constants --------------------------------------------------------===//
57
58
// Base version of the metadata format; it occupies the lower 16 bits of the
// version word.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16) signalling that offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority passed along when the module constructors/destructors are
// registered.
constexpr int kCtorDtorPriority = 2;
61
62
// Pairs of names of initialization callback functions and which section
63
// contains the relevant metadata.
64
class MetadataInfo {
65
public:
66
const StringRef FunctionPrefix;
67
const StringRef SectionSuffix;
68
69
static const MetadataInfo Covered;
70
static const MetadataInfo Atomics;
71
72
private:
73
// Forbid construction elsewhere.
74
explicit constexpr MetadataInfo(StringRef FunctionPrefix,
75
StringRef SectionSuffix)
76
: FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
77
};
78
const MetadataInfo MetadataInfo::Covered{
79
"__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
80
const MetadataInfo MetadataInfo::Atomics{
81
"__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
82
83
// The only instances of MetadataInfo are the constants above, so a set of
84
// them may simply store pointers to them. To deterministically generate code,
85
// we need to use a set with stable iteration order, such as SetVector.
86
using MetadataInfoSet = SetVector<const MetadataInfo *>;
87
88
//===--- Command-line options ---------------------------------------------===//
89
90
cl::opt<bool> ClWeakCallbacks(
91
"sanitizer-metadata-weak-callbacks",
92
cl::desc("Declare callbacks extern weak, and only call if non-null."),
93
cl::Hidden, cl::init(true));
94
cl::opt<bool>
95
ClNoSanitize("sanitizer-metadata-nosanitize-attr",
96
cl::desc("Mark some metadata features uncovered in functions "
97
"with associated no_sanitize attributes."),
98
cl::Hidden, cl::init(true));
99
100
cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
101
cl::desc("Emit PCs for covered functions."),
102
cl::Hidden, cl::init(false));
103
cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
104
cl::desc("Emit PCs for atomic operations."),
105
cl::Hidden, cl::init(false));
106
cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
107
cl::desc("Emit PCs for start of functions that are "
108
"subject for use-after-return checking"),
109
cl::Hidden, cl::init(false));
110
111
//===--- Statistics -------------------------------------------------------===//
112
113
STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
114
STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
115
STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
116
117
//===----------------------------------------------------------------------===//
118
119
// Apply opt overrides.
120
SanitizerBinaryMetadataOptions &&
121
transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
122
Opts.Covered |= ClEmitCovered;
123
Opts.Atomics |= ClEmitAtomics;
124
Opts.UAR |= ClEmitUAR;
125
return std::move(Opts);
126
}
127
128
class SanitizerBinaryMetadata {
129
public:
130
SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
131
std::unique_ptr<SpecialCaseList> Ignorelist)
132
: Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
133
Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
134
VersionStr(utostr(getVersion())), IRB(M.getContext()) {
135
// FIXME: Make it work with other formats.
136
assert(TargetTriple.isOSBinFormatELF() && "ELF only");
137
assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
138
"Device targets are not supported");
139
}
140
141
bool run();
142
143
private:
144
uint32_t getVersion() const {
145
uint32_t Version = kVersionBase;
146
const auto CM = Mod.getCodeModel();
147
if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
148
Version |= kVersionPtrSizeRel;
149
return Version;
150
}
151
152
void runOn(Function &F, MetadataInfoSet &MIS);
153
154
// Determines which set of metadata to collect for this instruction.
155
//
156
// Returns true if covered metadata is required to unambiguously interpret
157
// other metadata. For example, if we are interested in atomics metadata, any
158
// function with memory operations (atomic or not) requires covered metadata
159
// to determine if a memory operation is atomic or not in modules compiled
160
// with SanitizerBinaryMetadata.
161
bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
162
uint64_t &FeatureMask);
163
164
// Get start/end section marker pointer.
165
GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
166
167
// Returns the target-dependent section name.
168
StringRef getSectionName(StringRef SectionSuffix);
169
170
// Returns the section start marker name.
171
StringRef getSectionStart(StringRef SectionSuffix);
172
173
// Returns the section end marker name.
174
StringRef getSectionEnd(StringRef SectionSuffix);
175
176
// Returns true if the access to the address should be considered "atomic".
177
bool pretendAtomicAccess(const Value *Addr);
178
179
Module &Mod;
180
const SanitizerBinaryMetadataOptions Options;
181
std::unique_ptr<SpecialCaseList> Ignorelist;
182
const Triple TargetTriple;
183
const std::string VersionStr;
184
IRBuilder<> IRB;
185
BumpPtrAllocator Alloc;
186
UniqueStringSaver StringPool{Alloc};
187
};
188
189
bool SanitizerBinaryMetadata::run() {
190
MetadataInfoSet MIS;
191
192
for (Function &F : Mod)
193
runOn(F, MIS);
194
195
if (MIS.empty())
196
return false;
197
198
//
199
// Setup constructors and call all initialization functions for requested
200
// metadata features.
201
//
202
203
auto *PtrTy = IRB.getPtrTy();
204
auto *Int32Ty = IRB.getInt32Ty();
205
const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
206
auto *Version = ConstantInt::get(Int32Ty, getVersion());
207
208
for (const MetadataInfo *MI : MIS) {
209
const std::array<Value *, InitTypes.size()> InitArgs = {
210
Version,
211
getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),
212
getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),
213
};
214
215
// Calls to the initialization functions with different versions cannot be
216
// merged. Give the structors unique names based on the version, which will
217
// also be used as the COMDAT key.
218
const std::string StructorPrefix = (MI->FunctionPrefix + VersionStr).str();
219
220
// We declare the _add and _del functions as weak, and only call them if
221
// there is a valid symbol linked. This allows building binaries with
222
// semantic metadata, but without having callbacks. When a tool that wants
223
// the metadata is linked which provides the callbacks, they will be called.
224
Function *Ctor =
225
createSanitizerCtorAndInitFunctions(
226
Mod, StructorPrefix + ".module_ctor",
227
(MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
228
/*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
229
.first;
230
Function *Dtor =
231
createSanitizerCtorAndInitFunctions(
232
Mod, StructorPrefix + ".module_dtor",
233
(MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
234
/*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
235
.first;
236
Constant *CtorComdatKey = nullptr;
237
Constant *DtorComdatKey = nullptr;
238
if (TargetTriple.supportsCOMDAT()) {
239
// Use COMDAT to deduplicate constructor/destructor function. The COMDAT
240
// key needs to be a non-local linkage.
241
Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
242
Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
243
Ctor->setLinkage(GlobalValue::ExternalLinkage);
244
Dtor->setLinkage(GlobalValue::ExternalLinkage);
245
// DSOs should _not_ call another constructor/destructor!
246
Ctor->setVisibility(GlobalValue::HiddenVisibility);
247
Dtor->setVisibility(GlobalValue::HiddenVisibility);
248
CtorComdatKey = Ctor;
249
DtorComdatKey = Dtor;
250
}
251
appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
252
appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
253
}
254
255
return true;
256
}
257
258
void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
  // Functions that are skipped entirely.
  if (F.empty())
    return;
  if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
    return;
  if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
    return;
  // available_externally functions have their real body elsewhere; leave them
  // alone.
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return;

  MDBuilder MDB(F.getContext());

  // Features enabled for this function; stored next to the covered metadata
  // when that is emitted.
  uint64_t Features = 0;
  // Covered metadata is not emitted for every function, to save space.
  bool NeedsCovered = false;

  if (Options.Atomics || Options.UAR) {
    for (BasicBlock &Block : F) {
      for (Instruction &Inst : Block) {
        if (runOn(Inst, MIS, MDB, Features))
          NeedsCovered = true;
      }
    }
  }

  // Drop features that must not be reported for this function.
  if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
    Features &= ~kSanitizerBinaryMetadataAtomics;
  if (F.isVarArg())
    Features &= ~kSanitizerBinaryMetadataUAR;

  if ((Features & kSanitizerBinaryMetadataUAR) != 0) {
    NeedsCovered = true;
    ++NumMetadataUAR;
  }

  // Covered metadata is emitted whenever it was requested explicitly.
  // Otherwise it is only emitted if other metadata needs it to be interpreted
  // unambiguously in modules compiled with SanitizerBinaryMetadata.
  const bool EmitCovered = Options.Covered || (Features != 0 && NeedsCovered);
  if (!EmitCovered)
    return;

  ++NumMetadataCovered;
  const MetadataInfo *const Info = &MetadataInfo::Covered;
  MIS.insert(Info);
  const StringRef Section = getSectionName(Info->SectionSuffix);
  // The feature mask is placed after the function size.
  Constant *const MaskConst = IRB.getInt64(Features);
  F.setMetadata(LLVMContext::MD_pcsections,
                MDB.createPCSections({{Section, {MaskConst}}}));
}
306
307
// Returns true if the call cannot contribute to a use-after-return: it is
// safe both to pass pointers to local variables to the callee and to
// tail-call it. A call for which getCalledFunction() yields null is never
// considered safe.
bool isUARSafeCall(CallInst *CI) {
  const Function *Callee = CI->getCalledFunction();
  if (!Callee)
    return false;

  // No intrinsic function leaks its arguments. And if the callee does not
  // return, neither does the current function, so a use-after-return is
  // impossible.
  if (Callee->isIntrinsic() || Callee->doesNotReturn())
    return true;

  // Sanitizer runtime functions also either don't leak or don't return.
  const StringRef Name = Callee->getName();
  for (const char *Prefix :
       {"__asan_", "__hwsan_", "__ubsan_", "__msan_", "__tsan_"}) {
    if (Name.starts_with(Prefix))
      return true;
  }
  return false;
}
322
323
// Returns true if some user of V is not on the list of uses known to be
// harmless for use-after-return. Users that are not instructions are always
// treated as unsafe.
bool hasUseAfterReturnUnsafeUses(Value &V) {
  for (User *Usr : V.users()) {
    auto *Inst = dyn_cast<Instruction>(Usr);
    if (!Inst)
      return true;

    // Lifetime markers and droppable uses.
    if (Inst->isLifetimeStartOrEnd() || Inst->isDroppable())
      continue;
    // Loads through the pointer.
    if (isa<LoadInst>(Inst))
      continue;
    // Calls that are known to be safe.
    if (auto *Call = dyn_cast<CallInst>(Inst); Call && isUARSafeCall(Call))
      continue;
    // A store TO the value does not take its address.
    if (auto *Store = dyn_cast<StoreInst>(Inst);
        Store && Store->getPointerOperand() == &V)
      continue;
    // A GEP or bitcast is as safe as its own uses are.
    if ((isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) &&
        !hasUseAfterReturnUnsafeUses(*Inst))
      continue;

    return true;
  }
  return false;
}
351
352
// Returns true if instruction I makes its function subject to
// use-after-return checking.
bool useAfterReturnUnsafe(Instruction &I) {
  // An alloca is unsafe if any of its uses is.
  if (isa<AllocaInst>(I))
    return hasUseAfterReturnUnsafeUses(I);

  // Tail-called functions are not necessarily intercepted at runtime, since
  // there is no call instruction. Be conservative and mark the caller as
  // requiring checking.
  if (auto *Call = dyn_cast<CallInst>(&I))
    return Call->isTailCall() && !isUARSafeCall(Call);

  return false;
}
362
363
bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
364
if (!Addr)
365
return false;
366
367
Addr = Addr->stripInBoundsOffsets();
368
auto *GV = dyn_cast<GlobalVariable>(Addr);
369
if (!GV)
370
return false;
371
372
// Some compiler-generated accesses are known racy, to avoid false positives
373
// in data-race analysis pretend they're atomic.
374
if (GV->hasSection()) {
375
const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
376
const auto ProfSec =
377
getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
378
if (GV->getSection().ends_with(ProfSec))
379
return true;
380
}
381
if (GV->getName().starts_with("__llvm_gcov") ||
382
GV->getName().starts_with("__llvm_gcda"))
383
return true;
384
385
return false;
386
}
387
388
// Returns true if the memory at `Addr` may be shared with other threads.
389
bool maybeSharedMutable(const Value *Addr) {
390
// By default assume memory may be shared.
391
if (!Addr)
392
return true;
393
394
if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
395
!PointerMayBeCaptured(Addr, true, true))
396
return false; // Object is on stack but does not escape.
397
398
Addr = Addr->stripInBoundsOffsets();
399
if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
400
if (GV->isConstant())
401
return false; // Shared, but not mutable.
402
}
403
404
return true;
405
}
406
407
bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
408
MDBuilder &MDB, uint64_t &FeatureMask) {
409
SmallVector<const MetadataInfo *, 1> InstMetadata;
410
bool RequiresCovered = false;
411
412
// Only call if at least 1 type of metadata is requested.
413
assert(Options.UAR || Options.Atomics);
414
415
if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
416
if (useAfterReturnUnsafe(I))
417
FeatureMask |= kSanitizerBinaryMetadataUAR;
418
}
419
420
if (Options.Atomics) {
421
const Value *Addr = nullptr;
422
if (auto *SI = dyn_cast<StoreInst>(&I))
423
Addr = SI->getPointerOperand();
424
else if (auto *LI = dyn_cast<LoadInst>(&I))
425
Addr = LI->getPointerOperand();
426
427
if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
428
auto SSID = getAtomicSyncScopeID(&I);
429
if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
430
pretendAtomicAccess(Addr)) {
431
NumMetadataAtomics++;
432
InstMetadata.push_back(&MetadataInfo::Atomics);
433
}
434
FeatureMask |= kSanitizerBinaryMetadataAtomics;
435
RequiresCovered = true;
436
}
437
}
438
439
// Attach MD_pcsections to instruction.
440
if (!InstMetadata.empty()) {
441
MIS.insert(InstMetadata.begin(), InstMetadata.end());
442
SmallVector<MDBuilder::PCSection, 1> Sections;
443
for (const auto &MI : InstMetadata)
444
Sections.push_back({getSectionName(MI->SectionSuffix), {}});
445
I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
446
}
447
448
return RequiresCovered;
449
}
450
451
// Declares a hidden, extern-weak global named MarkerName of type Ty in the
// module and returns it.
GlobalVariable *
SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
  // ExternalWeak linkage makes sure the linker does not report undefined
  // symbols when section garbage collection discards all sections.
  GlobalVariable *const SectionMarker = new GlobalVariable(
      Mod, Ty, /*isConstant=*/false, GlobalVariable::ExternalWeakLinkage,
      /*Initializer=*/nullptr, MarkerName);
  SectionMarker->setVisibility(GlobalValue::HiddenVisibility);
  return SectionMarker;
}
461
462
// Builds `<SectionSuffix><version>!C` and stores it in the string pool, so
// the returned reference remains valid after this call.
StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
  // FIXME: Other TargetTriples.
  // The "!C" part requests ULEB128 encoding for all integer constants.
  return StringPool.save(Twine(SectionSuffix) + VersionStr + "!C");
}
467
468
// Builds `__start_<SectionSuffix><version>` and stores it in the string pool.
StringRef SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
  // A Twine concatenates exactly two strings. Joining more than two creates
  // Twine temporaries, so returning the resulting Twine would be a
  // stack-use-after-return. The joined string is therefore saved in the
  // StringPool and a reference to that copy is returned.
  return StringPool.save(Twine("__start_") + SectionSuffix + VersionStr);
}
475
476
// Builds `__stop_<SectionSuffix><version>`. The joined string is saved in the
// string pool so that the returned reference does not point into temporaries.
StringRef SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
  return StringPool.save(Twine("__stop_") + SectionSuffix + VersionStr);
}
479
480
} // namespace
481
482
// Stores the pass options and the list of ignorelist files for later use by
// run().
//
// IgnorelistFiles is an ArrayRef, i.e. a non-owning view: there is nothing to
// move from, so it is passed on as is and the member is initialized from the
// viewed elements.
SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
    SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
    : Options(std::move(Opts)), IgnorelistFiles(IgnorelistFiles) {}
485
486
// Pass entry point. Loads the ignorelist (if any files were given), skips the
// module when its source file is listed there, and otherwise runs the
// implementation. All analyses are reported as preserved unless the
// implementation reports a change.
PreservedAnalyses
SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
  std::unique_ptr<SpecialCaseList> List;
  if (!IgnorelistFiles.empty()) {
    List = SpecialCaseList::createOrDie(IgnorelistFiles,
                                        *vfs::getRealFileSystem());
    // An ignored source file leaves the whole module untouched.
    if (List->inSection("metadata", "src", M.getSourceFileName()))
      return PreservedAnalyses::all();
  }

  SanitizerBinaryMetadata Impl(M, Options, std::move(List));
  return Impl.run() ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
501
502