Path: blob/main/contrib/llvm-project/compiler-rt/lib/dfsan/dfsan.cpp
35233 views
//===-- dfsan.cpp ---------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file is a part of DataFlowSanitizer.9//10// DataFlowSanitizer runtime. This file defines the public interface to11// DataFlowSanitizer as well as the definition of certain runtime functions12// called automatically by the compiler (specifically the instrumentation pass13// in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).14//15// The public interface is defined in include/sanitizer/dfsan_interface.h whose16// functions are prefixed dfsan_ while the compiler interface functions are17// prefixed __dfsan_.18//===----------------------------------------------------------------------===//1920#include "dfsan/dfsan.h"2122#include "dfsan/dfsan_chained_origin_depot.h"23#include "dfsan/dfsan_flags.h"24#include "dfsan/dfsan_origin.h"25#include "dfsan/dfsan_thread.h"26#include "sanitizer_common/sanitizer_atomic.h"27#include "sanitizer_common/sanitizer_common.h"28#include "sanitizer_common/sanitizer_file.h"29#include "sanitizer_common/sanitizer_flag_parser.h"30#include "sanitizer_common/sanitizer_flags.h"31#include "sanitizer_common/sanitizer_internal_defs.h"32#include "sanitizer_common/sanitizer_libc.h"33#include "sanitizer_common/sanitizer_report_decorator.h"34#include "sanitizer_common/sanitizer_stacktrace.h"35#if SANITIZER_LINUX36# include <sys/personality.h>37#endif3839using namespace __dfsan;4041Flags __dfsan::flags_data;4243// The size of TLS variables. These constants must be kept in sync with the ones44// in DataFlowSanitizer.cpp.45static const int kDFsanArgTlsSize = 800;46static const int kDFsanRetvalTlsSize = 800;47static const int kDFsanArgOriginTlsSize = 800;4849SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u6450__dfsan_retval_tls[kDFsanRetvalTlsSize / sizeof(u64)];51SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u32 __dfsan_retval_origin_tls;52SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u6453__dfsan_arg_tls[kDFsanArgTlsSize / sizeof(u64)];54SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL u3255__dfsan_arg_origin_tls[kDFsanArgOriginTlsSize / sizeof(u32)];5657// Instrumented code may set this value in terms of -dfsan-track-origins.58// * undefined or 0: do not track origins.59// * 1: track origins at memory store operations.60// * 2: track origins at memory load and store operations.61// TODO: track callsites.62extern "C" SANITIZER_WEAK_ATTRIBUTE const int __dfsan_track_origins;6364extern "C" SANITIZER_INTERFACE_ATTRIBUTE int dfsan_get_track_origins() {65return &__dfsan_track_origins ? __dfsan_track_origins : 0;66}6768// On Linux/x86_64, memory is laid out as follows:69//70// +--------------------+ 0x800000000000 (top of memory)71// | application 3 |72// +--------------------+ 0x70000000000073// | invalid |74// +--------------------+ 0x61000000000075// | origin 1 |76// +--------------------+ 0x60000000000077// | application 2 |78// +--------------------+ 0x51000000000079// | shadow 1 |80// +--------------------+ 0x50000000000081// | invalid |82// +--------------------+ 0x40000000000083// | origin 3 |84// +--------------------+ 0x30000000000085// | shadow 3 |86// +--------------------+ 0x20000000000087// | origin 2 |88// +--------------------+ 0x11000000000089// | invalid |90// +--------------------+ 0x10000000000091// | shadow 2 |92// +--------------------+ 0x01000000000093// | application 1 |94// +--------------------+ 0x00000000000095//96// MEM_TO_SHADOW(mem) = mem ^ 0x50000000000097// SHADOW_TO_ORIGIN(shadow) = shadow + 0x1000000000009899extern "C" SANITIZER_INTERFACE_ATTRIBUTE100dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) {101dfsan_label label = ls[0];102for (uptr i = 1; i != n; ++i)103label |= ls[i];104return label;105}106107// Return the union of all the n labels from addr at the high 32 bit, and the108// origin of the first taint byte at the low 32 bit.109extern "C" SANITIZER_INTERFACE_ATTRIBUTE u64110__dfsan_load_label_and_origin(const void *addr, uptr n) {111dfsan_label label = 0;112u64 ret = 0;113uptr p = (uptr)addr;114dfsan_label *s = shadow_for((void *)p);115for (uptr i = 0; i < n; ++i) {116dfsan_label l = s[i];117if (!l)118continue;119label |= l;120if (!ret)121ret = *(dfsan_origin *)origin_for((void *)(p + i));122}123return ret | (u64)label << 32;124}125126extern "C" SANITIZER_INTERFACE_ATTRIBUTE127void __dfsan_unimplemented(char *fname) {128if (flags().warn_unimplemented)129Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",130fname);131}132133extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_wrapper_extern_weak_null(134const void *addr, char *fname) {135if (!addr)136Report(137"ERROR: DataFlowSanitizer: dfsan generated wrapper calling null "138"extern_weak function %s\nIf this only happens with dfsan, the "139"dfsan instrumentation pass may be accidentally optimizing out a "140"null check\n",141fname);142}143144// Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function145// to try to figure out where labels are being introduced in a nominally146// label-free program.147extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() {148if (flags().warn_nonzero_labels)149Report("WARNING: DataFlowSanitizer: saw nonzero label\n");150}151152// Indirect call to an uninstrumented vararg function. We don't have a way of153// handling these at the moment.154extern "C" SANITIZER_INTERFACE_ATTRIBUTE void155__dfsan_vararg_wrapper(const char *fname) {156Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "157"function %s\n", fname);158Die();159}160161// Resolves the union of two labels.162SANITIZER_INTERFACE_ATTRIBUTE dfsan_label163dfsan_union(dfsan_label l1, dfsan_label l2) {164return l1 | l2;165}166167static const uptr kOriginAlign = sizeof(dfsan_origin);168static const uptr kOriginAlignMask = ~(kOriginAlign - 1UL);169170static uptr OriginAlignUp(uptr u) {171return (u + kOriginAlign - 1) & kOriginAlignMask;172}173174static uptr OriginAlignDown(uptr u) { return u & kOriginAlignMask; }175176// Return the origin of the first taint byte in the size bytes from the address177// addr.178static dfsan_origin GetOriginIfTainted(uptr addr, uptr size) {179for (uptr i = 0; i < size; ++i, ++addr) {180dfsan_label *s = shadow_for((void *)addr);181182if (*s) {183// Validate address region.184CHECK(MEM_IS_SHADOW(s));185return *(dfsan_origin *)origin_for((void *)addr);186}187}188return 0;189}190191// For platforms which support slow unwinder only, we need to restrict the store192// context size to 1, basically only storing the current pc, because the slow193// unwinder which is based on libunwind is not async signal safe and causes194// random freezes in forking applications as well as in signal handlers.195// DFSan supports only Linux. So we do not restrict the store context size.196#define GET_STORE_STACK_TRACE_PC_BP(pc, bp) \197BufferedStackTrace stack; \198stack.Unwind(pc, bp, nullptr, true, flags().store_context_size);199200#define PRINT_CALLER_STACK_TRACE \201{ \202GET_CALLER_PC_BP; \203GET_STORE_STACK_TRACE_PC_BP(pc, bp) \204stack.Print(); \205}206207// Return a chain with the previous ID id and the current stack.208// from_init = true if this is the first chain of an origin tracking path.209static u32 ChainOrigin(u32 id, StackTrace *stack, bool from_init = false) {210// StackDepot is not async signal safe. Do not create new chains in a signal211// handler.212DFsanThread *t = GetCurrentThread();213if (t && t->InSignalHandler())214return id;215216// As an optimization the origin of an application byte is updated only when217// its shadow is non-zero. Because we are only interested in the origins of218// taint labels, it does not matter what origin a zero label has. This reduces219// memory write cost. MSan does similar optimization. The following invariant220// may not hold because of some bugs. We check the invariant to help debug.221if (!from_init && id == 0 && flags().check_origin_invariant) {222Printf(" DFSan found invalid origin invariant\n");223PRINT_CALLER_STACK_TRACE224}225226Origin o = Origin::FromRawId(id);227stack->tag = StackTrace::TAG_UNKNOWN;228Origin chained = Origin::CreateChainedOrigin(o, stack);229return chained.raw_id();230}231232static void ChainAndWriteOriginIfTainted(uptr src, uptr size, uptr dst,233StackTrace *stack) {234dfsan_origin o = GetOriginIfTainted(src, size);235if (o) {236o = ChainOrigin(o, stack);237*(dfsan_origin *)origin_for((void *)dst) = o;238}239}240241// Copy the origins of the size bytes from src to dst. The source and target242// memory ranges cannot be overlapped. This is used by memcpy. stack records the243// stack trace of the memcpy. When dst and src are not 4-byte aligned properly,244// origins at the unaligned address boundaries may be overwritten because four245// contiguous bytes share the same origin.246static void CopyOrigin(const void *dst, const void *src, uptr size,247StackTrace *stack) {248uptr d = (uptr)dst;249uptr beg = OriginAlignDown(d);250// Copy left unaligned origin if that memory is tainted.251if (beg < d) {252ChainAndWriteOriginIfTainted((uptr)src, beg + kOriginAlign - d, beg, stack);253beg += kOriginAlign;254}255256uptr end = OriginAlignDown(d + size);257// If both ends fall into the same 4-byte slot, we are done.258if (end < beg)259return;260261// Copy right unaligned origin if that memory is tainted.262if (end < d + size)263ChainAndWriteOriginIfTainted((uptr)src + (end - d), (d + size) - end, end,264stack);265266if (beg >= end)267return;268269// Align src up.270uptr src_a = OriginAlignUp((uptr)src);271dfsan_origin *src_o = origin_for((void *)src_a);272u32 *src_s = (u32 *)shadow_for((void *)src_a);273dfsan_origin *src_end = origin_for((void *)(src_a + (end - beg)));274dfsan_origin *dst_o = origin_for((void *)beg);275dfsan_origin last_src_o = 0;276dfsan_origin last_dst_o = 0;277for (; src_o < src_end; ++src_o, ++src_s, ++dst_o) {278if (!*src_s)279continue;280if (*src_o != last_src_o) {281last_src_o = *src_o;282last_dst_o = ChainOrigin(last_src_o, stack);283}284*dst_o = last_dst_o;285}286}287288// Copy the origins of the size bytes from src to dst. The source and target289// memory ranges may be overlapped. So the copy is done in a reverse order.290// This is used by memmove. stack records the stack trace of the memmove.291static void ReverseCopyOrigin(const void *dst, const void *src, uptr size,292StackTrace *stack) {293uptr d = (uptr)dst;294uptr end = OriginAlignDown(d + size);295296// Copy right unaligned origin if that memory is tainted.297if (end < d + size)298ChainAndWriteOriginIfTainted((uptr)src + (end - d), (d + size) - end, end,299stack);300301uptr beg = OriginAlignDown(d);302303if (beg + kOriginAlign < end) {304// Align src up.305uptr src_a = OriginAlignUp((uptr)src);306void *src_end = (void *)(src_a + end - beg - kOriginAlign);307dfsan_origin *src_end_o = origin_for(src_end);308u32 *src_end_s = (u32 *)shadow_for(src_end);309dfsan_origin *src_begin_o = origin_for((void *)src_a);310dfsan_origin *dst = origin_for((void *)(end - kOriginAlign));311dfsan_origin last_src_o = 0;312dfsan_origin last_dst_o = 0;313for (; src_end_o >= src_begin_o; --src_end_o, --src_end_s, --dst) {314if (!*src_end_s)315continue;316if (*src_end_o != last_src_o) {317last_src_o = *src_end_o;318last_dst_o = ChainOrigin(last_src_o, stack);319}320*dst = last_dst_o;321}322}323324// Copy left unaligned origin if that memory is tainted.325if (beg < d)326ChainAndWriteOriginIfTainted((uptr)src, beg + kOriginAlign - d, beg, stack);327}328329// Copy or move the origins of the len bytes from src to dst. The source and330// target memory ranges may or may not be overlapped. This is used by memory331// transfer operations. stack records the stack trace of the memory transfer332// operation.333static void MoveOrigin(const void *dst, const void *src, uptr size,334StackTrace *stack) {335// Validate address regions.336if (!MEM_IS_SHADOW(shadow_for(dst)) ||337!MEM_IS_SHADOW(shadow_for((void *)((uptr)dst + size))) ||338!MEM_IS_SHADOW(shadow_for(src)) ||339!MEM_IS_SHADOW(shadow_for((void *)((uptr)src + size)))) {340CHECK(false);341return;342}343// If destination origin range overlaps with source origin range, move344// origins by copying origins in a reverse order; otherwise, copy origins in345// a normal order. The orders of origin transfer are consistent with the346// orders of how memcpy and memmove transfer user data.347uptr src_aligned_beg = OriginAlignDown((uptr)src);348uptr src_aligned_end = OriginAlignDown((uptr)src + size);349uptr dst_aligned_beg = OriginAlignDown((uptr)dst);350if (dst_aligned_beg < src_aligned_end && dst_aligned_beg >= src_aligned_beg)351return ReverseCopyOrigin(dst, src, size, stack);352return CopyOrigin(dst, src, size, stack);353}354355// Set the size bytes from the addres dst to be the origin value.356static void SetOrigin(const void *dst, uptr size, u32 origin) {357if (size == 0)358return;359360// Origin mapping is 4 bytes per 4 bytes of application memory.361// Here we extend the range such that its left and right bounds are both362// 4 byte aligned.363uptr x = unaligned_origin_for((uptr)dst);364uptr beg = OriginAlignDown(x);365uptr end = OriginAlignUp(x + size); // align up.366u64 origin64 = ((u64)origin << 32) | origin;367// This is like memset, but the value is 32-bit. We unroll by 2 to write368// 64 bits at once. May want to unroll further to get 128-bit stores.369if (beg & 7ULL) {370if (*(u32 *)beg != origin)371*(u32 *)beg = origin;372beg += 4;373}374for (uptr addr = beg; addr < (end & ~7UL); addr += 8) {375if (*(u64 *)addr == origin64)376continue;377*(u64 *)addr = origin64;378}379if (end & 7ULL)380if (*(u32 *)(end - kOriginAlign) != origin)381*(u32 *)(end - kOriginAlign) = origin;382}383384#define RET_CHAIN_ORIGIN(id) \385GET_CALLER_PC_BP; \386GET_STORE_STACK_TRACE_PC_BP(pc, bp); \387return ChainOrigin(id, &stack);388389// Return a new origin chain with the previous ID id and the current stack390// trace.391extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin392__dfsan_chain_origin(dfsan_origin id) {393RET_CHAIN_ORIGIN(id)394}395396// Return a new origin chain with the previous ID id and the current stack397// trace if the label is tainted.398extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin399__dfsan_chain_origin_if_tainted(dfsan_label label, dfsan_origin id) {400if (!label)401return id;402RET_CHAIN_ORIGIN(id)403}404405// Copy or move the origins of the len bytes from src to dst.406extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_mem_origin_transfer(407const void *dst, const void *src, uptr len) {408if (src == dst)409return;410GET_CALLER_PC_BP;411GET_STORE_STACK_TRACE_PC_BP(pc, bp);412MoveOrigin(dst, src, len, &stack);413}414415extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_origin_transfer(416const void *dst, const void *src, uptr len) {417__dfsan_mem_origin_transfer(dst, src, len);418}419420static void CopyShadow(void *dst, const void *src, uptr len) {421internal_memcpy((void *)__dfsan::shadow_for(dst),422(const void *)__dfsan::shadow_for(src),423len * sizeof(dfsan_label));424}425426extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_mem_shadow_transfer(427void *dst, const void *src, uptr len) {428CopyShadow(dst, src, len);429}430431// Copy shadow and origins of the len bytes from src to dst.432extern "C" SANITIZER_INTERFACE_ATTRIBUTE void433__dfsan_mem_shadow_origin_transfer(void *dst, const void *src, uptr size) {434if (src == dst)435return;436CopyShadow(dst, src, size);437if (dfsan_get_track_origins()) {438// Duplicating code instead of calling __dfsan_mem_origin_transfer439// so that the getting the caller stack frame works correctly.440GET_CALLER_PC_BP;441GET_STORE_STACK_TRACE_PC_BP(pc, bp);442MoveOrigin(dst, src, size, &stack);443}444}445446// Copy shadow and origins as per __atomic_compare_exchange.447extern "C" SANITIZER_INTERFACE_ATTRIBUTE void448__dfsan_mem_shadow_origin_conditional_exchange(u8 condition, void *target,449void *expected,450const void *desired, uptr size) {451void *dst;452const void *src;453// condition is result of native call to __atomic_compare_exchange454if (condition) {455// Copy desired into target456dst = target;457src = desired;458} else {459// Copy target into expected460dst = expected;461src = target;462}463if (src == dst)464return;465CopyShadow(dst, src, size);466if (dfsan_get_track_origins()) {467// Duplicating code instead of calling __dfsan_mem_origin_transfer468// so that the getting the caller stack frame works correctly.469GET_CALLER_PC_BP;470GET_STORE_STACK_TRACE_PC_BP(pc, bp);471MoveOrigin(dst, src, size, &stack);472}473}474475namespace __dfsan {476477bool dfsan_inited = false;478bool dfsan_init_is_running = false;479480void dfsan_copy_memory(void *dst, const void *src, uptr size) {481internal_memcpy(dst, src, size);482dfsan_mem_shadow_transfer(dst, src, size);483if (dfsan_get_track_origins())484dfsan_mem_origin_transfer(dst, src, size);485}486487// Releases the pages within the origin address range.488static void ReleaseOrigins(void *addr, uptr size) {489const uptr beg_origin_addr = (uptr)__dfsan::origin_for(addr);490const void *end_addr = (void *)((uptr)addr + size);491const uptr end_origin_addr = (uptr)__dfsan::origin_for(end_addr);492493if (end_origin_addr - beg_origin_addr <494common_flags()->clear_shadow_mmap_threshold)495return;496497const uptr page_size = GetPageSizeCached();498const uptr beg_aligned = RoundUpTo(beg_origin_addr, page_size);499const uptr end_aligned = RoundDownTo(end_origin_addr, page_size);500501if (!MmapFixedSuperNoReserve(beg_aligned, end_aligned - beg_aligned))502Die();503}504505static void WriteZeroShadowInRange(uptr beg, uptr end) {506// Don't write the label if it is already the value we need it to be.507// In a program where most addresses are not labeled, it is common that508// a page of shadow memory is entirely zeroed. The Linux copy-on-write509// implementation will share all of the zeroed pages, making a copy of a510// page when any value is written. The un-sharing will happen even if511// the value written does not change the value in memory. Avoiding the512// write when both |label| and |*labelp| are zero dramatically reduces513// the amount of real memory used by large programs.514if (!mem_is_zero((const char *)beg, end - beg))515internal_memset((void *)beg, 0, end - beg);516}517518// Releases the pages within the shadow address range, and sets519// the shadow addresses not on the pages to be 0.520static void ReleaseOrClearShadows(void *addr, uptr size) {521const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr);522const void *end_addr = (void *)((uptr)addr + size);523const uptr end_shadow_addr = (uptr)__dfsan::shadow_for(end_addr);524525if (end_shadow_addr - beg_shadow_addr <526common_flags()->clear_shadow_mmap_threshold) {527WriteZeroShadowInRange(beg_shadow_addr, end_shadow_addr);528return;529}530531const uptr page_size = GetPageSizeCached();532const uptr beg_aligned = RoundUpTo(beg_shadow_addr, page_size);533const uptr end_aligned = RoundDownTo(end_shadow_addr, page_size);534535if (beg_aligned >= end_aligned) {536WriteZeroShadowInRange(beg_shadow_addr, end_shadow_addr);537} else {538if (beg_aligned != beg_shadow_addr)539WriteZeroShadowInRange(beg_shadow_addr, beg_aligned);540if (end_aligned != end_shadow_addr)541WriteZeroShadowInRange(end_aligned, end_shadow_addr);542if (!MmapFixedSuperNoReserve(beg_aligned, end_aligned - beg_aligned))543Die();544}545}546547void SetShadow(dfsan_label label, void *addr, uptr size, dfsan_origin origin) {548if (0 != label) {549const uptr beg_shadow_addr = (uptr)__dfsan::shadow_for(addr);550internal_memset((void *)beg_shadow_addr, label, size);551if (dfsan_get_track_origins())552SetOrigin(addr, size, origin);553return;554}555556if (dfsan_get_track_origins())557ReleaseOrigins(addr, size);558559ReleaseOrClearShadows(addr, size);560}561562} // namespace __dfsan563564// If the label s is tainted, set the size bytes from the address p to be a new565// origin chain with the previous ID o and the current stack trace. This is566// used by instrumentation to reduce code size when too much code is inserted.567extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_maybe_store_origin(568dfsan_label s, void *p, uptr size, dfsan_origin o) {569if (UNLIKELY(s)) {570GET_CALLER_PC_BP;571GET_STORE_STACK_TRACE_PC_BP(pc, bp);572SetOrigin(p, size, ChainOrigin(o, &stack));573}574}575576extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_set_label(577dfsan_label label, dfsan_origin origin, void *addr, uptr size) {578__dfsan::SetShadow(label, addr, size, origin);579}580581SANITIZER_INTERFACE_ATTRIBUTE582void dfsan_set_label(dfsan_label label, void *addr, uptr size) {583dfsan_origin init_origin = 0;584if (label && dfsan_get_track_origins()) {585GET_CALLER_PC_BP;586GET_STORE_STACK_TRACE_PC_BP(pc, bp);587init_origin = ChainOrigin(0, &stack, true);588}589__dfsan::SetShadow(label, addr, size, init_origin);590}591592SANITIZER_INTERFACE_ATTRIBUTE593void dfsan_add_label(dfsan_label label, void *addr, uptr size) {594if (0 == label)595return;596597if (dfsan_get_track_origins()) {598GET_CALLER_PC_BP;599GET_STORE_STACK_TRACE_PC_BP(pc, bp);600dfsan_origin init_origin = ChainOrigin(0, &stack, true);601SetOrigin(addr, size, init_origin);602}603604for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)605*labelp |= label;606}607608// Unlike the other dfsan interface functions the behavior of this function609// depends on the label of one of its arguments. Hence it is implemented as a610// custom function.611extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label612__dfsw_dfsan_get_label(long data, dfsan_label data_label,613dfsan_label *ret_label) {614*ret_label = 0;615return data_label;616}617618extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label __dfso_dfsan_get_label(619long data, dfsan_label data_label, dfsan_label *ret_label,620dfsan_origin data_origin, dfsan_origin *ret_origin) {621*ret_label = 0;622*ret_origin = 0;623return data_label;624}625626// This function is used if dfsan_get_origin is called when origin tracking is627// off.628extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfsw_dfsan_get_origin(629long data, dfsan_label data_label, dfsan_label *ret_label) {630*ret_label = 0;631return 0;632}633634extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfso_dfsan_get_origin(635long data, dfsan_label data_label, dfsan_label *ret_label,636dfsan_origin data_origin, dfsan_origin *ret_origin) {637*ret_label = 0;638*ret_origin = 0;639return data_origin;640}641642SANITIZER_INTERFACE_ATTRIBUTE dfsan_label643dfsan_read_label(const void *addr, uptr size) {644if (size == 0)645return 0;646return __dfsan_union_load(shadow_for(addr), size);647}648649SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin650dfsan_read_origin_of_first_taint(const void *addr, uptr size) {651return GetOriginIfTainted((uptr)addr, size);652}653654SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_label_origin(dfsan_label label,655dfsan_origin origin,656void *addr,657uptr size) {658__dfsan_set_label(label, origin, addr, size);659}660661extern "C" SANITIZER_INTERFACE_ATTRIBUTE int662dfsan_has_label(dfsan_label label, dfsan_label elem) {663return (label & elem) == elem;664}665666namespace __dfsan {667668typedef void (*dfsan_conditional_callback_t)(dfsan_label label,669dfsan_origin origin);670static dfsan_conditional_callback_t conditional_callback = nullptr;671static dfsan_label labels_in_signal_conditional = 0;672673static void ConditionalCallback(dfsan_label label, dfsan_origin origin) {674// Programs have many branches. For efficiency the conditional sink callback675// handler needs to ignore as many as possible as early as possible.676if (label == 0) {677return;678}679if (conditional_callback == nullptr) {680return;681}682683// This initial ConditionalCallback handler needs to be in here in dfsan684// runtime (rather than being an entirely user implemented hook) so that it685// has access to dfsan thread information.686DFsanThread *t = GetCurrentThread();687// A callback operation which does useful work (like record the flow) will688// likely be too long executed in a signal handler.689if (t && t->InSignalHandler()) {690// Record set of labels used in signal handler for completeness.691labels_in_signal_conditional |= label;692return;693}694695conditional_callback(label, origin);696}697698} // namespace __dfsan699700extern "C" SANITIZER_INTERFACE_ATTRIBUTE void701__dfsan_conditional_callback_origin(dfsan_label label, dfsan_origin origin) {702__dfsan::ConditionalCallback(label, origin);703}704705extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_conditional_callback(706dfsan_label label) {707__dfsan::ConditionalCallback(label, 0);708}709710extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_conditional_callback(711__dfsan::dfsan_conditional_callback_t callback) {712__dfsan::conditional_callback = callback;713}714715extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label716dfsan_get_labels_in_signal_conditional() {717return __dfsan::labels_in_signal_conditional;718}719720namespace __dfsan {721722typedef void (*dfsan_reaches_function_callback_t)(dfsan_label label,723dfsan_origin origin,724const char *file,725unsigned int line,726const char *function);727static dfsan_reaches_function_callback_t reaches_function_callback = nullptr;728static dfsan_label labels_in_signal_reaches_function = 0;729730static void ReachesFunctionCallback(dfsan_label label, dfsan_origin origin,731const char *file, unsigned int line,732const char *function) {733if (label == 0) {734return;735}736if (reaches_function_callback == nullptr) {737return;738}739740// This initial ReachesFunctionCallback handler needs to be in here in dfsan741// runtime (rather than being an entirely user implemented hook) so that it742// has access to dfsan thread information.743DFsanThread *t = GetCurrentThread();744// A callback operation which does useful work (like record the flow) will745// likely be too long executed in a signal handler.746if (t && t->InSignalHandler()) {747// Record set of labels used in signal handler for completeness.748labels_in_signal_reaches_function |= label;749return;750}751752reaches_function_callback(label, origin, file, line, function);753}754755} // namespace __dfsan756757extern "C" SANITIZER_INTERFACE_ATTRIBUTE void758__dfsan_reaches_function_callback_origin(dfsan_label label, dfsan_origin origin,759const char *file, unsigned int line,760const char *function) {761__dfsan::ReachesFunctionCallback(label, origin, file, line, function);762}763764extern "C" SANITIZER_INTERFACE_ATTRIBUTE void765__dfsan_reaches_function_callback(dfsan_label label, const char *file,766unsigned int line, const char *function) {767__dfsan::ReachesFunctionCallback(label, 0, file, line, function);768}769770extern "C" SANITIZER_INTERFACE_ATTRIBUTE void771dfsan_set_reaches_function_callback(772__dfsan::dfsan_reaches_function_callback_t callback) {773__dfsan::reaches_function_callback = callback;774}775776extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label777dfsan_get_labels_in_signal_reaches_function() {778return __dfsan::labels_in_signal_reaches_function;779}780781class Decorator : public __sanitizer::SanitizerCommonDecorator {782public:783Decorator() : SanitizerCommonDecorator() {}784const char *Origin() const { return Magenta(); }785};786787namespace {788789void PrintNoOriginTrackingWarning() {790Decorator d;791Printf(792" %sDFSan: origin tracking is not enabled. Did you specify the "793"-dfsan-track-origins=1 option?%s\n",794d.Warning(), d.Default());795}796797void PrintNoTaintWarning(const void *address) {798Decorator d;799Printf(" %sDFSan: no tainted value at %x%s\n", d.Warning(), address,800d.Default());801}802803void PrintInvalidOriginWarning(dfsan_label label, const void *address) {804Decorator d;805Printf(806" %sTaint value 0x%x (at %p) has invalid origin tracking. This can "807"be a DFSan bug.%s\n",808d.Warning(), label, address, d.Default());809}810811void PrintInvalidOriginIdWarning(dfsan_origin origin) {812Decorator d;813Printf(814" %sOrigin Id %d has invalid origin tracking. This can "815"be a DFSan bug.%s\n",816d.Warning(), origin, d.Default());817}818819bool PrintOriginTraceFramesToStr(Origin o, InternalScopedString *out) {820Decorator d;821bool found = false;822823while (o.isChainedOrigin()) {824StackTrace stack;825dfsan_origin origin_id = o.raw_id();826o = o.getNextChainedOrigin(&stack);827if (o.isChainedOrigin())828out->AppendF(829" %sOrigin value: 0x%x, Taint value was stored to memory at%s\n",830d.Origin(), origin_id, d.Default());831else832out->AppendF(" %sOrigin value: 0x%x, Taint value was created at%s\n",833d.Origin(), origin_id, d.Default());834835// Includes a trailing newline, so no need to add it again.836stack.PrintTo(out);837found = true;838}839840return found;841}842843bool PrintOriginTraceToStr(const void *addr, const char *description,844InternalScopedString *out) {845CHECK(out);846CHECK(dfsan_get_track_origins());847Decorator d;848849const dfsan_label label = *__dfsan::shadow_for(addr);850CHECK(label);851852const dfsan_origin origin = *__dfsan::origin_for(addr);853854out->AppendF(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n",855d.Origin(), label, addr, description ? description : "",856d.Default());857858Origin o = Origin::FromRawId(origin);859return PrintOriginTraceFramesToStr(o, out);860}861862} // namespace863864extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace(865const void *addr, const char *description) {866if (!dfsan_get_track_origins()) {867PrintNoOriginTrackingWarning();868return;869}870871const dfsan_label label = *__dfsan::shadow_for(addr);872if (!label) {873PrintNoTaintWarning(addr);874return;875}876877InternalScopedString trace;878bool success = PrintOriginTraceToStr(addr, description, &trace);879880if (trace.length())881Printf("%s", trace.data());882883if (!success)884PrintInvalidOriginWarning(label, addr);885}886887extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr888dfsan_sprint_origin_trace(const void *addr, const char *description,889char *out_buf, uptr out_buf_size) {890CHECK(out_buf);891892if (!dfsan_get_track_origins()) {893PrintNoOriginTrackingWarning();894return 0;895}896897const dfsan_label label = *__dfsan::shadow_for(addr);898if (!label) {899PrintNoTaintWarning(addr);900return 0;901}902903InternalScopedString trace;904bool success = PrintOriginTraceToStr(addr, description, &trace);905906if (!success) {907PrintInvalidOriginWarning(label, addr);908return 0;909}910911if (out_buf_size) {912internal_strncpy(out_buf, trace.data(), out_buf_size - 1);913out_buf[out_buf_size - 1] = '\0';914}915916return trace.length();917}918919extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_id_trace(920dfsan_origin origin) {921if (!dfsan_get_track_origins()) {922PrintNoOriginTrackingWarning();923return;924}925Origin o = Origin::FromRawId(origin);926927InternalScopedString trace;928bool success = PrintOriginTraceFramesToStr(o, &trace);929930if (trace.length())931Printf("%s", trace.data());932933if (!success)934PrintInvalidOriginIdWarning(origin);935}936937extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr dfsan_sprint_origin_id_trace(938dfsan_origin origin, char *out_buf, uptr out_buf_size) {939CHECK(out_buf);940941if (!dfsan_get_track_origins()) {942PrintNoOriginTrackingWarning();943return 0;944}945Origin o = Origin::FromRawId(origin);946947InternalScopedString trace;948bool success = PrintOriginTraceFramesToStr(o, &trace);949950if (!success) {951PrintInvalidOriginIdWarning(origin);952return 0;953}954955if (out_buf_size) {956internal_strncpy(out_buf, trace.data(), out_buf_size - 1);957out_buf[out_buf_size - 1] = '\0';958}959960return trace.length();961}962963extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin964dfsan_get_init_origin(const void *addr) {965if (!dfsan_get_track_origins())966return 0;967968const dfsan_label label = *__dfsan::shadow_for(addr);969if (!label)970return 0;971972const dfsan_origin origin = *__dfsan::origin_for(addr);973974Origin o = Origin::FromRawId(origin);975dfsan_origin origin_id = o.raw_id();976while (o.isChainedOrigin()) {977StackTrace stack;978origin_id = o.raw_id();979o = o.getNextChainedOrigin(&stack);980}981return origin_id;982}983984void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,985void *context,986bool request_fast,987u32 max_depth) {988using namespace __dfsan;989DFsanThread *t = GetCurrentThread();990if (!t || !StackTrace::WillUseFastUnwind(request_fast)) {991return Unwind(max_depth, pc, bp, context, 0, 0, false);992}993Unwind(max_depth, pc, bp, nullptr, t->stack_top(), t->stack_bottom(), true);994}995996extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_print_stack_trace() {997GET_CALLER_PC_BP;998GET_STORE_STACK_TRACE_PC_BP(pc, bp);999stack.Print();1000}10011002extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr1003dfsan_sprint_stack_trace(char *out_buf, uptr out_buf_size) {1004CHECK(out_buf);1005GET_CALLER_PC_BP;1006GET_STORE_STACK_TRACE_PC_BP(pc, bp);1007return stack.PrintTo(out_buf, out_buf_size);1008}10091010void Flags::SetDefaults() {1011#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;1012#include "dfsan_flags.inc"1013#undef DFSAN_FLAG1014}10151016static void RegisterDfsanFlags(FlagParser *parser, Flags *f) {1017#define DFSAN_FLAG(Type, Name, DefaultValue, Description) \1018RegisterFlag(parser, #Name, Description, &f->Name);1019#include "dfsan_flags.inc"1020#undef DFSAN_FLAG1021}10221023static void InitializeFlags() {1024SetCommonFlagsDefaults();1025{1026CommonFlags cf;1027cf.CopyFrom(*common_flags());1028cf.intercept_tls_get_addr = true;1029OverrideCommonFlags(cf);1030}1031flags().SetDefaults();10321033FlagParser parser;1034RegisterCommonFlags(&parser);1035RegisterDfsanFlags(&parser, &flags());1036parser.ParseStringFromEnv("DFSAN_OPTIONS");1037InitializeCommonFlags();1038if (Verbosity()) ReportUnrecognizedFlags();1039if (common_flags()->help) parser.PrintFlagDescriptions();1040}10411042SANITIZER_INTERFACE_ATTRIBUTE1043void dfsan_clear_arg_tls(uptr offset, uptr size) {1044internal_memset((void *)((uptr)__dfsan_arg_tls + offset), 0, size);1045}10461047SANITIZER_INTERFACE_ATTRIBUTE1048void dfsan_clear_thread_local_state() {1049internal_memset(__dfsan_arg_tls, 0, sizeof(__dfsan_arg_tls));1050internal_memset(__dfsan_retval_tls, 0, sizeof(__dfsan_retval_tls));10511052if (dfsan_get_track_origins()) {1053internal_memset(__dfsan_arg_origin_tls, 0, sizeof(__dfsan_arg_origin_tls));1054internal_memset(&__dfsan_retval_origin_tls, 0,1055sizeof(__dfsan_retval_origin_tls));1056}1057}10581059SANITIZER_INTERFACE_ATTRIBUTE1060void dfsan_set_arg_tls(uptr offset, dfsan_label label) {1061// 2x to match ShadowTLSAlignment.1062// ShadowTLSAlignment should probably be changed.1063// TODO: Consider reducing ShadowTLSAlignment to 1.1064// Aligning to 2 bytes is probably a remnant of fast16 mode.1065((dfsan_label *)__dfsan_arg_tls)[offset * 2] = label;1066}10671068SANITIZER_INTERFACE_ATTRIBUTE1069void dfsan_set_arg_origin_tls(uptr offset, dfsan_origin o) {1070__dfsan_arg_origin_tls[offset] = o;1071}10721073extern "C" void dfsan_flush() {1074const uptr maxVirtualAddress = GetMaxUserVirtualAddress();1075for (unsigned i = 0; i < kMemoryLayoutSize; ++i) {1076uptr start = kMemoryLayout[i].start;1077uptr end = kMemoryLayout[i].end;1078uptr size = end - start;1079MappingDesc::Type type = kMemoryLayout[i].type;10801081if (type != MappingDesc::SHADOW && type != MappingDesc::ORIGIN)1082continue;10831084// Check if the segment should be mapped based on platform constraints.1085if (start >= maxVirtualAddress)1086continue;10871088if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name)) {1089Printf("FATAL: DataFlowSanitizer: failed to clear memory region\n");1090Die();1091}1092}1093__dfsan::labels_in_signal_conditional = 0;1094__dfsan::labels_in_signal_reaches_function = 0;1095}10961097// TODO: CheckMemoryLayoutSanity is based on msan.1098// Consider refactoring these into a shared implementation.1099static void CheckMemoryLayoutSanity() {1100uptr prev_end = 0;1101for (unsigned i = 0; i < kMemoryLayoutSize; ++i) {1102uptr start = kMemoryLayout[i].start;1103uptr end = kMemoryLayout[i].end;1104MappingDesc::Type type = kMemoryLayout[i].type;1105CHECK_LT(start, end);1106CHECK_EQ(prev_end, start);1107CHECK(addr_is_type(start, type));1108CHECK(addr_is_type((start + end) / 2, type));1109CHECK(addr_is_type(end - 1, type));1110if (type == MappingDesc::APP) {1111uptr addr = start;1112CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));1113CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));1114CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)));11151116addr = (start + end) / 2;1117CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));1118CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));1119CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)));11201121addr = end - 1;1122CHECK(MEM_IS_SHADOW(MEM_TO_SHADOW(addr)));1123CHECK(MEM_IS_ORIGIN(MEM_TO_ORIGIN(addr)));1124CHECK_EQ(MEM_TO_ORIGIN(addr), SHADOW_TO_ORIGIN(MEM_TO_SHADOW(addr)));1125}1126prev_end = end;1127}1128}11291130// TODO: CheckMemoryRangeAvailability is based on msan.1131// Consider refactoring these into a shared implementation.1132static bool CheckMemoryRangeAvailability(uptr beg, uptr size, bool verbose) {1133if (size > 0) {1134uptr end = beg + size - 1;1135if (!MemoryRangeIsAvailable(beg, end)) {1136if (verbose)1137Printf("FATAL: Memory range %p - %p is not available.\n", beg, end);1138return false;1139}1140}1141return true;1142}11431144// TODO: ProtectMemoryRange is based on msan.1145// Consider refactoring these into a shared implementation.1146static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) {1147if (size > 0) {1148void *addr = MmapFixedNoAccess(beg, size, name);1149if (beg == 0 && addr) {1150// Depending on the kernel configuration, we may not be able to protect1151// the page at address zero.1152uptr gap = 16 * GetPageSizeCached();1153beg += gap;1154size -= gap;1155addr = MmapFixedNoAccess(beg, size, name);1156}1157if ((uptr)addr != beg) {1158uptr end = beg + size - 1;1159Printf("FATAL: Cannot protect memory range %p - %p (%s).\n", beg, end,1160name);1161return false;1162}1163}1164return true;1165}11661167// TODO: InitShadow is based on msan.1168// Consider refactoring these into a shared implementation.1169bool InitShadow(bool init_origins, bool dry_run) {1170// Let user know mapping parameters first.1171VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init);1172for (unsigned i = 0; i < kMemoryLayoutSize; ++i)1173VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout[i].name, kMemoryLayout[i].start,1174kMemoryLayout[i].end - 1);11751176CheckMemoryLayoutSanity();11771178if (!MEM_IS_APP(&__dfsan::dfsan_init)) {1179if (!dry_run)1180Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",1181(uptr)&__dfsan::dfsan_init);1182return false;1183}11841185const uptr maxVirtualAddress = GetMaxUserVirtualAddress();11861187for (unsigned i = 0; i < kMemoryLayoutSize; ++i) {1188uptr start = kMemoryLayout[i].start;1189uptr end = kMemoryLayout[i].end;1190uptr size = end - start;1191MappingDesc::Type type = kMemoryLayout[i].type;11921193// Check if the segment should be mapped based on platform constraints.1194if (start >= maxVirtualAddress)1195continue;11961197bool map = type == MappingDesc::SHADOW ||1198(init_origins && type == MappingDesc::ORIGIN);1199bool protect = type == MappingDesc::INVALID ||1200(!init_origins && type == MappingDesc::ORIGIN);1201CHECK(!(map && protect));1202if (!map && !protect) {1203CHECK(type == MappingDesc::APP || type == MappingDesc::ALLOCATOR);12041205if (dry_run && type == MappingDesc::ALLOCATOR &&1206!CheckMemoryRangeAvailability(start, size, !dry_run))1207return false;1208}1209if (map) {1210if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))1211return false;1212if (!dry_run &&1213!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))1214return false;1215if (!dry_run && common_flags()->use_madv_dontdump)1216DontDumpShadowMemory(start, size);1217}1218if (protect) {1219if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))1220return false;1221if (!dry_run && !ProtectMemoryRange(start, size, kMemoryLayout[i].name))1222return false;1223}1224}12251226return true;1227}12281229bool InitShadowWithReExec(bool init_origins) {1230// Start with dry run: check layout is ok, but don't print warnings because1231// warning messages will cause tests to fail (even if we successfully re-exec1232// after the warning).1233bool success = InitShadow(init_origins, true);1234if (!success) {1235#if SANITIZER_LINUX1236// Perhaps ASLR entropy is too high. If ASLR is enabled, re-exec without it.1237int old_personality = personality(0xffffffff);1238bool aslr_on =1239(old_personality != -1) && ((old_personality & ADDR_NO_RANDOMIZE) == 0);12401241if (aslr_on) {1242VReport(1,1243"WARNING: DataflowSanitizer: memory layout is incompatible, "1244"possibly due to high-entropy ASLR.\n"1245"Re-execing with fixed virtual address space.\n"1246"N.B. reducing ASLR entropy is preferable.\n");1247CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);1248ReExec();1249}1250#endif1251}12521253// The earlier dry run didn't actually map or protect anything. Run again in1254// non-dry run mode.1255return success && InitShadow(init_origins, false);1256}12571258static void DFsanInit(int argc, char **argv, char **envp) {1259CHECK(!dfsan_init_is_running);1260if (dfsan_inited)1261return;1262dfsan_init_is_running = true;1263SanitizerToolName = "DataflowSanitizer";12641265AvoidCVE_2016_2143();12661267InitializeFlags();12681269CheckASLR();12701271if (!InitShadowWithReExec(dfsan_get_track_origins())) {1272Printf("FATAL: DataflowSanitizer can not mmap the shadow memory.\n");1273DumpProcessMap();1274Die();1275}12761277initialize_interceptors();12781279// Set up threads1280DFsanTSDInit(DFsanTSDDtor);12811282dfsan_allocator_init();12831284DFsanThread *main_thread = DFsanThread::Create(nullptr, nullptr);1285SetCurrentThread(main_thread);1286main_thread->Init();12871288dfsan_init_is_running = false;1289dfsan_inited = true;1290}12911292namespace __dfsan {12931294void dfsan_init() { DFsanInit(0, nullptr, nullptr); }12951296} // namespace __dfsan12971298#if SANITIZER_CAN_USE_PREINIT_ARRAY1299__attribute__((section(".preinit_array"),1300used)) static void (*dfsan_init_ptr)(int, char **,1301char **) = DFsanInit;1302#endif130313041305