Path: blob/main/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, take it.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
  // and now do smaller reads/writes; these could also be considered as "same
  // access". However, it would make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speed up.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as a rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, take it.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if the access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                       \
  if (LIKELY(race_mask & (1 << (idx * 4)))) {                                  \
    u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                            \
    u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));       \
    thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx);    \
  }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                  \
  const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));           \
  const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions make it possible to turn memory access
// and func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks make only 2 slow path calls:
// report a race and trace part switching. Race reporting is easy to turn into
// a tail call: we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to happen in the middle
// of callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, but TraceRestart* functions themselves recurse into
// the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only beginning and end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least first kPageSize/2 to page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. It's not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
      reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
  if (!IsShadowMem(shadow_mem_end)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
        "%zx\n",
        shadow_mem, (void*)addr, size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end));
  }
#endif

  // Access to the .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif
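
For context, here is a minimal sketch (not part of the runtime, assuming a standard -fsanitize=thread build on a target with 4-byte int) of user code whose instrumented stores reach the MemoryAccess() entry point above through the __tsan_write4 shim defined in tsan_interface.inc, which this file includes at the end:

// Illustrative example only: a tiny racy program, not runtime code.
#include <thread>

int g;  // unsynchronized global: the two stores below race

int main() {
  // Each plain store to g is instrumented by the compiler as a call to
  // __tsan_write4(&g), which forwards to MemoryAccess(thr, pc, (uptr)&g, 4,
  // kAccessWrite); CheckRaces() then compares the two threads' shadow cells.
  std::thread t([] { g = 1; });
  g = 2;
  t.join();
}

Running such a program under TSan should produce a data race report pointing at both stores to g.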