GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
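  // The PC delta is biased by half of the representable range so that both
  // backward and forward jumps relative to the previously traced PC fit into
  // the unsigned kPCBits-wide field checked below.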
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, take it.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
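  // unused0/unused1 mirror the shadow/access vector arguments of the
  // vectorized variant below, so callers can pass the dummy values defined by
  // the non-vectorized LOAD_CURRENT_SHADOW unchanged.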
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
  // and now do smaller reads/writes; these can also be considered as "same
  // access". However, it will make the check more expensive, so it's unclear
  // if it's worth it. But this would conserve trace space, so it's useful
  // besides the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match read with both reads and writes.
  // Shadow::kRodata has only read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory access
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
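  // race_mask is a byte-level movemask and __builtin_ffs is 1-based, so
  // dividing the position of the first set bit by 4 yields the index (0-3)
  // of the 32-bit shadow lane that raced.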
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (that contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, take it.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
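  // Each 32-bit shadow value packs the access byte mask in bits 0-7, the sid
  // in bits 8-15, the epoch in bits 16-29 and the is_read/is_atomic flags in
  // the top two bits; the mask constants below select these fields in all
  // four lanes at once.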
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
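  // index is 1-based when it comes from __builtin_ffs (0 means "no candidate")
  // and a raw byte offset when chosen pseudo-randomly below; either way,
  // dividing it by 4 selects the 32-bit slot to overwrite.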
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
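  // Lanes are initialized to INT32_MAX so that slots which did not race can
  // never compare as concurrent below; for raced slots the macro loads the
  // thread's current epoch for that slot's sid, shifted to line up with the
  // shadow epoch bits.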
  // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
#  define LOAD_EPOCH(idx)                                                     \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
#  undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem)                              \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));      \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions make it possible to turn memory access
// and func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks do only 2 slow path calls:
// report a race and trace part switching. Race reporting is easy to turn into
// a tail call: we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to be in the middle of
// callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, but TraceRestart* functions themselves recurse into
// the callback after switching trace part. As a result, the hottest callbacks
// contain only tail calls, which effectively makes them leaf functions (can
// use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
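  // The 16-byte access spans two full shadow cells; it is checked as two
  // 8-byte halves, the second one under the SECOND label below.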
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
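  // size1 is the portion of the access that fits into the first shadow cell;
  // any remainder (size2 below) spills into the next cell.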
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
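  // Each 4-slot shadow cell is reset so that only the first slot holds v and
  // the remaining slots are cleared.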
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only beginning and end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2, rounded up to a page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result, a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. This is not the case for data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
      reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
  if (!IsShadowMem(shadow_mem_end)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
        "%zx\n",
        shadow_mem, (void*)addr, size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end));
  }
#endif

  // Access to .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once to not check for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif