Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
35266 views
1
//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10
// which looks for an out-of-bound array element access.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "clang/AST/CharUnits.h"
15
#include "clang/AST/ParentMapContext.h"
16
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17
#include "clang/StaticAnalyzer/Checkers/Taint.h"
18
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19
#include "clang/StaticAnalyzer/Core/Checker.h"
20
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
21
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
22
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
24
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
25
#include "llvm/ADT/SmallString.h"
26
#include "llvm/Support/FormatVariadic.h"
27
#include "llvm/Support/raw_ostream.h"
28
#include <optional>
29
30
using namespace clang;
31
using namespace ento;
32
using namespace taint;
33
using llvm::formatv;
34
35
namespace {
36
/// If `E` is a "clean" array subscript expression, return the type of the
37
/// accessed element. If the base of the subscript expression is modified by
38
/// pointer arithmetic (and not the beginning of a "full" memory region), this
39
/// always returns nullopt because that's the right (or the least bad) thing to
40
/// do for the diagnostic output that's relying on this.
41
static std::optional<QualType> determineElementType(const Expr *E,
42
const CheckerContext &C) {
43
const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
44
if (!ASE)
45
return std::nullopt;
46
47
const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
48
if (!SubscriptBaseReg)
49
return std::nullopt;
50
51
// The base of the subscript expression is affected by pointer arithmetics,
52
// so we want to report byte offsets instead of indices.
53
if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
54
return std::nullopt;
55
56
return ASE->getType();
57
}
58
59
static std::optional<int64_t>
60
determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
61
if (!T)
62
return std::nullopt;
63
return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
64
}
65
66
class StateUpdateReporter {
67
const SubRegion *Reg;
68
const NonLoc ByteOffsetVal;
69
const std::optional<QualType> ElementType;
70
const std::optional<int64_t> ElementSize;
71
bool AssumedNonNegative = false;
72
std::optional<NonLoc> AssumedUpperBound = std::nullopt;
73
74
public:
75
StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
76
CheckerContext &C)
77
: Reg(R), ByteOffsetVal(ByteOffsVal),
78
ElementType(determineElementType(E, C)),
79
ElementSize(determineElementSize(ElementType, C)) {}
80
81
void recordNonNegativeAssumption() { AssumedNonNegative = true; }
82
void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
83
AssumedUpperBound = UpperBoundVal;
84
}
85
86
bool assumedNonNegative() { return AssumedNonNegative; }
87
88
const NoteTag *createNoteTag(CheckerContext &C) const;
89
90
private:
91
std::string getMessage(PathSensitiveBugReport &BR) const;
92
93
/// Return true if information about the value of `Sym` can put constraints
94
/// on some symbol which is interesting within the bug report `BR`.
95
/// In particular, this returns true when `Sym` is interesting within `BR`;
96
/// but it also returns true if `Sym` is an expression that contains integer
97
/// constants and a single symbolic operand which is interesting (in `BR`).
98
/// We need to use this instead of plain `BR.isInteresting()` because if we
99
/// are analyzing code like
100
/// int array[10];
101
/// int f(int arg) {
102
/// return array[arg] && array[arg + 10];
103
/// }
104
/// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
105
/// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
106
/// to detect this out of bounds access).
107
static bool providesInformationAboutInteresting(SymbolRef Sym,
108
PathSensitiveBugReport &BR);
109
static bool providesInformationAboutInteresting(SVal SV,
110
PathSensitiveBugReport &BR) {
111
return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
112
}
113
};
114
115
struct Messages {
116
std::string Short, Full;
117
};
118
119
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
120
// instead of `PreStmt` because the current implementation passes the whole
121
// expression to `CheckerContext::getSVal()` which only works after the
122
// symbolic evaluation of the expression. (To turn them into `PreStmt`
123
// callbacks, we'd need to duplicate the logic that evaluates these
124
// expressions.) The `MemberExpr` callback would work as `PreStmt` but it's
125
// defined as `PostStmt` for the sake of consistency with the other callbacks.
126
class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
127
check::PostStmt<UnaryOperator>,
128
check::PostStmt<MemberExpr>> {
129
BugType BT{this, "Out-of-bound access"};
130
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
131
132
void performCheck(const Expr *E, CheckerContext &C) const;
133
134
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
135
NonLoc Offset, std::optional<NonLoc> Extent,
136
bool IsTaintBug = false) const;
137
138
static void markPartsInteresting(PathSensitiveBugReport &BR,
139
ProgramStateRef ErrorState, NonLoc Val,
140
bool MarkTaint);
141
142
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
143
144
static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
145
NonLoc Offset, NonLoc Limit,
146
CheckerContext &C);
147
static bool isInAddressOf(const Stmt *S, ASTContext &AC);
148
149
public:
150
void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {
151
performCheck(E, C);
152
}
153
void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {
154
if (E->getOpcode() == UO_Deref)
155
performCheck(E, C);
156
}
157
void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {
158
if (E->isArrow())
159
performCheck(E->getBase(), C);
160
}
161
};
162
163
} // anonymous namespace
164
165
/// For a given Location that can be represented as a symbolic expression
166
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
167
/// Arr and the distance of Location from the beginning of Arr (expressed in a
168
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
169
/// cannot be determined.
170
static std::optional<std::pair<const SubRegion *, NonLoc>>
171
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
172
QualType T = SVB.getArrayIndexType();
173
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
174
// We will use this utility to add and multiply values.
175
return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
176
};
177
178
const SubRegion *OwnerRegion = nullptr;
179
std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
180
181
const ElementRegion *CurRegion =
182
dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
183
184
while (CurRegion) {
185
const auto Index = CurRegion->getIndex().getAs<NonLoc>();
186
if (!Index)
187
return std::nullopt;
188
189
QualType ElemType = CurRegion->getElementType();
190
191
// FIXME: The following early return was presumably added to safeguard the
192
// getTypeSizeInChars() call (which doesn't accept an incomplete type), but
193
// it seems that `ElemType` cannot be incomplete at this point.
194
if (ElemType->isIncompleteType())
195
return std::nullopt;
196
197
// Calculate Delta = Index * sizeof(ElemType).
198
NonLoc Size = SVB.makeArrayIndex(
199
SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
200
auto Delta = EvalBinOp(BO_Mul, *Index, Size);
201
if (!Delta)
202
return std::nullopt;
203
204
// Perform Offset += Delta.
205
Offset = EvalBinOp(BO_Add, *Offset, *Delta);
206
if (!Offset)
207
return std::nullopt;
208
209
OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
210
// When this is just another ElementRegion layer, we need to continue the
211
// offset calculations:
212
CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
213
}
214
215
if (OwnerRegion)
216
return std::make_pair(OwnerRegion, *Offset);
217
218
return std::nullopt;
219
}
220
221
// NOTE: This function is the "heart" of this checker. It simplifies
222
// inequalities with transformations that are valid (and very elementary) in
223
// pure mathematics, but become invalid if we use them in C++ number model
224
// where the calculations may overflow.
225
// Due to the overflow issues I think it's impossible (or at least not
226
// practical) to integrate this kind of simplification into the resolution of
227
// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
228
// produces valid results when the calculations are handling memory offsets
229
// and every value is well below SIZE_MAX.
230
// TODO: This algorithm should be moved to a central location where it's
231
// available for other checkers that need to compare memory offsets.
232
// NOTE: the simplification preserves the order of the two operands in a
233
// mathematical sense, but it may change the result produced by a C++
234
// comparison operator (and the automatic type conversions).
235
// For example, consider a comparison "X+1 < 0", where the LHS is stored as a
236
// size_t and the RHS is stored in an int. (As size_t is unsigned, this
237
// comparison is false for all values of "X".) However, the simplification may
238
// turn it into "X < -1", which is still always false in a mathematical sense,
239
// but can produce a true result when evaluated by `evalBinOp` (which follows
240
// the rules of C++ and casts -1 to SIZE_MAX).
241
static std::pair<NonLoc, nonloc::ConcreteInt>
242
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
243
SValBuilder &svalBuilder) {
244
std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
245
if (SymVal && SymVal->isExpression()) {
246
if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
247
llvm::APSInt constant =
248
APSIntType(extent.getValue()).convert(SIE->getRHS());
249
switch (SIE->getOpcode()) {
250
case BO_Mul:
251
// The constant should never be 0 here, becasue multiplication by zero
252
// is simplified by the engine.
253
if ((extent.getValue() % constant) != 0)
254
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
255
else
256
return getSimplifiedOffsets(
257
nonloc::SymbolVal(SIE->getLHS()),
258
svalBuilder.makeIntVal(extent.getValue() / constant),
259
svalBuilder);
260
case BO_Add:
261
return getSimplifiedOffsets(
262
nonloc::SymbolVal(SIE->getLHS()),
263
svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
264
default:
265
break;
266
}
267
}
268
}
269
270
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
271
}
272
273
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {
274
const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value);
275
return MaxV && MaxV->isNegative();
276
}
277
278
static bool isUnsigned(SValBuilder &SVB, NonLoc Value) {
279
QualType T = Value.getType(SVB.getContext());
280
return T->isUnsignedIntegerType();
281
}
282
283
// Evaluate the comparison Value < Threshold with the help of the custom
284
// simplification algorithm defined for this checker. Return a pair of states,
285
// where the first one corresponds to "value below threshold" and the second
286
// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
287
// the case when the evaluation fails.
288
// If the optional argument CheckEquality is true, then use BO_EQ instead of
289
// the default BO_LT after consistently applying the same simplification steps.
290
static std::pair<ProgramStateRef, ProgramStateRef>
291
compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
292
SValBuilder &SVB, bool CheckEquality = false) {
293
if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
294
std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
295
}
296
297
// We want to perform a _mathematical_ comparison between the numbers `Value`
298
// and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may
299
// perform automatic conversions. For example the number -1 is less than the
300
// number 1000, but -1 < `1000ull` will evaluate to `false` because the `int`
301
// -1 is converted to ULONGLONG_MAX.
302
// To avoid automatic conversions, we evaluate the "obvious" cases without
303
// calling `evalBinOpNN`:
304
if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) {
305
if (CheckEquality) {
306
// negative_value == unsigned_threshold is always false
307
return {nullptr, State};
308
}
309
// negative_value < unsigned_threshold is always true
310
return {State, nullptr};
311
}
312
if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) {
313
// unsigned_value == negative_threshold and
314
// unsigned_value < negative_threshold are both always false
315
return {nullptr, State};
316
}
317
// FIXME: These special cases are sufficient for handling real-world
318
// comparisons, but in theory there could be contrived situations where
319
// automatic conversion of a symbolic value (which can be negative and can be
320
// positive) leads to incorrect results.
321
// NOTE: We NEED to use the `evalBinOpNN` call in the "common" case, because
322
// we want to ensure that assumptions coming from this precondition and
323
// assumptions coming from regular C/C++ operator calls are represented by
324
// constraints on the same symbolic expression. A solution that would
325
// evaluate these "mathematical" compariosns through a separate pathway would
326
// be a step backwards in this sense.
327
328
const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
329
auto BelowThreshold =
330
SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())
331
.getAs<NonLoc>();
332
333
if (BelowThreshold)
334
return State->assume(*BelowThreshold);
335
336
return {nullptr, nullptr};
337
}
338
339
static std::string getRegionName(const SubRegion *Region) {
340
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
341
return RegName;
342
343
// Field regions only have descriptive names when their parent has a
344
// descriptive name; so we provide a fallback representation for them:
345
if (const auto *FR = Region->getAs<FieldRegion>()) {
346
if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
347
return formatv("the field '{0}'", Name);
348
return "the unnamed field";
349
}
350
351
if (isa<AllocaRegion>(Region))
352
return "the memory returned by 'alloca'";
353
354
if (isa<SymbolicRegion>(Region) &&
355
isa<HeapSpaceRegion>(Region->getMemorySpace()))
356
return "the heap area";
357
358
if (isa<StringRegion>(Region))
359
return "the string literal";
360
361
return "the region";
362
}
363
364
static std::optional<int64_t> getConcreteValue(NonLoc SV) {
365
if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
366
return ConcreteVal->getValue().tryExtValue();
367
}
368
return std::nullopt;
369
}
370
371
static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
372
return SV ? getConcreteValue(*SV) : std::nullopt;
373
}
374
375
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
376
std::string RegName = getRegionName(Region), OffsetStr = "";
377
378
if (auto ConcreteOffset = getConcreteValue(Offset))
379
OffsetStr = formatv(" {0}", ConcreteOffset);
380
381
return {
382
formatv("Out of bound access to memory preceding {0}", RegName),
383
formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
384
}
385
386
/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
387
/// it can be performed (`Divisor` is nonzero and there is no remainder). The
388
/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
389
/// division is considered to be successful.
390
static bool tryDividePair(std::optional<int64_t> &Val1,
391
std::optional<int64_t> &Val2, int64_t Divisor) {
392
if (!Divisor)
393
return false;
394
const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
395
const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
396
if (!Val1HasRemainder && !Val2HasRemainder) {
397
if (Val1)
398
*Val1 /= Divisor;
399
if (Val2)
400
*Val2 /= Divisor;
401
return true;
402
}
403
return false;
404
}
405
406
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
407
NonLoc Offset, NonLoc Extent, SVal Location,
408
bool AlsoMentionUnderflow) {
409
std::string RegName = getRegionName(Region);
410
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
411
assert(EReg && "this checker only handles element access");
412
QualType ElemType = EReg->getElementType();
413
414
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
415
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
416
417
int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
418
419
bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
420
const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
421
422
SmallString<256> Buf;
423
llvm::raw_svector_ostream Out(Buf);
424
Out << "Access of ";
425
if (!ExtentN && !UseByteOffsets)
426
Out << "'" << ElemType.getAsString() << "' element in ";
427
Out << RegName << " at ";
428
if (AlsoMentionUnderflow) {
429
Out << "a negative or overflowing " << OffsetOrIndex;
430
} else if (OffsetN) {
431
Out << OffsetOrIndex << " " << *OffsetN;
432
} else {
433
Out << "an overflowing " << OffsetOrIndex;
434
}
435
if (ExtentN) {
436
Out << ", while it holds only ";
437
if (*ExtentN != 1)
438
Out << *ExtentN;
439
else
440
Out << "a single";
441
if (UseByteOffsets)
442
Out << " byte";
443
else
444
Out << " '" << ElemType.getAsString() << "' element";
445
446
if (*ExtentN > 1)
447
Out << "s";
448
}
449
450
return {formatv("Out of bound access to memory {0} {1}",
451
AlsoMentionUnderflow ? "around" : "after the end of",
452
RegName),
453
std::string(Buf)};
454
}
455
456
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,
457
bool AlsoMentionUnderflow) {
458
std::string RegName = getRegionName(Region);
459
return {formatv("Potential out of bound access to {0} with tainted {1}",
460
RegName, OffsetName),
461
formatv("Access of {0} with a tainted {1} that may be {2}too large",
462
RegName, OffsetName,
463
AlsoMentionUnderflow ? "negative or " : "")};
464
}
465
466
const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
467
// Don't create a note tag if we didn't assume anything:
468
if (!AssumedNonNegative && !AssumedUpperBound)
469
return nullptr;
470
471
return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {
472
return getMessage(BR);
473
});
474
}
475
476
std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
477
bool ShouldReportNonNegative = AssumedNonNegative;
478
if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
479
if (AssumedUpperBound &&
480
providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
481
// Even if the byte offset isn't interesting (e.g. it's a constant value),
482
// the assumption can still be interesting if it provides information
483
// about an interesting symbolic upper bound.
484
ShouldReportNonNegative = false;
485
} else {
486
// We don't have anything interesting, don't report the assumption.
487
return "";
488
}
489
}
490
491
std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
492
std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
493
494
const bool UseIndex =
495
ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
496
497
SmallString<256> Buf;
498
llvm::raw_svector_ostream Out(Buf);
499
Out << "Assuming ";
500
if (UseIndex) {
501
Out << "index ";
502
if (OffsetN)
503
Out << "'" << OffsetN << "' ";
504
} else if (AssumedUpperBound) {
505
Out << "byte offset ";
506
if (OffsetN)
507
Out << "'" << OffsetN << "' ";
508
} else {
509
Out << "offset ";
510
}
511
512
Out << "is";
513
if (ShouldReportNonNegative) {
514
Out << " non-negative";
515
}
516
if (AssumedUpperBound) {
517
if (ShouldReportNonNegative)
518
Out << " and";
519
Out << " less than ";
520
if (ExtentN)
521
Out << *ExtentN << ", ";
522
if (UseIndex && ElementType)
523
Out << "the number of '" << ElementType->getAsString()
524
<< "' elements in ";
525
else
526
Out << "the extent of ";
527
Out << getRegionName(Reg);
528
}
529
return std::string(Out.str());
530
}
531
532
bool StateUpdateReporter::providesInformationAboutInteresting(
533
SymbolRef Sym, PathSensitiveBugReport &BR) {
534
if (!Sym)
535
return false;
536
for (SymbolRef PartSym : Sym->symbols()) {
537
// The interestingess mark may appear on any layer as we're stripping off
538
// the SymIntExpr, UnarySymExpr etc. layers...
539
if (BR.isInteresting(PartSym))
540
return true;
541
// ...but if both sides of the expression are symbolic, then there is no
542
// practical algorithm to produce separate constraints for the two
543
// operands (from the single combined result).
544
if (isa<SymSymExpr>(PartSym))
545
return false;
546
}
547
return false;
548
}
549
550
void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
551
const SVal Location = C.getSVal(E);
552
553
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
554
// #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
555
// and incomplete analysis of these leads to false positives. As even
556
// accurate reports would be confusing for the users, just disable reports
557
// from these macros:
558
if (isFromCtypeMacro(E, C.getASTContext()))
559
return;
560
561
ProgramStateRef State = C.getState();
562
SValBuilder &SVB = C.getSValBuilder();
563
564
const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
565
computeOffset(State, SVB, Location);
566
567
if (!RawOffset)
568
return;
569
570
auto [Reg, ByteOffset] = *RawOffset;
571
572
// The state updates will be reported as a single note tag, which will be
573
// composed by this helper class.
574
StateUpdateReporter SUR(Reg, ByteOffset, E, C);
575
576
// CHECK LOWER BOUND
577
const MemSpaceRegion *Space = Reg->getMemorySpace();
578
if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
579
// A symbolic region in unknown space represents an unknown pointer that
580
// may point into the middle of an array, so we don't look for underflows.
581
// Both conditions are significant because we want to check underflows in
582
// symbolic regions on the heap (which may be introduced by checkers like
583
// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
584
// non-symbolic regions (e.g. a field subregion of a symbolic region) in
585
// unknown space.
586
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
587
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
588
589
if (PrecedesLowerBound) {
590
// The offset may be invalid (negative)...
591
if (!WithinLowerBound) {
592
// ...and it cannot be valid (>= 0), so report an error.
593
Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
594
reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
595
return;
596
}
597
// ...but it can be valid as well, so the checker will (optimistically)
598
// assume that it's valid and mention this in the note tag.
599
SUR.recordNonNegativeAssumption();
600
}
601
602
// Actually update the state. The "if" only fails in the extremely unlikely
603
// case when compareValueToThreshold returns {nullptr, nullptr} becasue
604
// evalBinOpNN fails to evaluate the less-than operator.
605
if (WithinLowerBound)
606
State = WithinLowerBound;
607
}
608
609
// CHECK UPPER BOUND
610
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
611
if (auto KnownSize = Size.getAs<NonLoc>()) {
612
// In a situation where both underflow and overflow are possible (but the
613
// index is either tainted or known to be invalid), the logic of this
614
// checker will first assume that the offset is non-negative, and then
615
// (with this additional assumption) it will detect an overflow error.
616
// In this situation the warning message should mention both possibilities.
617
bool AlsoMentionUnderflow = SUR.assumedNonNegative();
618
619
auto [WithinUpperBound, ExceedsUpperBound] =
620
compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
621
622
if (ExceedsUpperBound) {
623
// The offset may be invalid (>= Size)...
624
if (!WithinUpperBound) {
625
// ...and it cannot be within bounds, so report an error, unless we can
626
// definitely determine that this is an idiomatic `&array[size]`
627
// expression that calculates the past-the-end pointer.
628
if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
629
*KnownSize, C)) {
630
C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
631
return;
632
}
633
634
Messages Msgs =
635
getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,
636
Location, AlsoMentionUnderflow);
637
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
638
return;
639
}
640
// ...and it can be valid as well...
641
if (isTainted(State, ByteOffset)) {
642
// ...but it's tainted, so report an error.
643
644
// Diagnostic detail: saying "tainted offset" is always correct, but
645
// the common case is that 'idx' is tainted in 'arr[idx]' and then it's
646
// nicer to say "tainted index".
647
const char *OffsetName = "offset";
648
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
649
if (isTainted(State, ASE->getIdx(), C.getLocationContext()))
650
OffsetName = "index";
651
652
Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);
653
reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
654
/*IsTaintBug=*/true);
655
return;
656
}
657
// ...and it isn't tainted, so the checker will (optimistically) assume
658
// that the offset is in bounds and mention this in the note tag.
659
SUR.recordUpperBoundAssumption(*KnownSize);
660
}
661
662
// Actually update the state. The "if" only fails in the extremely unlikely
663
// case when compareValueToThreshold returns {nullptr, nullptr} becasue
664
// evalBinOpNN fails to evaluate the less-than operator.
665
if (WithinUpperBound)
666
State = WithinUpperBound;
667
}
668
669
// Add a transition, reporting the state updates that we accumulated.
670
C.addTransition(State, SUR.createNoteTag(C));
671
}
672
673
void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR,
674
ProgramStateRef ErrorState,
675
NonLoc Val, bool MarkTaint) {
676
if (SymbolRef Sym = Val.getAsSymbol()) {
677
// If the offset is a symbolic value, iterate over its "parts" with
678
// `SymExpr::symbols()` and mark each of them as interesting.
679
// For example, if the offset is `x*4 + y` then we put interestingness onto
680
// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
681
// `x` and `y`.
682
for (SymbolRef PartSym : Sym->symbols())
683
BR.markInteresting(PartSym);
684
}
685
686
if (MarkTaint) {
687
// If the issue that we're reporting depends on the taintedness of the
688
// offset, then put interestingness onto symbols that could be the origin
689
// of the taint. Note that this may find symbols that did not appear in
690
// `Sym->symbols()` (because they're only loosely connected to `Val`).
691
for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val))
692
BR.markInteresting(Sym);
693
}
694
}
695
696
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
697
ProgramStateRef ErrorState, Messages Msgs,
698
NonLoc Offset, std::optional<NonLoc> Extent,
699
bool IsTaintBug /*=false*/) const {
700
701
ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
702
if (!ErrorNode)
703
return;
704
705
auto BR = std::make_unique<PathSensitiveBugReport>(
706
IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
707
708
// FIXME: ideally we would just call trackExpressionValue() and that would
709
// "do the right thing": mark the relevant symbols as interesting, track the
710
// control dependencies and statements storing the relevant values and add
711
// helpful diagnostic pieces. However, right now trackExpressionValue() is
712
// a heap of unreliable heuristics, so it would cause several issues:
713
// - Interestingness is not applied consistently, e.g. if `array[x+10]`
714
// causes an overflow, then `x` is not marked as interesting.
715
// - We get irrelevant diagnostic pieces, e.g. in the code
716
// `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;`
717
// it places a "Storing uninitialized value" note on the `malloc` call
718
// (which is technically true, but irrelevant).
719
// If trackExpressionValue() becomes reliable, it should be applied instead
720
// of this custom markPartsInteresting().
721
markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);
722
if (Extent)
723
markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
724
725
C.emitReport(std::move(BR));
726
}
727
728
bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
729
SourceLocation Loc = S->getBeginLoc();
730
if (!Loc.isMacroID())
731
return false;
732
733
StringRef MacroName = Lexer::getImmediateMacroName(
734
Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
735
736
if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
737
return false;
738
739
return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
740
(MacroName == "isblank") || (MacroName == "isdigit") ||
741
(MacroName == "isgraph") || (MacroName == "islower") ||
742
(MacroName == "isnctrl") || (MacroName == "isprint") ||
743
(MacroName == "ispunct") || (MacroName == "isspace") ||
744
(MacroName == "isupper") || (MacroName == "isxdigit"));
745
}
746
747
bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
748
ParentMapContext &ParentCtx = ACtx.getParentMapContext();
749
do {
750
const DynTypedNodeList Parents = ParentCtx.getParents(*S);
751
if (Parents.empty())
752
return false;
753
S = Parents[0].get<Stmt>();
754
} while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));
755
const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);
756
return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
757
}
758
759
bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,
760
ProgramStateRef State,
761
NonLoc Offset, NonLoc Limit,
762
CheckerContext &C) {
763
if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
764
auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(
765
State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true);
766
return EqualsToThreshold && !NotEqualToThreshold;
767
}
768
return false;
769
}
770
771
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
772
mgr.registerChecker<ArrayBoundCheckerV2>();
773
}
774
775
bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
776
return true;
777
}
778
779