// Path: blob/main/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
// 35266 views
//== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines ArrayBoundCheckerV2, which is a path-sensitive check9// which looks for an out-of-bound array element access.10//11//===----------------------------------------------------------------------===//1213#include "clang/AST/CharUnits.h"14#include "clang/AST/ParentMapContext.h"15#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"16#include "clang/StaticAnalyzer/Checkers/Taint.h"17#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"18#include "clang/StaticAnalyzer/Core/Checker.h"19#include "clang/StaticAnalyzer/Core/CheckerManager.h"20#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"21#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"22#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"23#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"24#include "llvm/ADT/SmallString.h"25#include "llvm/Support/FormatVariadic.h"26#include "llvm/Support/raw_ostream.h"27#include <optional>2829using namespace clang;30using namespace ento;31using namespace taint;32using llvm::formatv;3334namespace {35/// If `E` is a "clean" array subscript expression, return the type of the36/// accessed element. 
If the base of the subscript expression is modified by37/// pointer arithmetic (and not the beginning of a "full" memory region), this38/// always returns nullopt because that's the right (or the least bad) thing to39/// do for the diagnostic output that's relying on this.40static std::optional<QualType> determineElementType(const Expr *E,41const CheckerContext &C) {42const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);43if (!ASE)44return std::nullopt;4546const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();47if (!SubscriptBaseReg)48return std::nullopt;4950// The base of the subscript expression is affected by pointer arithmetics,51// so we want to report byte offsets instead of indices.52if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))53return std::nullopt;5455return ASE->getType();56}5758static std::optional<int64_t>59determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {60if (!T)61return std::nullopt;62return C.getASTContext().getTypeSizeInChars(*T).getQuantity();63}6465class StateUpdateReporter {66const SubRegion *Reg;67const NonLoc ByteOffsetVal;68const std::optional<QualType> ElementType;69const std::optional<int64_t> ElementSize;70bool AssumedNonNegative = false;71std::optional<NonLoc> AssumedUpperBound = std::nullopt;7273public:74StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,75CheckerContext &C)76: Reg(R), ByteOffsetVal(ByteOffsVal),77ElementType(determineElementType(E, C)),78ElementSize(determineElementSize(ElementType, C)) {}7980void recordNonNegativeAssumption() { AssumedNonNegative = true; }81void recordUpperBoundAssumption(NonLoc UpperBoundVal) {82AssumedUpperBound = UpperBoundVal;83}8485bool assumedNonNegative() { return AssumedNonNegative; }8687const NoteTag *createNoteTag(CheckerContext &C) const;8889private:90std::string getMessage(PathSensitiveBugReport &BR) const;9192/// Return true if information about the value of `Sym` can put constraints93/// on some symbol 
which is interesting within the bug report `BR`.94/// In particular, this returns true when `Sym` is interesting within `BR`;95/// but it also returns true if `Sym` is an expression that contains integer96/// constants and a single symbolic operand which is interesting (in `BR`).97/// We need to use this instead of plain `BR.isInteresting()` because if we98/// are analyzing code like99/// int array[10];100/// int f(int arg) {101/// return array[arg] && array[arg + 10];102/// }103/// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not104/// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough105/// to detect this out of bounds access).106static bool providesInformationAboutInteresting(SymbolRef Sym,107PathSensitiveBugReport &BR);108static bool providesInformationAboutInteresting(SVal SV,109PathSensitiveBugReport &BR) {110return providesInformationAboutInteresting(SV.getAsSymbol(), BR);111}112};113114struct Messages {115std::string Short, Full;116};117118// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`119// instead of `PreStmt` because the current implementation passes the whole120// expression to `CheckerContext::getSVal()` which only works after the121// symbolic evaluation of the expression. (To turn them into `PreStmt`122// callbacks, we'd need to duplicate the logic that evaluates these123// expressions.) 
The `MemberExpr` callback would work as `PreStmt` but it's124// defined as `PostStmt` for the sake of consistency with the other callbacks.125class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,126check::PostStmt<UnaryOperator>,127check::PostStmt<MemberExpr>> {128BugType BT{this, "Out-of-bound access"};129BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};130131void performCheck(const Expr *E, CheckerContext &C) const;132133void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,134NonLoc Offset, std::optional<NonLoc> Extent,135bool IsTaintBug = false) const;136137static void markPartsInteresting(PathSensitiveBugReport &BR,138ProgramStateRef ErrorState, NonLoc Val,139bool MarkTaint);140141static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);142143static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,144NonLoc Offset, NonLoc Limit,145CheckerContext &C);146static bool isInAddressOf(const Stmt *S, ASTContext &AC);147148public:149void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {150performCheck(E, C);151}152void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {153if (E->getOpcode() == UO_Deref)154performCheck(E, C);155}156void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {157if (E->isArrow())158performCheck(E->getBase(), C);159}160};161162} // anonymous namespace163164/// For a given Location that can be represented as a symbolic expression165/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block166/// Arr and the distance of Location from the beginning of Arr (expressed in a167/// NonLoc that specifies the number of CharUnits). 
Returns nullopt when these168/// cannot be determined.169static std::optional<std::pair<const SubRegion *, NonLoc>>170computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {171QualType T = SVB.getArrayIndexType();172auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {173// We will use this utility to add and multiply values.174return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();175};176177const SubRegion *OwnerRegion = nullptr;178std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();179180const ElementRegion *CurRegion =181dyn_cast_or_null<ElementRegion>(Location.getAsRegion());182183while (CurRegion) {184const auto Index = CurRegion->getIndex().getAs<NonLoc>();185if (!Index)186return std::nullopt;187188QualType ElemType = CurRegion->getElementType();189190// FIXME: The following early return was presumably added to safeguard the191// getTypeSizeInChars() call (which doesn't accept an incomplete type), but192// it seems that `ElemType` cannot be incomplete at this point.193if (ElemType->isIncompleteType())194return std::nullopt;195196// Calculate Delta = Index * sizeof(ElemType).197NonLoc Size = SVB.makeArrayIndex(198SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());199auto Delta = EvalBinOp(BO_Mul, *Index, Size);200if (!Delta)201return std::nullopt;202203// Perform Offset += Delta.204Offset = EvalBinOp(BO_Add, *Offset, *Delta);205if (!Offset)206return std::nullopt;207208OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();209// When this is just another ElementRegion layer, we need to continue the210// offset calculations:211CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);212}213214if (OwnerRegion)215return std::make_pair(OwnerRegion, *Offset);216217return std::nullopt;218}219220// NOTE: This function is the "heart" of this checker. 
It simplifies221// inequalities with transformations that are valid (and very elementary) in222// pure mathematics, but become invalid if we use them in C++ number model223// where the calculations may overflow.224// Due to the overflow issues I think it's impossible (or at least not225// practical) to integrate this kind of simplification into the resolution of226// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function227// produces valid results when the calculations are handling memory offsets228// and every value is well below SIZE_MAX.229// TODO: This algorithm should be moved to a central location where it's230// available for other checkers that need to compare memory offsets.231// NOTE: the simplification preserves the order of the two operands in a232// mathematical sense, but it may change the result produced by a C++233// comparison operator (and the automatic type conversions).234// For example, consider a comparison "X+1 < 0", where the LHS is stored as a235// size_t and the RHS is stored in an int. (As size_t is unsigned, this236// comparison is false for all values of "X".) 
However, the simplification may237// turn it into "X < -1", which is still always false in a mathematical sense,238// but can produce a true result when evaluated by `evalBinOp` (which follows239// the rules of C++ and casts -1 to SIZE_MAX).240static std::pair<NonLoc, nonloc::ConcreteInt>241getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,242SValBuilder &svalBuilder) {243std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();244if (SymVal && SymVal->isExpression()) {245if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {246llvm::APSInt constant =247APSIntType(extent.getValue()).convert(SIE->getRHS());248switch (SIE->getOpcode()) {249case BO_Mul:250// The constant should never be 0 here, becasue multiplication by zero251// is simplified by the engine.252if ((extent.getValue() % constant) != 0)253return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);254else255return getSimplifiedOffsets(256nonloc::SymbolVal(SIE->getLHS()),257svalBuilder.makeIntVal(extent.getValue() / constant),258svalBuilder);259case BO_Add:260return getSimplifiedOffsets(261nonloc::SymbolVal(SIE->getLHS()),262svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);263default:264break;265}266}267}268269return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);270}271272static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {273const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value);274return MaxV && MaxV->isNegative();275}276277static bool isUnsigned(SValBuilder &SVB, NonLoc Value) {278QualType T = Value.getType(SVB.getContext());279return T->isUnsignedIntegerType();280}281282// Evaluate the comparison Value < Threshold with the help of the custom283// simplification algorithm defined for this checker. Return a pair of states,284// where the first one corresponds to "value below threshold" and the second285// corresponds to "value at or above threshold". 
Returns {nullptr, nullptr} in286// the case when the evaluation fails.287// If the optional argument CheckEquality is true, then use BO_EQ instead of288// the default BO_LT after consistently applying the same simplification steps.289static std::pair<ProgramStateRef, ProgramStateRef>290compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,291SValBuilder &SVB, bool CheckEquality = false) {292if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {293std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);294}295296// We want to perform a _mathematical_ comparison between the numbers `Value`297// and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may298// perform automatic conversions. For example the number -1 is less than the299// number 1000, but -1 < `1000ull` will evaluate to `false` because the `int`300// -1 is converted to ULONGLONG_MAX.301// To avoid automatic conversions, we evaluate the "obvious" cases without302// calling `evalBinOpNN`:303if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) {304if (CheckEquality) {305// negative_value == unsigned_threshold is always false306return {nullptr, State};307}308// negative_value < unsigned_threshold is always true309return {State, nullptr};310}311if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) {312// unsigned_value == negative_threshold and313// unsigned_value < negative_threshold are both always false314return {nullptr, State};315}316// FIXME: These special cases are sufficient for handling real-world317// comparisons, but in theory there could be contrived situations where318// automatic conversion of a symbolic value (which can be negative and can be319// positive) leads to incorrect results.320// NOTE: We NEED to use the `evalBinOpNN` call in the "common" case, because321// we want to ensure that assumptions coming from this precondition and322// assumptions coming from regular C/C++ operator calls are 
represented by323// constraints on the same symbolic expression. A solution that would324// evaluate these "mathematical" compariosns through a separate pathway would325// be a step backwards in this sense.326327const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;328auto BelowThreshold =329SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())330.getAs<NonLoc>();331332if (BelowThreshold)333return State->assume(*BelowThreshold);334335return {nullptr, nullptr};336}337338static std::string getRegionName(const SubRegion *Region) {339if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())340return RegName;341342// Field regions only have descriptive names when their parent has a343// descriptive name; so we provide a fallback representation for them:344if (const auto *FR = Region->getAs<FieldRegion>()) {345if (StringRef Name = FR->getDecl()->getName(); !Name.empty())346return formatv("the field '{0}'", Name);347return "the unnamed field";348}349350if (isa<AllocaRegion>(Region))351return "the memory returned by 'alloca'";352353if (isa<SymbolicRegion>(Region) &&354isa<HeapSpaceRegion>(Region->getMemorySpace()))355return "the heap area";356357if (isa<StringRegion>(Region))358return "the string literal";359360return "the region";361}362363static std::optional<int64_t> getConcreteValue(NonLoc SV) {364if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {365return ConcreteVal->getValue().tryExtValue();366}367return std::nullopt;368}369370static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {371return SV ? 
getConcreteValue(*SV) : std::nullopt;372}373374static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {375std::string RegName = getRegionName(Region), OffsetStr = "";376377if (auto ConcreteOffset = getConcreteValue(Offset))378OffsetStr = formatv(" {0}", ConcreteOffset);379380return {381formatv("Out of bound access to memory preceding {0}", RegName),382formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};383}384385/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if386/// it can be performed (`Divisor` is nonzero and there is no remainder). The387/// values `Val1` and `Val2` may be nullopt and in that case the corresponding388/// division is considered to be successful.389static bool tryDividePair(std::optional<int64_t> &Val1,390std::optional<int64_t> &Val2, int64_t Divisor) {391if (!Divisor)392return false;393const bool Val1HasRemainder = Val1 && *Val1 % Divisor;394const bool Val2HasRemainder = Val2 && *Val2 % Divisor;395if (!Val1HasRemainder && !Val2HasRemainder) {396if (Val1)397*Val1 /= Divisor;398if (Val2)399*Val2 /= Divisor;400return true;401}402return false;403}404405static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,406NonLoc Offset, NonLoc Extent, SVal Location,407bool AlsoMentionUnderflow) {408std::string RegName = getRegionName(Region);409const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();410assert(EReg && "this checker only handles element access");411QualType ElemType = EReg->getElementType();412413std::optional<int64_t> OffsetN = getConcreteValue(Offset);414std::optional<int64_t> ExtentN = getConcreteValue(Extent);415416int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();417418bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);419const char *OffsetOrIndex = UseByteOffsets ? 
"byte offset" : "index";420421SmallString<256> Buf;422llvm::raw_svector_ostream Out(Buf);423Out << "Access of ";424if (!ExtentN && !UseByteOffsets)425Out << "'" << ElemType.getAsString() << "' element in ";426Out << RegName << " at ";427if (AlsoMentionUnderflow) {428Out << "a negative or overflowing " << OffsetOrIndex;429} else if (OffsetN) {430Out << OffsetOrIndex << " " << *OffsetN;431} else {432Out << "an overflowing " << OffsetOrIndex;433}434if (ExtentN) {435Out << ", while it holds only ";436if (*ExtentN != 1)437Out << *ExtentN;438else439Out << "a single";440if (UseByteOffsets)441Out << " byte";442else443Out << " '" << ElemType.getAsString() << "' element";444445if (*ExtentN > 1)446Out << "s";447}448449return {formatv("Out of bound access to memory {0} {1}",450AlsoMentionUnderflow ? "around" : "after the end of",451RegName),452std::string(Buf)};453}454455static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName,456bool AlsoMentionUnderflow) {457std::string RegName = getRegionName(Region);458return {formatv("Potential out of bound access to {0} with tainted {1}",459RegName, OffsetName),460formatv("Access of {0} with a tainted {1} that may be {2}too large",461RegName, OffsetName,462AlsoMentionUnderflow ? "negative or " : "")};463}464465const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {466// Don't create a note tag if we didn't assume anything:467if (!AssumedNonNegative && !AssumedUpperBound)468return nullptr;469470return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {471return getMessage(BR);472});473}474475std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {476bool ShouldReportNonNegative = AssumedNonNegative;477if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {478if (AssumedUpperBound &&479providesInformationAboutInteresting(*AssumedUpperBound, BR)) {480// Even if the byte offset isn't interesting (e.g. 
it's a constant value),481// the assumption can still be interesting if it provides information482// about an interesting symbolic upper bound.483ShouldReportNonNegative = false;484} else {485// We don't have anything interesting, don't report the assumption.486return "";487}488}489490std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);491std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);492493const bool UseIndex =494ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);495496SmallString<256> Buf;497llvm::raw_svector_ostream Out(Buf);498Out << "Assuming ";499if (UseIndex) {500Out << "index ";501if (OffsetN)502Out << "'" << OffsetN << "' ";503} else if (AssumedUpperBound) {504Out << "byte offset ";505if (OffsetN)506Out << "'" << OffsetN << "' ";507} else {508Out << "offset ";509}510511Out << "is";512if (ShouldReportNonNegative) {513Out << " non-negative";514}515if (AssumedUpperBound) {516if (ShouldReportNonNegative)517Out << " and";518Out << " less than ";519if (ExtentN)520Out << *ExtentN << ", ";521if (UseIndex && ElementType)522Out << "the number of '" << ElementType->getAsString()523<< "' elements in ";524else525Out << "the extent of ";526Out << getRegionName(Reg);527}528return std::string(Out.str());529}530531bool StateUpdateReporter::providesInformationAboutInteresting(532SymbolRef Sym, PathSensitiveBugReport &BR) {533if (!Sym)534return false;535for (SymbolRef PartSym : Sym->symbols()) {536// The interestingess mark may appear on any layer as we're stripping off537// the SymIntExpr, UnarySymExpr etc. 
layers...538if (BR.isInteresting(PartSym))539return true;540// ...but if both sides of the expression are symbolic, then there is no541// practical algorithm to produce separate constraints for the two542// operands (from the single combined result).543if (isa<SymSymExpr>(PartSym))544return false;545}546return false;547}548549void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {550const SVal Location = C.getSVal(E);551552// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as553// #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)554// and incomplete analysis of these leads to false positives. As even555// accurate reports would be confusing for the users, just disable reports556// from these macros:557if (isFromCtypeMacro(E, C.getASTContext()))558return;559560ProgramStateRef State = C.getState();561SValBuilder &SVB = C.getSValBuilder();562563const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =564computeOffset(State, SVB, Location);565566if (!RawOffset)567return;568569auto [Reg, ByteOffset] = *RawOffset;570571// The state updates will be reported as a single note tag, which will be572// composed by this helper class.573StateUpdateReporter SUR(Reg, ByteOffset, E, C);574575// CHECK LOWER BOUND576const MemSpaceRegion *Space = Reg->getMemorySpace();577if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {578// A symbolic region in unknown space represents an unknown pointer that579// may point into the middle of an array, so we don't look for underflows.580// Both conditions are significant because we want to check underflows in581// symbolic regions on the heap (which may be introduced by checkers like582// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and583// non-symbolic regions (e.g. 
a field subregion of a symbolic region) in584// unknown space.585auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(586State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);587588if (PrecedesLowerBound) {589// The offset may be invalid (negative)...590if (!WithinLowerBound) {591// ...and it cannot be valid (>= 0), so report an error.592Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);593reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);594return;595}596// ...but it can be valid as well, so the checker will (optimistically)597// assume that it's valid and mention this in the note tag.598SUR.recordNonNegativeAssumption();599}600601// Actually update the state. The "if" only fails in the extremely unlikely602// case when compareValueToThreshold returns {nullptr, nullptr} becasue603// evalBinOpNN fails to evaluate the less-than operator.604if (WithinLowerBound)605State = WithinLowerBound;606}607608// CHECK UPPER BOUND609DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);610if (auto KnownSize = Size.getAs<NonLoc>()) {611// In a situation where both underflow and overflow are possible (but the612// index is either tainted or known to be invalid), the logic of this613// checker will first assume that the offset is non-negative, and then614// (with this additional assumption) it will detect an overflow error.615// In this situation the warning message should mention both possibilities.616bool AlsoMentionUnderflow = SUR.assumedNonNegative();617618auto [WithinUpperBound, ExceedsUpperBound] =619compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);620621if (ExceedsUpperBound) {622// The offset may be invalid (>= Size)...623if (!WithinUpperBound) {624// ...and it cannot be within bounds, so report an error, unless we can625// definitely determine that this is an idiomatic `&array[size]`626// expression that calculates the past-the-end pointer.627if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,628*KnownSize, C)) 
{629C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));630return;631}632633Messages Msgs =634getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, *KnownSize,635Location, AlsoMentionUnderflow);636reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);637return;638}639// ...and it can be valid as well...640if (isTainted(State, ByteOffset)) {641// ...but it's tainted, so report an error.642643// Diagnostic detail: saying "tainted offset" is always correct, but644// the common case is that 'idx' is tainted in 'arr[idx]' and then it's645// nicer to say "tainted index".646const char *OffsetName = "offset";647if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))648if (isTainted(State, ASE->getIdx(), C.getLocationContext()))649OffsetName = "index";650651Messages Msgs = getTaintMsgs(Reg, OffsetName, AlsoMentionUnderflow);652reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,653/*IsTaintBug=*/true);654return;655}656// ...and it isn't tainted, so the checker will (optimistically) assume657// that the offset is in bounds and mention this in the note tag.658SUR.recordUpperBoundAssumption(*KnownSize);659}660661// Actually update the state. 
The "if" only fails in the extremely unlikely662// case when compareValueToThreshold returns {nullptr, nullptr} becasue663// evalBinOpNN fails to evaluate the less-than operator.664if (WithinUpperBound)665State = WithinUpperBound;666}667668// Add a transition, reporting the state updates that we accumulated.669C.addTransition(State, SUR.createNoteTag(C));670}671672void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR,673ProgramStateRef ErrorState,674NonLoc Val, bool MarkTaint) {675if (SymbolRef Sym = Val.getAsSymbol()) {676// If the offset is a symbolic value, iterate over its "parts" with677// `SymExpr::symbols()` and mark each of them as interesting.678// For example, if the offset is `x*4 + y` then we put interestingness onto679// the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols680// `x` and `y`.681for (SymbolRef PartSym : Sym->symbols())682BR.markInteresting(PartSym);683}684685if (MarkTaint) {686// If the issue that we're reporting depends on the taintedness of the687// offset, then put interestingness onto symbols that could be the origin688// of the taint. Note that this may find symbols that did not appear in689// `Sym->symbols()` (because they're only loosely connected to `Val`).690for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val))691BR.markInteresting(Sym);692}693}694695void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,696ProgramStateRef ErrorState, Messages Msgs,697NonLoc Offset, std::optional<NonLoc> Extent,698bool IsTaintBug /*=false*/) const {699700ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);701if (!ErrorNode)702return;703704auto BR = std::make_unique<PathSensitiveBugReport>(705IsTaintBug ? 
TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);706707// FIXME: ideally we would just call trackExpressionValue() and that would708// "do the right thing": mark the relevant symbols as interesting, track the709// control dependencies and statements storing the relevant values and add710// helpful diagnostic pieces. However, right now trackExpressionValue() is711// a heap of unreliable heuristics, so it would cause several issues:712// - Interestingness is not applied consistently, e.g. if `array[x+10]`713// causes an overflow, then `x` is not marked as interesting.714// - We get irrelevant diagnostic pieces, e.g. in the code715// `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;`716// it places a "Storing uninitialized value" note on the `malloc` call717// (which is technically true, but irrelevant).718// If trackExpressionValue() becomes reliable, it should be applied instead719// of this custom markPartsInteresting().720markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug);721if (Extent)722markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);723724C.emitReport(std::move(BR));725}726727bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {728SourceLocation Loc = S->getBeginLoc();729if (!Loc.isMacroID())730return false;731732StringRef MacroName = Lexer::getImmediateMacroName(733Loc, ACtx.getSourceManager(), ACtx.getLangOpts());734735if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')736return false;737738return ((MacroName == "isalnum") || (MacroName == "isalpha") ||739(MacroName == "isblank") || (MacroName == "isdigit") ||740(MacroName == "isgraph") || (MacroName == "islower") ||741(MacroName == "isnctrl") || (MacroName == "isprint") ||742(MacroName == "ispunct") || (MacroName == "isspace") ||743(MacroName == "isupper") || (MacroName == "isxdigit"));744}745746bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {747ParentMapContext &ParentCtx = ACtx.getParentMapContext();748do {749const 
DynTypedNodeList Parents = ParentCtx.getParents(*S);750if (Parents.empty())751return false;752S = Parents[0].get<Stmt>();753} while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S));754const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S);755return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;756}757758bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,759ProgramStateRef State,760NonLoc Offset, NonLoc Limit,761CheckerContext &C) {762if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {763auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(764State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true);765return EqualsToThreshold && !NotEqualToThreshold;766}767return false;768}769770void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {771mgr.registerChecker<ArrayBoundCheckerV2>();772}773774bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {775return true;776}777778779