Path: blob/main/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
35266 views
//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This defines CStringChecker, which is an assortment of checks on calls9// to functions in <string.h>.10//11//===----------------------------------------------------------------------===//1213#include "InterCheckerAPI.h"14#include "clang/AST/OperationKinds.h"15#include "clang/Basic/Builtins.h"16#include "clang/Basic/CharInfo.h"17#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"18#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"19#include "clang/StaticAnalyzer/Core/Checker.h"20#include "clang/StaticAnalyzer/Core/CheckerManager.h"21#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"22#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"24#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"25#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"26#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"27#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"28#include "llvm/ADT/APSInt.h"29#include "llvm/ADT/STLExtras.h"30#include "llvm/ADT/StringExtras.h"31#include "llvm/Support/Casting.h"32#include "llvm/Support/raw_ostream.h"33#include <functional>34#include <optional>3536using namespace clang;37using namespace ento;38using namespace std::placeholders;3940namespace {41struct AnyArgExpr {42const Expr *Expression;43unsigned ArgumentIndex;44};45struct SourceArgExpr : AnyArgExpr {};46struct DestinationArgExpr : AnyArgExpr {};47struct SizeArgExpr : AnyArgExpr {};4849using ErrorMessage = SmallString<128>;50enum class AccessKind { write, read 
};5152static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,53AccessKind Access) {54ErrorMessage Message;55llvm::raw_svector_ostream Os(Message);5657// Function classification like: Memory copy function58Os << toUppercase(FunctionDescription.front())59<< &FunctionDescription.data()[1];6061if (Access == AccessKind::write) {62Os << " overflows the destination buffer";63} else { // read access64Os << " accesses out-of-bound array element";65}6667return Message;68}6970enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };7172enum class CharKind { Regular = 0, Wide };73constexpr CharKind CK_Regular = CharKind::Regular;74constexpr CharKind CK_Wide = CharKind::Wide;7576static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {77return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy78: Ctx.WideCharTy);79}8081class CStringChecker : public Checker< eval::Call,82check::PreStmt<DeclStmt>,83check::LiveSymbols,84check::DeadSymbols,85check::RegionChanges86> {87mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,88BT_NotCString, BT_AdditionOverflow, BT_UninitRead;8990mutable const char *CurrentFunctionDescription = nullptr;9192public:93/// The filter is used to filter out the diagnostics which are not enabled by94/// the user.95struct CStringChecksFilter {96bool CheckCStringNullArg = false;97bool CheckCStringOutOfBounds = false;98bool CheckCStringBufferOverlap = false;99bool CheckCStringNotNullTerm = false;100bool CheckCStringUninitializedRead = false;101102CheckerNameRef CheckNameCStringNullArg;103CheckerNameRef CheckNameCStringOutOfBounds;104CheckerNameRef CheckNameCStringBufferOverlap;105CheckerNameRef CheckNameCStringNotNullTerm;106CheckerNameRef CheckNameCStringUninitializedRead;107};108109CStringChecksFilter Filter;110111static void *getTag() { static int tag; return &tag; }112113bool evalCall(const CallEvent &Call, CheckerContext &C) const;114void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;115void 
checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;116void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;117118ProgramStateRef119checkRegionChanges(ProgramStateRef state,120const InvalidatedSymbols *,121ArrayRef<const MemRegion *> ExplicitRegions,122ArrayRef<const MemRegion *> Regions,123const LocationContext *LCtx,124const CallEvent *Call) const;125126using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,127const CallEvent &)>;128129CallDescriptionMap<FnCheck> Callbacks = {130{{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},131std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},132{{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},133std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},134{{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},135std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},136{{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},137std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},138{{CDM::CLibrary, {"memcmp"}, 3},139std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},140{{CDM::CLibrary, {"wmemcmp"}, 3},141std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},142{{CDM::CLibraryMaybeHardened, {"memmove"}, 3},143std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},144{{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},145std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},146{{CDM::CLibraryMaybeHardened, {"memset"}, 3},147&CStringChecker::evalMemset},148{{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},149// FIXME: C23 introduces 'memset_explicit', maybe also model that150{{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},151&CStringChecker::evalStrcpy},152{{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},153&CStringChecker::evalStrncpy},154{{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},155&CStringChecker::evalStpcpy},156{{CDM::CLibraryMaybeHardened, {"strlcpy"}, 
3},157&CStringChecker::evalStrlcpy},158{{CDM::CLibraryMaybeHardened, {"strcat"}, 2},159&CStringChecker::evalStrcat},160{{CDM::CLibraryMaybeHardened, {"strncat"}, 3},161&CStringChecker::evalStrncat},162{{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},163&CStringChecker::evalStrlcat},164{{CDM::CLibraryMaybeHardened, {"strlen"}, 1},165&CStringChecker::evalstrLength},166{{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},167{{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},168&CStringChecker::evalstrnLength},169{{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},170{{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},171{{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},172{{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},173{{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},174{{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},175{{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},176{{CDM::CLibrary, {"bcmp"}, 3},177std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},178{{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},179{{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},180&CStringChecker::evalBzero},181182// When recognizing calls to the following variadic functions, we accept183// any number of arguments in the call (std::nullopt = accept any184// number), but check that in the declaration there are 2 and 3185// parameters respectively. (Note that the parameter count does not186// include the "...". 
Calls where the number of arguments is too small187// will be discarded by the callback.)188{{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},189&CStringChecker::evalSprintf},190{{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},191&CStringChecker::evalSnprintf},192};193194// These require a bit of special handling.195CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},196StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};197198FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;199void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;200void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;201void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;202void evalBcopy(CheckerContext &C, const CallEvent &Call) const;203void evalCopyCommon(CheckerContext &C, const CallEvent &Call,204ProgramStateRef state, SizeArgExpr Size,205DestinationArgExpr Dest, SourceArgExpr Source,206bool Restricted, bool IsMempcpy, CharKind CK) const;207208void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;209210void evalstrLength(CheckerContext &C, const CallEvent &Call) const;211void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;212void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,213bool IsStrnlen = false) const;214215void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;216void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;217void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;218void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;219void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,220bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,221bool returnPtr = true) const;222223void evalStrcat(CheckerContext &C, const CallEvent &Call) const;224void evalStrncat(CheckerContext &C, const CallEvent &Call) const;225void evalStrlcat(CheckerContext &C, const 
CallEvent &Call) const;226227void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;228void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;229void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;230void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;231void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,232bool IsBounded = false, bool IgnoreCase = false) const;233234void evalStrsep(CheckerContext &C, const CallEvent &Call) const;235236void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;237void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;238void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;239void evalMemset(CheckerContext &C, const CallEvent &Call) const;240void evalBzero(CheckerContext &C, const CallEvent &Call) const;241242void evalSprintf(CheckerContext &C, const CallEvent &Call) const;243void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;244void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,245bool IsBounded) const;246247// Utility methods248std::pair<ProgramStateRef , ProgramStateRef >249static assumeZero(CheckerContext &C,250ProgramStateRef state, SVal V, QualType Ty);251252static ProgramStateRef setCStringLength(ProgramStateRef state,253const MemRegion *MR,254SVal strLength);255static SVal getCStringLengthForRegion(CheckerContext &C,256ProgramStateRef &state,257const Expr *Ex,258const MemRegion *MR,259bool hypothetical);260SVal getCStringLength(CheckerContext &C,261ProgramStateRef &state,262const Expr *Ex,263SVal Buf,264bool hypothetical = false) const;265266const StringLiteral *getCStringLiteral(CheckerContext &C,267ProgramStateRef &state,268const Expr *expr,269SVal val) const;270271/// Invalidate the destination buffer determined by characters copied.272static ProgramStateRef273invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,274const Expr *BufE, SVal BufV, SVal SizeV,275QualType 
SizeTy);276277/// Operation never overflows, do not invalidate the super region.278static ProgramStateRef invalidateDestinationBufferNeverOverflows(279CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);280281/// We do not know whether the operation can overflow (e.g. size is unknown),282/// invalidate the super region and escape related pointers.283static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(284CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);285286/// Invalidate the source buffer for escaping pointers.287static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,288ProgramStateRef S,289const Expr *BufE, SVal BufV);290291/// @param InvalidationTraitOperations Determine how to invlidate the292/// MemRegion by setting the invalidation traits. Return true to cause pointer293/// escape, or false otherwise.294static ProgramStateRef invalidateBufferAux(295CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,296llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,297const MemRegion *)>298InvalidationTraitOperations);299300static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,301const MemRegion *MR);302303static bool memsetAux(const Expr *DstBuffer, SVal CharE,304const Expr *Size, CheckerContext &C,305ProgramStateRef &State);306307// Re-usable checks308ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,309AnyArgExpr Arg, SVal l) const;310// Check whether the origin region behind \p Element (like the actual array311// region \p Element is from) is initialized.312ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,313AnyArgExpr Buffer, SVal Element, SVal Size) const;314ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,315AnyArgExpr Buffer, SVal Element,316AccessKind Access,317CharKind CK = CharKind::Regular) const;318ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,319AnyArgExpr Buffer, SizeArgExpr 
Size,320AccessKind Access,321CharKind CK = CharKind::Regular) const;322ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,323SizeArgExpr Size, AnyArgExpr First,324AnyArgExpr Second,325CharKind CK = CharKind::Regular) const;326void emitOverlapBug(CheckerContext &C,327ProgramStateRef state,328const Stmt *First,329const Stmt *Second) const;330331void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,332StringRef WarningMsg) const;333void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,334const Stmt *S, StringRef WarningMsg) const;335void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,336const Stmt *S, StringRef WarningMsg) const;337void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;338void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,339const Expr *E, StringRef Msg) const;340ProgramStateRef checkAdditionOverflow(CheckerContext &C,341ProgramStateRef state,342NonLoc left,343NonLoc right) const;344345// Return true if the destination buffer of the copy function may be in bound.346// Expects SVal of Size to be positive and unsigned.347// Expects SVal of FirstBuf to be a FieldRegion.348static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,349SVal BufVal, QualType BufTy, SVal LengthVal,350QualType LengthTy);351};352353} //end anonymous namespace354355REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)356357//===----------------------------------------------------------------------===//358// Individual checks and utility methods.359//===----------------------------------------------------------------------===//360361std::pair<ProgramStateRef, ProgramStateRef>362CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,363QualType Ty) {364std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();365if (!val)366return std::pair<ProgramStateRef, ProgramStateRef>(State, State);367368SValBuilder &svalBuilder = 
C.getSValBuilder();369DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);370return State->assume(svalBuilder.evalEQ(State, *val, zero));371}372373ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,374ProgramStateRef State,375AnyArgExpr Arg, SVal l) const {376// If a previous check has failed, propagate the failure.377if (!State)378return nullptr;379380ProgramStateRef stateNull, stateNonNull;381std::tie(stateNull, stateNonNull) =382assumeZero(C, State, l, Arg.Expression->getType());383384if (stateNull && !stateNonNull) {385if (Filter.CheckCStringNullArg) {386SmallString<80> buf;387llvm::raw_svector_ostream OS(buf);388assert(CurrentFunctionDescription);389OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)390<< llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "391<< CurrentFunctionDescription;392393emitNullArgBug(C, stateNull, Arg.Expression, OS.str());394}395return nullptr;396}397398// From here on, assume that the value is non-null.399assert(stateNonNull);400return stateNonNull;401}402403static std::optional<NonLoc> getIndex(ProgramStateRef State,404const ElementRegion *ER, CharKind CK) {405SValBuilder &SVB = State->getStateManager().getSValBuilder();406ASTContext &Ctx = SVB.getContext();407408if (CK == CharKind::Regular) {409if (ER->getValueType() != Ctx.CharTy)410return {};411return ER->getIndex();412}413414if (ER->getValueType() != Ctx.WideCharTy)415return {};416417QualType SizeTy = Ctx.getSizeType();418NonLoc WideSize =419SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),420SizeTy)421.castAs<NonLoc>();422SVal Offset =423SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);424if (Offset.isUnknown())425return {};426return Offset.castAs<NonLoc>();427}428429// Basically 1 -> 1st, 12 -> 12th, etc.430static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {431Os << Idx << llvm::getOrdinalSuffix(Idx);432}433434ProgramStateRef CStringChecker::checkInit(CheckerContext 
&C,435ProgramStateRef State,436AnyArgExpr Buffer, SVal Element,437SVal Size) const {438439// If a previous check has failed, propagate the failure.440if (!State)441return nullptr;442443const MemRegion *R = Element.getAsRegion();444const auto *ER = dyn_cast_or_null<ElementRegion>(R);445if (!ER)446return State;447448const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();449if (!SuperR)450return State;451452// FIXME: We ought to able to check objects as well. Maybe453// UninitializedObjectChecker could help?454if (!SuperR->getValueType()->isArrayType())455return State;456457SValBuilder &SVB = C.getSValBuilder();458ASTContext &Ctx = SVB.getContext();459460const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());461const NonLoc Zero = SVB.makeZeroArrayIndex();462463std::optional<Loc> FirstElementVal =464State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();465if (!FirstElementVal)466return State;467468// Ensure that we wouldn't read uninitialized value.469if (Filter.CheckCStringUninitializedRead &&470State->getSVal(*FirstElementVal).isUndef()) {471llvm::SmallString<258> Buf;472llvm::raw_svector_ostream OS(Buf);473OS << "The first element of the ";474printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);475OS << " argument is undefined";476emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());477return nullptr;478}479480// We won't check whether the entire region is fully initialized -- lets just481// check that the first and the last element is. 
So, onto checking the last482// element:483const QualType IdxTy = SVB.getArrayIndexType();484485NonLoc ElemSize =486SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)487.castAs<NonLoc>();488489// FIXME: Check that the size arg to the cstring function is divisible by490// size of the actual element type?491492// The type of the argument to the cstring function is either char or wchar,493// but thats not the type of the original array (or memory region).494// Suppose the following:495// int t[5];496// memcpy(dst, t, sizeof(t) / sizeof(t[0]));497// When checking whether t is fully initialized, we see it as char array of498// size sizeof(int)*5. If we check the last element as a character, we read499// the last byte of an integer, which will be undefined. But just because500// that value is undefined, it doesn't mean that the element is uninitialized!501// For this reason, we need to retrieve the actual last element with the502// correct type.503504// Divide the size argument to the cstring function by the actual element505// type. 
This value will be size of the array, or the index to the506// past-the-end element.507std::optional<NonLoc> Offset =508SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,509IdxTy)510.getAs<NonLoc>();511512// Retrieve the index of the last element.513const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();514SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);515516if (!Offset)517return State;518519SVal LastElementVal =520State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));521if (!isa<Loc>(LastElementVal))522return State;523524if (Filter.CheckCStringUninitializedRead &&525State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {526const llvm::APSInt *IdxInt = LastIdx.getAsInteger();527// If we can't get emit a sensible last element index, just bail out --528// prefer to emit nothing in favour of emitting garbage quality reports.529if (!IdxInt) {530C.addSink();531return nullptr;532}533llvm::SmallString<258> Buf;534llvm::raw_svector_ostream OS(Buf);535OS << "The last accessed element (at index ";536OS << IdxInt->getExtValue();537OS << ") in the ";538printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);539OS << " argument is undefined";540emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());541return nullptr;542}543return State;544}545546// FIXME: This was originally copied from ArrayBoundChecker.cpp. 
Refactor?547ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,548ProgramStateRef state,549AnyArgExpr Buffer, SVal Element,550AccessKind Access,551CharKind CK) const {552553// If a previous check has failed, propagate the failure.554if (!state)555return nullptr;556557// Check for out of bound array element access.558const MemRegion *R = Element.getAsRegion();559if (!R)560return state;561562const auto *ER = dyn_cast<ElementRegion>(R);563if (!ER)564return state;565566// Get the index of the accessed element.567std::optional<NonLoc> Idx = getIndex(state, ER, CK);568if (!Idx)569return state;570571// Get the size of the array.572const auto *superReg = cast<SubRegion>(ER->getSuperRegion());573DefinedOrUnknownSVal Size =574getDynamicExtent(state, superReg, C.getSValBuilder());575576auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);577if (StOutBound && !StInBound) {578// These checks are either enabled by the CString out-of-bounds checker579// explicitly or implicitly by the Malloc checker.580// In the latter case we only do modeling but do not emit warning.581if (!Filter.CheckCStringOutOfBounds)582return nullptr;583584// Emit a bug report.585ErrorMessage Message =586createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);587emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);588return nullptr;589}590591// Array bound check succeeded. 
From this point forward the array bound592// should always succeed.593return StInBound;594}595596ProgramStateRef597CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,598AnyArgExpr Buffer, SizeArgExpr Size,599AccessKind Access, CharKind CK) const {600// If a previous check has failed, propagate the failure.601if (!State)602return nullptr;603604SValBuilder &svalBuilder = C.getSValBuilder();605ASTContext &Ctx = svalBuilder.getContext();606607QualType SizeTy = Size.Expression->getType();608QualType PtrTy = getCharPtrType(Ctx, CK);609610// Check that the first buffer is non-null.611SVal BufVal = C.getSVal(Buffer.Expression);612State = checkNonNull(C, State, Buffer, BufVal);613if (!State)614return nullptr;615616// If out-of-bounds checking is turned off, skip the rest.617if (!Filter.CheckCStringOutOfBounds)618return State;619620SVal BufStart =621svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());622623// Check if the first byte of the buffer is accessible.624State = CheckLocation(C, State, Buffer, BufStart, Access, CK);625626if (!State)627return nullptr;628629// Get the access length and make sure it is known.630// FIXME: This assumes the caller has already checked that the access length631// is positive. 
And that it's unsigned.632SVal LengthVal = C.getSVal(Size.Expression);633std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();634if (!Length)635return State;636637// Compute the offset of the last element to be accessed: size-1.638NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();639SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);640if (Offset.isUnknown())641return nullptr;642NonLoc LastOffset = Offset.castAs<NonLoc>();643644// Check that the first buffer is sufficiently long.645if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {646647SVal BufEnd =648svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);649State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);650if (Access == AccessKind::read)651State = checkInit(C, State, Buffer, BufEnd, *Length);652653// If the buffer isn't large enough, abort.654if (!State)655return nullptr;656}657658// Large enough or not, return this state!659return State;660}661662ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,663ProgramStateRef state,664SizeArgExpr Size, AnyArgExpr First,665AnyArgExpr Second,666CharKind CK) const {667if (!Filter.CheckCStringBufferOverlap)668return state;669670// Do a simple check for overlap: if the two arguments are from the same671// buffer, see if the end of the first is greater than the start of the second672// or vice versa.673674// If a previous check has failed, propagate the failure.675if (!state)676return nullptr;677678ProgramStateRef stateTrue, stateFalse;679680// Assume different address spaces cannot overlap.681if (First.Expression->getType()->getPointeeType().getAddressSpace() !=682Second.Expression->getType()->getPointeeType().getAddressSpace())683return state;684685// Get the buffer values and make sure they're known locations.686const LocationContext *LCtx = C.getLocationContext();687SVal firstVal = state->getSVal(First.Expression, LCtx);688SVal secondVal = state->getSVal(Second.Expression, 
LCtx);689690std::optional<Loc> firstLoc = firstVal.getAs<Loc>();691if (!firstLoc)692return state;693694std::optional<Loc> secondLoc = secondVal.getAs<Loc>();695if (!secondLoc)696return state;697698// Are the two values the same?699SValBuilder &svalBuilder = C.getSValBuilder();700std::tie(stateTrue, stateFalse) =701state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));702703if (stateTrue && !stateFalse) {704// If the values are known to be equal, that's automatically an overlap.705emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);706return nullptr;707}708709// assume the two expressions are not equal.710assert(stateFalse);711state = stateFalse;712713// Which value comes first?714QualType cmpTy = svalBuilder.getConditionType();715SVal reverse =716svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);717std::optional<DefinedOrUnknownSVal> reverseTest =718reverse.getAs<DefinedOrUnknownSVal>();719if (!reverseTest)720return state;721722std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);723if (stateTrue) {724if (stateFalse) {725// If we don't know which one comes first, we can't perform this test.726return state;727} else {728// Switch the values so that firstVal is before secondVal.729std::swap(firstLoc, secondLoc);730731// Switch the Exprs as well, so that they still correspond.732std::swap(First, Second);733}734}735736// Get the length, and make sure it too is known.737SVal LengthVal = state->getSVal(Size.Expression, LCtx);738std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();739if (!Length)740return state;741742// Convert the first buffer's start address to char*.743// Bail out if the cast fails.744ASTContext &Ctx = svalBuilder.getContext();745QualType CharPtrTy = getCharPtrType(Ctx, CK);746SVal FirstStart =747svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());748std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();749if (!FirstStartLoc)750return state;751752// Compute the end of the 
first buffer. Bail out if THAT fails.753SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,754*Length, CharPtrTy);755std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();756if (!FirstEndLoc)757return state;758759// Is the end of the first buffer past the start of the second buffer?760SVal Overlap =761svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);762std::optional<DefinedOrUnknownSVal> OverlapTest =763Overlap.getAs<DefinedOrUnknownSVal>();764if (!OverlapTest)765return state;766767std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);768769if (stateTrue && !stateFalse) {770// Overlap!771emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);772return nullptr;773}774775// assume the two expressions don't overlap.776assert(stateFalse);777return stateFalse;778}779780void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,781const Stmt *First, const Stmt *Second) const {782ExplodedNode *N = C.generateErrorNode(state);783if (!N)784return;785786if (!BT_Overlap)787BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,788categories::UnixAPI, "Improper arguments"));789790// Generate a report for this bug.791auto report = std::make_unique<PathSensitiveBugReport>(792*BT_Overlap, "Arguments must not be overlapping buffers", N);793report->addRange(First->getSourceRange());794report->addRange(Second->getSourceRange());795796C.emitReport(std::move(report));797}798799void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,800const Stmt *S, StringRef WarningMsg) const {801if (ExplodedNode *N = C.generateErrorNode(State)) {802if (!BT_Null) {803// FIXME: This call uses the string constant 'categories::UnixAPI' as the804// description of the bug; it should be replaced by a real description.805BT_Null.reset(806new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));807}808809auto Report =810std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, 
N);811Report->addRange(S->getSourceRange());812if (const auto *Ex = dyn_cast<Expr>(S))813bugreporter::trackExpressionValue(N, Ex, *Report);814C.emitReport(std::move(Report));815}816}817818void CStringChecker::emitUninitializedReadBug(CheckerContext &C,819ProgramStateRef State,820const Expr *E,821StringRef Msg) const {822if (ExplodedNode *N = C.generateErrorNode(State)) {823if (!BT_UninitRead)824BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,825"Accessing unitialized/garbage values"));826827auto Report =828std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);829Report->addNote("Other elements might also be undefined",830Report->getLocation());831Report->addRange(E->getSourceRange());832bugreporter::trackExpressionValue(N, E, *Report);833C.emitReport(std::move(Report));834}835}836837void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,838ProgramStateRef State, const Stmt *S,839StringRef WarningMsg) const {840if (ExplodedNode *N = C.generateErrorNode(State)) {841if (!BT_Bounds)842BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds843? 
Filter.CheckNameCStringOutOfBounds
                                      : Filter.CheckNameCStringNullArg,
                                  "Out-of-bound array access"));

    // FIXME: It would be nice to eventually make this diagnostic more clear,
    // e.g., by referencing the original declaration or by saying *why* this
    // reference is outside the range.
    auto Report =
        std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
    Report->addRange(S->getSourceRange());
    C.emitReport(std::move(Report));
  }
}

/// Reports that an argument is not a null-terminated string.
///
/// Emits \p WarningMsg on a non-fatal error node generated from \p State and
/// highlights \p S. The bug type is created lazily on first use.
void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
                                       const Stmt *S,
                                       StringRef WarningMsg) const {
  if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
    if (!BT_NotCString) {
      // FIXME: This call uses the string constant 'categories::UnixAPI' as the
      // description of the bug; it should be replaced by a real description.
      BT_NotCString.reset(
          new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
    }

    auto Report =
        std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);

    Report->addRange(S->getSourceRange());
    C.emitReport(std::move(Report));
  }
}

/// Reports that a computed string length cannot be represented as a size_t.
///
/// The report is attached to an error node generated from \p State; nothing
/// is emitted if no node can be generated.
void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
                                             ProgramStateRef State) const {
  if (ExplodedNode *N = C.generateErrorNode(State)) {
    if (!BT_AdditionOverflow) {
      // FIXME: This call uses the word "API" as the description of the bug;
      // it should be replaced by a better error message (if this unlikely
      // situation continues to exist as a separate bug type).
      BT_AdditionOverflow.reset(
          new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
    }

    // This isn't a great error message, but this should never occur in real
    // code anyway -- you'd have to create a buffer longer than a size_t can
    // represent, which is sort of a contradiction.
    const char *WarningMsg =
        "This expression will create a string whose length is too big to "
        "be represented as a size_t";

    auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
                                                           WarningMsg, N);
    C.emitReport(std::move(Report));
  }
}

/// Checks whether \p left + \p right can exceed the maximum value of size_t.
///
/// \returns \p state unchanged when out-of-bounds checking is disabled or the
/// subtraction "max - operand" does not evaluate to a NonLoc; nullptr when
/// \p state is already null or when the overflow is certain (a bug is reported
/// in that case); otherwise \p state constrained to the no-overflow case.
ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
                                                     ProgramStateRef state,
                                                     NonLoc left,
                                                     NonLoc right) const {
  // If out-of-bounds checking is turned off, skip the rest.
  if (!Filter.CheckCStringOutOfBounds)
    return state;

  // If a previous check has failed, propagate the failure.
  if (!state)
    return nullptr;

  SValBuilder &svalBuilder = C.getSValBuilder();
  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();

  QualType sizeTy = svalBuilder.getContext().getSizeType();
  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);

  // Despite its name, this holds "max - left" when the operands are switched
  // in the else-branch below.
  SVal maxMinusRight;
  if (isa<nonloc::ConcreteInt>(right)) {
    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
                                            sizeTy);
  } else {
    // Try switching the operands. (The order of these two assignments is
    // important!)
    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
                                            sizeTy);
    left = right;
  }

  if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
    QualType cmpTy = svalBuilder.getConditionType();
    // If left > max - right, we have an overflow.
    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
                                                *maxMinusRightNL, cmpTy);

    ProgramStateRef stateOverflow, stateOkay;
    std::tie(stateOverflow, stateOkay) =
      state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());

    if (stateOverflow && !stateOkay) {
      // We have an overflow. Emit a bug report.
      emitAdditionOverflowBug(C, stateOverflow);
      return nullptr;
    }

    // From now on, assume an overflow didn't occur.
    assert(stateOkay);
    state = stateOkay;
  }

  return state;
}

/// Records \p strLength as the C string length of \p MR (after stripping
/// casts).
///
/// Only the region kinds listed in the switch below are tracked; for every
/// other kind \p state is returned unchanged. An unknown \p strLength removes
/// any previously recorded length instead of storing it. \p strLength must
/// not be undefined.
ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
                                                const MemRegion *MR,
                                                SVal strLength) {
  assert(!strLength.isUndef() && "Attempt to set an undefined string length");

  MR = MR->StripCasts();

  switch (MR->getKind()) {
  case MemRegion::StringRegionKind:
    // FIXME: This can happen if we strcpy() into a string region. This is
    // undefined [C99 6.4.5p6], but we should still warn about it.
    return state;

  case MemRegion::SymbolicRegionKind:
  case MemRegion::AllocaRegionKind:
  case MemRegion::NonParamVarRegionKind:
  case MemRegion::ParamVarRegionKind:
  case MemRegion::FieldRegionKind:
  case MemRegion::ObjCIvarRegionKind:
    // These are the types we can currently track string lengths for.
    break;

  case MemRegion::ElementRegionKind:
    // FIXME: Handle element regions by upper-bounding the parent region's
    // string length.
    return state;

  default:
    // Other regions (mostly non-data) can't have a reliable C string length.
    // For now, just ignore the change.
    // FIXME: These are rare but not impossible. 
// We should output some kind of
    // warning for things like strcpy((char[]){'a', 0}, "b");
    return state;
  }

  if (strLength.isUnknown())
    return state->remove<CStringLength>(MR);

  return state->set<CStringLength>(MR, strLength);
}

/// Returns the tracked C string length of \p MR, creating one if needed.
///
/// When \p hypothetical is false, a previously recorded length is returned if
/// present; otherwise a fresh metadata symbol is created, constrained to be
/// at most SIZE_MAX/4, and recorded in \p state (which is updated in place).
/// When \p hypothetical is true, a fresh symbol is returned and \p state is
/// left untouched.
SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
                                               ProgramStateRef &state,
                                               const Expr *Ex,
                                               const MemRegion *MR,
                                               bool hypothetical) {
  if (!hypothetical) {
    // If there's a recorded length, go ahead and return it.
    const SVal *Recorded = state->get<CStringLength>(MR);
    if (Recorded)
      return *Recorded;
  }

  // Otherwise, get a new symbol and update the state.
  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType sizeTy = svalBuilder.getContext().getSizeType();
  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
                                                    MR, Ex, sizeTy,
                                                    C.getLocationContext(),
                                                    C.blockCount());

  if (!hypothetical) {
    if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
      // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
      BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
      const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
      llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
      const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
                                                        fourInt);
      NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
      SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
                                                svalBuilder.getConditionType());
      state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
    }
    state = state->set<CStringLength>(MR, strLength);
  }

  return strLength;
}

/// Computes the C string length of the buffer value \p Buf.
///
/// \returns a concrete length for string literals and for const globals
/// initialized directly with a string literal; a tracked symbolic length for
/// the region kinds handled by getCStringLengthForRegion(); UnknownVal when
/// the length cannot be determined; UndefinedVal when \p Buf is known not to
/// be a C string (a label address or an unsupported region kind). In the
/// UndefinedVal case a "not a C string" bug is reported if
/// Filter.CheckCStringNotNullTerm is set. \p state may be updated in place.
SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
                                      const Expr *Ex, SVal Buf,
                                      bool hypothetical) const {
  const MemRegion *MR = Buf.getAsRegion();
  if (!MR) {
    // If we can't get a region, see if it's something we /know/ isn't a
    // C string. In the context of locations, the only time we can issue such
    // a warning is for labels.
    if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
      if (Filter.CheckCStringNotNullTerm) {
        SmallString<120> buf;
        llvm::raw_svector_ostream os(buf);
        assert(CurrentFunctionDescription);
        os << "Argument to " << CurrentFunctionDescription
           << " is the address of the label '" << Label->getLabel()->getName()
           << "', which is not a null-terminated string";

        emitNotCStringBug(C, state, Ex, os.str());
      }
      return UndefinedVal();
    }

    // If it's not a region and not a label, give up.
    return UnknownVal();
  }

  // If we have a region, strip casts from it and see if we can figure out
  // its length. For anything we can't figure out, just return UnknownVal.
  MR = MR->StripCasts();

  switch (MR->getKind()) {
  case MemRegion::StringRegionKind: {
    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
    // so we can assume that the byte length is the correct C string length.
    SValBuilder &svalBuilder = C.getSValBuilder();
    QualType sizeTy = svalBuilder.getContext().getSizeType();
    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
    return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
  }
  case MemRegion::NonParamVarRegionKind: {
    // If we have a global constant with a string literal initializer,
    // compute the initializer's length.
    const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
    if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
      if (const Expr *Init = Decl->getInit()) {
        if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
          SValBuilder &SvalBuilder = C.getSValBuilder();
          QualType SizeTy = SvalBuilder.getContext().getSizeType();
          return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
        }
      }
    }
    // Any other variable is handled like the trackable regions below.
    [[fallthrough]];
  }
  case MemRegion::SymbolicRegionKind:
  case MemRegion::AllocaRegionKind:
  case MemRegion::ParamVarRegionKind:
  case MemRegion::FieldRegionKind:
  case MemRegion::ObjCIvarRegionKind:
    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
  case MemRegion::CompoundLiteralRegionKind:
    // FIXME: Can we track this? Is it necessary?
    return UnknownVal();
  case MemRegion::ElementRegionKind:
    // FIXME: How can we handle this? It's not good enough to subtract the
    // offset from the base string length; consider "123\x00567" and &a[5].
    return UnknownVal();
  default:
    // Other regions (mostly non-data) can't have a reliable C string length.
    // In this case, an error is emitted and UndefinedVal is returned.
    // The caller should always be prepared to handle this case.
    if (Filter.CheckCStringNotNullTerm) {
      SmallString<120> buf;
      llvm::raw_svector_ostream os(buf);

      assert(CurrentFunctionDescription);
      os << "Argument to " << CurrentFunctionDescription << " is ";

      if (SummarizeRegion(os, C.getASTContext(), MR))
        os << ", which is not a null-terminated string";
      else
        os << "not a null-terminated string";

      emitNotCStringBug(C, state, Ex, os.str());
    }
    return UndefinedVal();
  }
}

/// Returns the string literal that \p val points to, or nullptr if \p val has
/// no region or its cast-stripped region is not a StringRegion. The \p C,
/// \p state and \p expr parameters are not used by this implementation.
const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
  ProgramStateRef &state, const Expr *expr, SVal val) const {

  // Get the memory region pointed to by the val.
  const MemRegion *bufRegion = val.getAsRegion();
  if (!bufRegion)
    return nullptr;

  // Strip casts off the memory region.
  bufRegion = bufRegion->StripCasts();

  // Cast the memory region to a string region.
  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
  if (!strRegion)
    return nullptr;

  // Return the actual string in the string region.
  return strRegion->getStringLiteral();
}

/// Decides whether an access of \p LengthVal bytes starting at \p BufVal
/// stays inside the buffer's enclosing region; used to decide whether a
/// field's super-region must be invalidated.
bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
                                       SVal BufVal, QualType BufTy,
                                       SVal LengthVal, 
QualType LengthTy) {
  // If we do not know that the buffer is long enough we return 'true'.
  // Otherwise the parent region of this field region would also get
  // invalidated, which would lead to warnings based on an unknown state.

  // NOTE(review): this early exit returns 'false' for an unknown length,
  // which does not match the "return 'true'" policy stated above -- confirm
  // which of the two is intended before relying on either.
  if (LengthVal.isUnknown())
    return false;

  // Originally copied from CheckBufferAccess and CheckLocation.
  SValBuilder &SB = C.getSValBuilder();
  ASTContext &Ctx = C.getASTContext();

  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);

  std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
  if (!Length)
    return true; // cf top comment.

  // Compute the offset of the last element to be accessed: size-1.
  NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
  SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
  if (Offset.isUnknown())
    return true; // cf top comment
  NonLoc LastOffset = Offset.castAs<NonLoc>();

  // Check that the first buffer is sufficiently long.
  SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
  std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
  if (!BufLoc)
    return true; // cf top comment.

  SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);

  // Check for out of bound array element access.
  const MemRegion *R = BufEnd.getAsRegion();
  if (!R)
    return true; // cf top comment.

  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
  if (!ER)
    return true; // cf top comment.

  // FIXME: Does this crash when a non-standard definition
  // of a library function is encountered?
  assert(ER->getValueType() == C.getASTContext().CharTy &&
         "isFirstBufInBound should only be called with char* ElementRegions");

  // Get the size of the array.
  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
  DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);

  // Get the index of the accessed element.
  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();

  // A non-null state means the last accessed index can be in bounds.
  ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);

  return static_cast<bool>(StInBound);
}

/// Invalidates the destination buffer \p BufV of a write of \p SizeV bytes.
///
/// If the invalidated region is a field and isFirstBufInBound() holds for the
/// access, the field's super-region is protected from invalidation. The
/// invalidation never causes a pointer escape.
ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
    SVal SizeV, QualType SizeTy) {
  auto InvalidationTraitOperations =
      [&C, S, BufTy = BufE->getType(), BufV, SizeV,
       SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
        // If destination buffer is a field region and access is in bound, do
        // not invalidate its super region.
        if (MemRegion::FieldRegionKind == R->getKind() &&
            isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
          ITraits.setTrait(
              R,
              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
        }
        return false;
      };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

/// Invalidates the destination buffer \p BufV without setting any
/// invalidation traits; the invalidation is treated as a pointer escape
/// exactly when the invalidated region is a field.
ProgramStateRef
CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
  auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
                                        const MemRegion *R) {
    return isa<FieldRegion>(R);
  };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

/// Invalidates the destination buffer \p BufV for a write that is taken not
/// to overflow: a field's super-region is always protected from invalidation,
/// and no pointer escape is caused.
ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
  auto InvalidationTraitOperations =
      [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
        if (MemRegion::FieldRegionKind == R->getKind())
          ITraits.setTrait(
              R,
              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
        return false;
      };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

/// Invalidates the source buffer \p BufV of a copy: the contents of its base
/// region are preserved (TK_PreserveContents) and escape of the region itself
/// is suppressed (TK_SuppressEscape), while the invalidation is still flagged
/// as causing a pointer escape.
ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
                                                       ProgramStateRef S,
                                                       const Expr *BufE,
                                                       SVal BufV) {
  auto InvalidationTraitOperations =
      [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
        ITraits.setTrait(
            R->getBaseRegion(),
            RegionAndSymbolInvalidationTraits::TK_PreserveContents);
        ITraits.setTrait(R,
                         RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
        return true;
      };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

/// Shared implementation of the invalidate*Buffer helpers.
///
/// If \p V is not a location, \p State is returned unchanged. If it is a
/// memory region, the cast-stripped region (or its super-region when it is an
/// ElementRegion) is invalidated; \p InvalidationTraitOperations may set
/// traits for that region and its return value says whether the invalidation
/// causes a pointer escape. For any other location the binding is removed.
ProgramStateRef CStringChecker::invalidateBufferAux(
    CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
    llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
                            const MemRegion *)>
        InvalidationTraitOperations) {
  std::optional<Loc> L = V.getAs<Loc>();
  if (!L)
    return State;

  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
  // some assumptions about the value that CFRefCount can't. Even so, it should
  // probably be refactored.
  if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
    const MemRegion *R = MR->getRegion()->StripCasts();

    // Are we dealing with an ElementRegion?  If so, we should be invalidating
    // the super-region.
    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
      R = ER->getSuperRegion();
      // FIXME: What about layers of ElementRegions?
    }

    // Invalidate this region.
    const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
    RegionAndSymbolInvalidationTraits ITraits;
    bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);

    return State->invalidateRegions(R, E, C.blockCount(), LCtx,
                                    CausesPointerEscape, nullptr, nullptr,
                                    &ITraits);
  }

  // If we have a non-region value by chance, just remove the binding.
  // FIXME: is this necessary or correct? This handles the non-Region
  // cases. 
Is it ever valid to store to these?1302return State->killBinding(*L);1303}13041305bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,1306const MemRegion *MR) {1307switch (MR->getKind()) {1308case MemRegion::FunctionCodeRegionKind: {1309if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())1310os << "the address of the function '" << *FD << '\'';1311else1312os << "the address of a function";1313return true;1314}1315case MemRegion::BlockCodeRegionKind:1316os << "block text";1317return true;1318case MemRegion::BlockDataRegionKind:1319os << "a block";1320return true;1321case MemRegion::CXXThisRegionKind:1322case MemRegion::CXXTempObjectRegionKind:1323os << "a C++ temp object of type "1324<< cast<TypedValueRegion>(MR)->getValueType();1325return true;1326case MemRegion::NonParamVarRegionKind:1327os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();1328return true;1329case MemRegion::ParamVarRegionKind:1330os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();1331return true;1332case MemRegion::FieldRegionKind:1333os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();1334return true;1335case MemRegion::ObjCIvarRegionKind:1336os << "an instance variable of type "1337<< cast<TypedValueRegion>(MR)->getValueType();1338return true;1339default:1340return false;1341}1342}13431344bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,1345const Expr *Size, CheckerContext &C,1346ProgramStateRef &State) {1347SVal MemVal = C.getSVal(DstBuffer);1348SVal SizeVal = C.getSVal(Size);1349const MemRegion *MR = MemVal.getAsRegion();1350if (!MR)1351return false;13521353// We're about to model memset by producing a "default binding" in the Store.1354// Our current implementation - RegionStore - doesn't support default bindings1355// that don't cover the whole base region. 
// So we should first get the offset
  // and the base region to figure out whether the offset of buffer is 0.
  RegionOffset Offset = MR->getAsOffset();
  const MemRegion *BR = Offset.getRegion();

  std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
  if (!SizeNL)
    return false;

  SValBuilder &svalBuilder = C.getSValBuilder();
  ASTContext &Ctx = C.getASTContext();

  // void *memset(void *dest, int ch, size_t count);
  // For now we can only handle the case of offset is 0 and concrete char value.
  if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
      Offset.getOffset() == 0) {
    // Get the base region's size.
    DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);

    // StateWholeReg: the size argument equals the base region's extent.
    ProgramStateRef StateWholeReg, StateNotWholeReg;
    std::tie(StateWholeReg, StateNotWholeReg) =
        State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));

    // With the semantic of 'memset()', we should convert the CharVal to
    // unsigned char.
    CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);

    // StateNullChar: the fill character is zero.
    ProgramStateRef StateNullChar, StateNonNullChar;
    std::tie(StateNullChar, StateNonNullChar) =
        assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);

    if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
        !StateNonNullChar) {
      // If the 'memset()' acts on the whole region of destination buffer and
      // the value of the second argument of 'memset()' is zero, bind the second
      // argument's value to the destination buffer with 'default binding'.
      // FIXME: Since there is no perfect way to bind the non-zero character, we
      // can only deal with zero value here. In the future, we need to deal with
      // the binding of non-zero value in the case of whole region.
      State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
                                     C.getLocationContext());
    } else {
      // If the destination buffer's extent is not equal to the value of
      // third argument, just invalidate buffer.
      State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
                                                SizeVal, Size->getType());
    }

    if (StateNullChar && !StateNonNullChar) {
      // If the value of the second argument of 'memset()' is zero, set the
      // string length of destination buffer to 0 directly.
      State = setCStringLength(State, MR,
                               svalBuilder.makeZeroVal(Ctx.getSizeType()));
    } else if (!StateNullChar && StateNonNullChar) {
      SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
          CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
          C.getLocationContext(), C.blockCount());

      // If the value of second argument is not zero, then the string length
      // is at least the size argument.
      SVal NewStrLenGESize = svalBuilder.evalBinOp(
          State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());

      State = setCStringLength(
          State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
          MR, NewStrLen);
    }
  } else {
    // If the offset is not zero and char value is not concrete, we can do
    // nothing but invalidate the buffer.
    State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
                                              SizeVal, Size->getType());
  }
  return true;
}

//===----------------------------------------------------------------------===//
// evaluation of individual function calls.
//===----------------------------------------------------------------------===//

/// Shared model for memcpy(), mempcpy(), memmove() and bcopy().
///
/// \p Restricted enables the overlap check between \p Dest and \p Source;
/// \p IsMempcpy makes the call evaluate to the address just past the copied
/// bytes instead of the destination pointer.
void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
                                    ProgramStateRef state, SizeArgExpr Size,
                                    DestinationArgExpr Dest,
                                    SourceArgExpr Source, bool Restricted,
                                    bool IsMempcpy, CharKind CK) const 
{
  CurrentFunctionDescription = "memory copy function";

  // See if the size argument is zero.
  const LocationContext *LCtx = C.getLocationContext();
  SVal sizeVal = state->getSVal(Size.Expression, LCtx);
  QualType sizeTy = Size.Expression->getType();

  ProgramStateRef stateZeroSize, stateNonZeroSize;
  std::tie(stateZeroSize, stateNonZeroSize) =
      assumeZero(C, state, sizeVal, sizeTy);

  // Get the value of the Dest.
  SVal destVal = state->getSVal(Dest.Expression, LCtx);

  // If the size is zero, there won't be any actual memory access, so
  // just bind the return value to the destination buffer and return.
  if (stateZeroSize && !stateNonZeroSize) {
    stateZeroSize =
        stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
    C.addTransition(stateZeroSize);
    return;
  }

  // If the size can be nonzero, we have to check the other arguments.
  if (stateNonZeroSize) {
    // TODO: If Size is tainted and we cannot prove that it is smaller or equal
    // to the size of the destination buffer, then emit a warning
    // that an attacker may provoke a buffer overflow error.
    state = stateNonZeroSize;

    // Ensure the destination is not null. If it is NULL there will be a
    // NULL pointer dereference.
    state = checkNonNull(C, state, Dest, destVal);
    if (!state)
      return;

    // Get the value of the Src.
    SVal srcVal = state->getSVal(Source.Expression, LCtx);

    // Ensure the source is not null. If it is NULL there will be a
    // NULL pointer dereference.
    state = checkNonNull(C, state, Source, srcVal);
    if (!state)
      return;

    // Ensure the accesses are valid and that the buffers do not overlap.
    // The state is only tested for null once, after all of these checks.
    state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
    state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);

    if (Restricted)
      state = CheckOverlap(C, state, Size, Dest, Source, CK);

    if (!state)
      return;

    // If this is mempcpy, get the byte after the last byte copied and
    // bind the expr.
    if (IsMempcpy) {
      // Get the byte after the last byte copied.
      SValBuilder &SvalBuilder = C.getSValBuilder();
      ASTContext &Ctx = SvalBuilder.getContext();
      QualType CharPtrTy = getCharPtrType(Ctx, CK);
      SVal DestRegCharVal =
          SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
      SVal lastElement = C.getSValBuilder().evalBinOp(
          state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
      // If we don't know how much we copied, we can at least
      // conjure a return value for later.
      if (lastElement.isUnknown())
        lastElement = C.getSValBuilder().conjureSymbolVal(
            nullptr, Call.getOriginExpr(), LCtx, C.blockCount());

      // The byte after the last byte copied is the return value.
      state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
    } else {
      // All other copies return the destination buffer.
      // (Well, bcopy() has a void return type, but this won't hurt.)
      state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region).
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // copied region, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(
        C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
        Size.Expression->getType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, Source.Expression,
                                   C.getSVal(Source.Expression));

    C.addTransition(state);
  }
}

/// Models memcpy(): a restricted (non-overlapping) copy that evaluates to the
/// destination pointer. Arguments are (dst, src, n).
void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
                                CharKind CK) const {
  // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
  // The return value is the address of the destination buffer.
  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};

  ProgramStateRef State = C.getState();

  constexpr bool IsRestricted = true;
  constexpr bool IsMempcpy = false;
  evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
}

/// Models mempcpy(): a restricted copy that evaluates to the address just
/// past the last written byte. Arguments are (dst, src, n).
void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
                                 CharKind CK) const {
  // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
  // The return value is a pointer to the byte following the last written byte.
  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
  SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
  SizeArgExpr Size = {{Call.getArgExpr(2), 2}};

  constexpr bool IsRestricted = true;
  constexpr bool IsMempcpy = true;
  evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
                 IsMempcpy, CK);
}

/// Models memmove(): an unrestricted copy (no overlap check) that evaluates
/// to the destination pointer. Arguments are (dst, src, n).
void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
                                 CharKind CK) const {
  // void *memmove(void *dst, const void *src, size_t n);
  // The return value is the address of the destination buffer.
  DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
  SourceArgExpr Src = {{Call.getArgExpr(1), 
1}};1574SizeArgExpr Size = {{Call.getArgExpr(2), 2}};15751576constexpr bool IsRestricted = false;1577constexpr bool IsMempcpy = false;1578evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,1579IsMempcpy, CK);1580}15811582void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {1583// void bcopy(const void *src, void *dst, size_t n);1584SourceArgExpr Src{{Call.getArgExpr(0), 0}};1585DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};1586SizeArgExpr Size = {{Call.getArgExpr(2), 2}};15871588constexpr bool IsRestricted = false;1589constexpr bool IsMempcpy = false;1590evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,1591IsMempcpy, CharKind::Regular);1592}15931594void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,1595CharKind CK) const {1596// int memcmp(const void *s1, const void *s2, size_t n);1597CurrentFunctionDescription = "memory comparison function";15981599AnyArgExpr Left = {Call.getArgExpr(0), 0};1600AnyArgExpr Right = {Call.getArgExpr(1), 1};1601SizeArgExpr Size = {{Call.getArgExpr(2), 2}};16021603ProgramStateRef State = C.getState();1604SValBuilder &Builder = C.getSValBuilder();1605const LocationContext *LCtx = C.getLocationContext();16061607// See if the size argument is zero.1608SVal sizeVal = State->getSVal(Size.Expression, LCtx);1609QualType sizeTy = Size.Expression->getType();16101611ProgramStateRef stateZeroSize, stateNonZeroSize;1612std::tie(stateZeroSize, stateNonZeroSize) =1613assumeZero(C, State, sizeVal, sizeTy);16141615// If the size can be zero, the result will be 0 in that case, and we don't1616// have to check either of the buffers.1617if (stateZeroSize) {1618State = stateZeroSize;1619State = State->BindExpr(Call.getOriginExpr(), LCtx,1620Builder.makeZeroVal(Call.getResultType()));1621C.addTransition(State);1622}16231624// If the size can be nonzero, we have to check the other arguments.1625if (stateNonZeroSize) {1626State = stateNonZeroSize;1627// If we know the 
two buffers are the same, we know the result is 0.1628// First, get the two buffers' addresses. Another checker will have already1629// made sure they're not undefined.1630DefinedOrUnknownSVal LV =1631State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();1632DefinedOrUnknownSVal RV =1633State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();16341635// See if they are the same.1636ProgramStateRef SameBuffer, NotSameBuffer;1637std::tie(SameBuffer, NotSameBuffer) =1638State->assume(Builder.evalEQ(State, LV, RV));16391640// If the two arguments are the same buffer, we know the result is 0,1641// and we only need to check one size.1642if (SameBuffer && !NotSameBuffer) {1643State = SameBuffer;1644State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);1645if (State) {1646State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,1647Builder.makeZeroVal(Call.getResultType()));1648C.addTransition(State);1649}1650return;1651}16521653// If the two arguments might be different buffers, we have to check1654// the size of both of them.1655assert(NotSameBuffer);1656State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);1657State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);1658if (State) {1659// The return value is the comparison result, which we don't know.1660SVal CmpV = Builder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,1661C.blockCount());1662State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);1663C.addTransition(State);1664}1665}1666}16671668void CStringChecker::evalstrLength(CheckerContext &C,1669const CallEvent &Call) const {1670// size_t strlen(const char *s);1671evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);1672}16731674void CStringChecker::evalstrnLength(CheckerContext &C,1675const CallEvent &Call) const {1676// size_t strnlen(const char *s, size_t maxlen);1677evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);1678}16791680void 
CStringChecker::evalstrLengthCommon(CheckerContext &C,
                                         const CallEvent &Call,
                                         bool IsStrnlen) const {
  // Shared model for strlen() and strnlen(). Binds the call's result to the
  // string length of argument 0; for strnlen() the result is additionally
  // limited by argument 1 (maxlen). No transition is added if the string
  // argument fails the null check or is not a valid C string.
  CurrentFunctionDescription = "string length function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  if (IsStrnlen) {
    const Expr *maxlenExpr = Call.getArgExpr(1);
    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);

    ProgramStateRef stateZeroSize, stateNonZeroSize;
    std::tie(stateZeroSize, stateNonZeroSize) =
      assumeZero(C, state, maxlenVal, maxlenExpr->getType());

    // If the size can be zero, the result will be 0 in that case, and we don't
    // have to check the string itself.
    if (stateZeroSize) {
      SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
      stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
      C.addTransition(stateZeroSize);
    }

    // If the size is GUARANTEED to be zero, we're done!
    if (!stateNonZeroSize)
      return;

    // Otherwise, record the assumption that the size is nonzero.
    state = stateNonZeroSize;
  }

  // Check that the string argument is non-null.
  AnyArgExpr Arg = {Call.getArgExpr(0), 0};
  SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
  state = checkNonNull(C, state, Arg, ArgVal);

  if (!state)
    return;

  SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);

  // If the argument isn't a valid C string, there's no valid state to
  // transition to.
  if (strLength.isUndef())
    return;

  DefinedOrUnknownSVal result = UnknownVal();

  // If the check is for strnlen() then bind the return value to no more than
  // the maxlen value.
  if (IsStrnlen) {
    QualType cmpTy = C.getSValBuilder().getConditionType();

    // It's a little unfortunate to be getting this again,
    // but it's not that expensive...
    const Expr *maxlenExpr = Call.getArgExpr(1);
    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);

    std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
    std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();

    if (strLengthNL && maxlenValNL) {
      ProgramStateRef stateStringTooLong, stateStringNotTooLong;

      // Check if the strLength is greater than the maxlen.
      std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
          C.getSValBuilder()
              .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
              .castAs<DefinedOrUnknownSVal>());

      if (stateStringTooLong && !stateStringNotTooLong) {
        // If the string is longer than maxlen, return maxlen.
        result = *maxlenValNL;
      } else if (stateStringNotTooLong && !stateStringTooLong) {
        // If the string is shorter than maxlen, return its length.
        result = *strLengthNL;
      }
    }

    if (result.isUnknown()) {
      // If we don't have enough information for a comparison, there's
      // no guarantee the full string length will actually be returned.
      // All we know is the return value is the min of the string length
      // and the limit. This is better than nothing.
      result = C.getSValBuilder().conjureSymbolVal(
          nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
      NonLoc resultNL = result.castAs<NonLoc>();

      // Constrain the conjured result from above by each known bound.
      if (strLengthNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }

      if (maxlenValNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }
    }

  } else {
    // This is a plain strlen(), not strnlen().
    result = strLength.castAs<DefinedOrUnknownSVal>();

    // If we don't know the length of the string, conjure a return
    // value, so it can be used in constraints, at least.
    if (result.isUnknown()) {
      result = C.getSValBuilder().conjureSymbolVal(
          nullptr, Call.getOriginExpr(), LCtx, C.blockCount());
    }
  }

  // Bind the return value.
  assert(!result.isUnknown() && "Should have conjured a value by now");
  state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
  C.addTransition(state);
}

/// Models strcpy() via evalStrcpyCommon(): unbounded, not a concatenation,
/// ReturnEnd = false.
void CStringChecker::evalStrcpy(CheckerContext &C,
                                const CallEvent &Call) const {
  // char *strcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, Call,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strncpy() via evalStrcpyCommon(): bounded, not a concatenation,
/// ReturnEnd = false.
void CStringChecker::evalStrncpy(CheckerContext &C,
                                 const CallEvent &Call) const {
  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
  evalStrcpyCommon(C, Call,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models stpcpy() via evalStrcpyCommon(): unbounded, not a concatenation,
/// ReturnEnd = true.
void CStringChecker::evalStpcpy(CheckerContext &C,
                                const CallEvent &Call) const {
  // char *stpcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, Call,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ false,
                   /* appendK = */ 
ConcatFnKind::none);1824}18251826void CStringChecker::evalStrlcpy(CheckerContext &C,1827const CallEvent &Call) const {1828// size_t strlcpy(char *dest, const char *src, size_t size);1829evalStrcpyCommon(C, Call,1830/* ReturnEnd = */ true,1831/* IsBounded = */ true,1832/* appendK = */ ConcatFnKind::none,1833/* returnPtr = */ false);1834}18351836void CStringChecker::evalStrcat(CheckerContext &C,1837const CallEvent &Call) const {1838// char *strcat(char *restrict s1, const char *restrict s2);1839evalStrcpyCommon(C, Call,1840/* ReturnEnd = */ false,1841/* IsBounded = */ false,1842/* appendK = */ ConcatFnKind::strcat);1843}18441845void CStringChecker::evalStrncat(CheckerContext &C,1846const CallEvent &Call) const {1847// char *strncat(char *restrict s1, const char *restrict s2, size_t n);1848evalStrcpyCommon(C, Call,1849/* ReturnEnd = */ false,1850/* IsBounded = */ true,1851/* appendK = */ ConcatFnKind::strcat);1852}18531854void CStringChecker::evalStrlcat(CheckerContext &C,1855const CallEvent &Call) const {1856// size_t strlcat(char *dst, const char *src, size_t size);1857// It will append at most size - strlen(dst) - 1 bytes,1858// NULL-terminating the result.1859evalStrcpyCommon(C, Call,1860/* ReturnEnd = */ false,1861/* IsBounded = */ true,1862/* appendK = */ ConcatFnKind::strlcat,1863/* returnPtr = */ false);1864}18651866void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,1867bool ReturnEnd, bool IsBounded,1868ConcatFnKind appendK,1869bool returnPtr) const {1870if (appendK == ConcatFnKind::none)1871CurrentFunctionDescription = "string copy function";1872else1873CurrentFunctionDescription = "string concatenation function";18741875ProgramStateRef state = C.getState();1876const LocationContext *LCtx = C.getLocationContext();18771878// Check that the destination is non-null.1879DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};1880SVal DstVal = state->getSVal(Dst.Expression, LCtx);1881state = checkNonNull(C, state, Dst, DstVal);1882if 
/// Shared model for the strcpy/strcat family.
///
/// Argument 0 is the destination and argument 1 the source; when \p IsBounded
/// is true, argument 2 is read as the size bound.
///
/// \param ReturnEnd  With \p returnPtr set, the result is a pointer to the
///                   end of the copied data (conjured if it cannot be
///                   computed) instead of the destination pointer.
/// \param IsBounded  Whether the call carries a size argument.
/// \param appendK    Selects plain copy (none), strcat-style append, or
///                   strlcat-style append.
/// \param returnPtr  When false, the result is a length: the strlcpy/strlcat
///                   return value for `none`/`strlcat`, otherwise the final
///                   string length.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Non-null once a bound-based check has been set up; later used to skip the
  // check based on the actual amount copied.
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  //  Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {
      {srcExpr.Expression, srcExpr.ArgumentIndex}};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
      Dst, srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check if the max number to copy is less than the length of the src.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // Max number to copy is less than the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        break;
      }
      case ConcatFnKind::strlcat:
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        // Both outcomes feasible: the amount copied stays unknown.
        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For strncpy and strlcat, this is just checking
        //  that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            StateZeroSize =
                StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
                                                      LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              // NOTE(review): the BSD manual describes strlcat as treating
              // the destination length as `size` when no NUL is found within
              // `size` bytes, which for size == 0 would give strlen(src)
              // alone -- confirm whether this model is intentional.
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize =
                  StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Return value used for the length-returning variants (strlcpy/strlcat).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src)
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength =
          getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      // strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      // Check if the first byte of the destination is writable.
      state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
      if (!state)
        return;
      // Check if the last byte of the destination is writable.
      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        // Check if the first byte of the destination is writable.
        state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
        if (!state)
          return;
        // Check if the last byte of the destination is writable.
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
                                              *dstRegVal, amountCopied,
                                              C.getASTContext().getSizeType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,
                                            C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
  C.addTransition(state);
}
If the original string didn't fit entirely inside2242// the bound (including the null-terminator), we don't know how long the2243// result is.2244if (amountCopied != strLength)2245finalStrLength = UnknownVal();2246}2247state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);2248}22492250assert(state);22512252if (returnPtr) {2253// If this is a stpcpy-style copy, but we were unable to check for a buffer2254// overflow, we still need a result. Conjure a return value.2255if (ReturnEnd && Result.isUnknown()) {2256Result = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,2257C.blockCount());2258}2259}2260// Set the return value.2261state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);2262C.addTransition(state);2263}22642265void CStringChecker::evalStrcmp(CheckerContext &C,2266const CallEvent &Call) const {2267//int strcmp(const char *s1, const char *s2);2268evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);2269}22702271void CStringChecker::evalStrncmp(CheckerContext &C,2272const CallEvent &Call) const {2273//int strncmp(const char *s1, const char *s2, size_t n);2274evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);2275}22762277void CStringChecker::evalStrcasecmp(CheckerContext &C,2278const CallEvent &Call) const {2279//int strcasecmp(const char *s1, const char *s2);2280evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);2281}22822283void CStringChecker::evalStrncasecmp(CheckerContext &C,2284const CallEvent &Call) const {2285//int strncasecmp(const char *s1, const char *s2, size_t n);2286evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);2287}22882289void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,2290bool IsBounded, bool IgnoreCase) const {2291CurrentFunctionDescription = "string comparison function";2292ProgramStateRef state = C.getState();2293const LocationContext *LCtx = 
C.getLocationContext();22942295// Check that the first string is non-null2296AnyArgExpr Left = {Call.getArgExpr(0), 0};2297SVal LeftVal = state->getSVal(Left.Expression, LCtx);2298state = checkNonNull(C, state, Left, LeftVal);2299if (!state)2300return;23012302// Check that the second string is non-null.2303AnyArgExpr Right = {Call.getArgExpr(1), 1};2304SVal RightVal = state->getSVal(Right.Expression, LCtx);2305state = checkNonNull(C, state, Right, RightVal);2306if (!state)2307return;23082309// Get the string length of the first string or give up.2310SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);2311if (LeftLength.isUndef())2312return;23132314// Get the string length of the second string or give up.2315SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);2316if (RightLength.isUndef())2317return;23182319// If we know the two buffers are the same, we know the result is 0.2320// First, get the two buffers' addresses. Another checker will have already2321// made sure they're not undefined.2322DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();2323DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();23242325// See if they are the same.2326SValBuilder &svalBuilder = C.getSValBuilder();2327DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);2328ProgramStateRef StSameBuf, StNotSameBuf;2329std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);23302331// If the two arguments might be the same buffer, we know the result is 0,2332// and we only need to check one size.2333if (StSameBuf) {2334StSameBuf =2335StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,2336svalBuilder.makeZeroVal(Call.getResultType()));2337C.addTransition(StSameBuf);23382339// If the two arguments are GUARANTEED to be the same, we're done!2340if (!StNotSameBuf)2341return;2342}23432344assert(StNotSameBuf);2345state = StNotSameBuf;23462347// At this point we can go about comparing the two buffers.2348// For now, we only do 
this if they're both known string literals.23492350// Attempt to extract string literals from both expressions.2351const StringLiteral *LeftStrLiteral =2352getCStringLiteral(C, state, Left.Expression, LeftVal);2353const StringLiteral *RightStrLiteral =2354getCStringLiteral(C, state, Right.Expression, RightVal);2355bool canComputeResult = false;2356SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, Call.getOriginExpr(),2357LCtx, C.blockCount());23582359if (LeftStrLiteral && RightStrLiteral) {2360StringRef LeftStrRef = LeftStrLiteral->getString();2361StringRef RightStrRef = RightStrLiteral->getString();23622363if (IsBounded) {2364// Get the max number of characters to compare.2365const Expr *lenExpr = Call.getArgExpr(2);2366SVal lenVal = state->getSVal(lenExpr, LCtx);23672368// If the length is known, we can get the right substrings.2369if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {2370// Create substrings of each to compare the prefix.2371LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());2372RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());2373canComputeResult = true;2374}2375} else {2376// This is a normal, unbounded strcmp.2377canComputeResult = true;2378}23792380if (canComputeResult) {2381// Real strcmp stops at null characters.2382size_t s1Term = LeftStrRef.find('\0');2383if (s1Term != StringRef::npos)2384LeftStrRef = LeftStrRef.substr(0, s1Term);23852386size_t s2Term = RightStrRef.find('\0');2387if (s2Term != StringRef::npos)2388RightStrRef = RightStrRef.substr(0, s2Term);23892390// Use StringRef's comparison methods to compute the actual result.2391int compareRes = IgnoreCase ? 
LeftStrRef.compare_insensitive(RightStrRef)2392: LeftStrRef.compare(RightStrRef);23932394// The strcmp function returns an integer greater than, equal to, or less2395// than zero, [c11, p7.24.4.2].2396if (compareRes == 0) {2397resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());2398}2399else {2400DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());2401// Constrain strcmp's result range based on the result of StringRef's2402// comparison methods.2403BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;2404SVal compareWithZero =2405svalBuilder.evalBinOp(state, op, resultVal, zeroVal,2406svalBuilder.getConditionType());2407DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();2408state = state->assume(compareWithZeroVal, true);2409}2410}2411}24122413state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);24142415// Record this as a possible path.2416C.addTransition(state);2417}24182419void CStringChecker::evalStrsep(CheckerContext &C,2420const CallEvent &Call) const {2421// char *strsep(char **stringp, const char *delim);2422// Verify whether the search string parameter matches the return type.2423SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};24242425QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();2426if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=2427CharPtrTy.getUnqualifiedType())2428return;24292430CurrentFunctionDescription = "strsep()";2431ProgramStateRef State = C.getState();2432const LocationContext *LCtx = C.getLocationContext();24332434// Check that the search string pointer is non-null (though it may point to2435// a null string).2436SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);2437State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);2438if (!State)2439return;24402441// Check that the delimiter string is non-null.2442AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};2443SVal DelimStrVal = 
State->getSVal(DelimStr.Expression, LCtx);2444State = checkNonNull(C, State, DelimStr, DelimStrVal);2445if (!State)2446return;24472448SValBuilder &SVB = C.getSValBuilder();2449SVal Result;2450if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {2451// Get the current value of the search string pointer, as a char*.2452Result = State->getSVal(*SearchStrLoc, CharPtrTy);24532454// Invalidate the search string, representing the change of one delimiter2455// character to NUL.2456// As the replacement never overflows, do not invalidate its super region.2457State = invalidateDestinationBufferNeverOverflows(2458C, State, SearchStrPtr.Expression, Result);24592460// Overwrite the search string pointer. The new value is either an address2461// further along in the same string, or NULL if there are no more tokens.2462State =2463State->bindLoc(*SearchStrLoc,2464SVB.conjureSymbolVal(getTag(), Call.getOriginExpr(),2465LCtx, CharPtrTy, C.blockCount()),2466LCtx);2467} else {2468assert(SearchStrVal.isUnknown());2469// Conjure a symbolic value. 
It's the best we can do.2470Result = SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx,2471C.blockCount());2472}24732474// Set the return value, and finish.2475State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);2476C.addTransition(State);2477}24782479// These should probably be moved into a C++ standard library checker.2480void CStringChecker::evalStdCopy(CheckerContext &C,2481const CallEvent &Call) const {2482evalStdCopyCommon(C, Call);2483}24842485void CStringChecker::evalStdCopyBackward(CheckerContext &C,2486const CallEvent &Call) const {2487evalStdCopyCommon(C, Call);2488}24892490void CStringChecker::evalStdCopyCommon(CheckerContext &C,2491const CallEvent &Call) const {2492if (!Call.getArgExpr(2)->getType()->isPointerType())2493return;24942495ProgramStateRef State = C.getState();24962497const LocationContext *LCtx = C.getLocationContext();24982499// template <class _InputIterator, class _OutputIterator>2500// _OutputIterator2501// copy(_InputIterator __first, _InputIterator __last,2502// _OutputIterator __result)25032504// Invalidate the destination buffer2505const Expr *Dst = Call.getArgExpr(2);2506SVal DstVal = State->getSVal(Dst, LCtx);2507// FIXME: As we do not know how many items are copied, we also invalidate the2508// super region containing the target location.2509State =2510invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);25112512SValBuilder &SVB = C.getSValBuilder();25132514SVal ResultVal =2515SVB.conjureSymbolVal(nullptr, Call.getOriginExpr(), LCtx, C.blockCount());2516State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);25172518C.addTransition(State);2519}25202521void CStringChecker::evalMemset(CheckerContext &C,2522const CallEvent &Call) const {2523// void *memset(void *s, int c, size_t n);2524CurrentFunctionDescription = "memory set function";25252526DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};2527AnyArgExpr CharE = {Call.getArgExpr(1), 1};2528SizeArgExpr Size = {{Call.getArgExpr(2), 
2}};25292530ProgramStateRef State = C.getState();25312532// See if the size argument is zero.2533const LocationContext *LCtx = C.getLocationContext();2534SVal SizeVal = C.getSVal(Size.Expression);2535QualType SizeTy = Size.Expression->getType();25362537ProgramStateRef ZeroSize, NonZeroSize;2538std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);25392540// Get the value of the memory area.2541SVal BufferPtrVal = C.getSVal(Buffer.Expression);25422543// If the size is zero, there won't be any actual memory access, so2544// just bind the return value to the buffer and return.2545if (ZeroSize && !NonZeroSize) {2546ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);2547C.addTransition(ZeroSize);2548return;2549}25502551// Ensure the memory area is not null.2552// If it is NULL there will be a NULL pointer dereference.2553State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);2554if (!State)2555return;25562557State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);2558if (!State)2559return;25602561// According to the values of the arguments, bind the value of the second2562// argument to the destination buffer and set string length, or just2563// invalidate the destination buffer.2564if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),2565Size.Expression, C, State))2566return;25672568State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);2569C.addTransition(State);2570}25712572void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {2573CurrentFunctionDescription = "memory clearance function";25742575DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};2576SizeArgExpr Size = {{Call.getArgExpr(1), 1}};2577SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);25782579ProgramStateRef State = C.getState();25802581// See if the size argument is zero.2582SVal SizeVal = C.getSVal(Size.Expression);2583QualType SizeTy = 
Size.Expression->getType();25842585ProgramStateRef StateZeroSize, StateNonZeroSize;2586std::tie(StateZeroSize, StateNonZeroSize) =2587assumeZero(C, State, SizeVal, SizeTy);25882589// If the size is zero, there won't be any actual memory access,2590// In this case we just return.2591if (StateZeroSize && !StateNonZeroSize) {2592C.addTransition(StateZeroSize);2593return;2594}25952596// Get the value of the memory area.2597SVal MemVal = C.getSVal(Buffer.Expression);25982599// Ensure the memory area is not null.2600// If it is NULL there will be a NULL pointer dereference.2601State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);2602if (!State)2603return;26042605State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);2606if (!State)2607return;26082609if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))2610return;26112612C.addTransition(State);2613}26142615void CStringChecker::evalSprintf(CheckerContext &C,2616const CallEvent &Call) const {2617CurrentFunctionDescription = "'sprintf'";2618evalSprintfCommon(C, Call, /* IsBounded = */ false);2619}26202621void CStringChecker::evalSnprintf(CheckerContext &C,2622const CallEvent &Call) const {2623CurrentFunctionDescription = "'snprintf'";2624evalSprintfCommon(C, Call, /* IsBounded = */ true);2625}26262627void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,2628bool IsBounded) const {2629ProgramStateRef State = C.getState();2630const auto *CE = cast<CallExpr>(Call.getOriginExpr());2631DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};26322633const auto NumParams = Call.parameters().size();2634if (CE->getNumArgs() < NumParams) {2635// This is an invalid call, let's just ignore it.2636return;2637}26382639const auto AllArguments =2640llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());2641const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);26422643for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {2644// We consider only 
string buffers2645if (const QualType type = ArgExpr->getType();2646!type->isAnyPointerType() ||2647!type->getPointeeType()->isAnyCharacterType())2648continue;2649SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};26502651// Ensure the buffers do not overlap.2652SizeArgExpr SrcExprAsSizeDummy = {2653{Source.Expression, Source.ArgumentIndex}};2654State = CheckOverlap(2655C, State,2656(IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),2657Dest, Source);2658if (!State)2659return;2660}26612662C.addTransition(State);2663}26642665//===----------------------------------------------------------------------===//2666// The driver method, and other Checker callbacks.2667//===----------------------------------------------------------------------===//26682669CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,2670CheckerContext &C) const {2671const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());2672if (!CE)2673return nullptr;26742675const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());2676if (!FD)2677return nullptr;26782679if (StdCopy.matches(Call))2680return &CStringChecker::evalStdCopy;2681if (StdCopyBackward.matches(Call))2682return &CStringChecker::evalStdCopyBackward;26832684// Pro-actively check that argument types are safe to do arithmetic upon.2685// We do not want to crash if someone accidentally passes a structure2686// into, say, a C++ overload of any of these functions. 
We could not check2687// that for std::copy because they may have arguments of other types.2688for (auto I : CE->arguments()) {2689QualType T = I->getType();2690if (!T->isIntegralOrEnumerationType() && !T->isPointerType())2691return nullptr;2692}26932694const FnCheck *Callback = Callbacks.lookup(Call);2695if (Callback)2696return *Callback;26972698return nullptr;2699}27002701bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {2702FnCheck Callback = identifyCall(Call, C);27032704// If the callee isn't a string function, let another checker handle it.2705if (!Callback)2706return false;27072708// Check and evaluate the call.2709assert(isa<CallExpr>(Call.getOriginExpr()));2710Callback(this, C, Call);27112712// If the evaluate call resulted in no change, chain to the next eval call2713// handler.2714// Note, the custom CString evaluation calls assume that basic safety2715// properties are held. However, if the user chooses to turn off some of these2716// checks, we ignore the issues and leave the call evaluation to a generic2717// handler.2718return C.isDifferent();2719}27202721void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {2722// Record string length for char a[] = "abc";2723ProgramStateRef state = C.getState();27242725for (const auto *I : DS->decls()) {2726const VarDecl *D = dyn_cast<VarDecl>(I);2727if (!D)2728continue;27292730// FIXME: Handle array fields of structs.2731if (!D->getType()->isArrayType())2732continue;27332734const Expr *Init = D->getInit();2735if (!Init)2736continue;2737if (!isa<StringLiteral>(Init))2738continue;27392740Loc VarLoc = state->getLValue(D, C.getLocationContext());2741const MemRegion *MR = VarLoc.getAsRegion();2742if (!MR)2743continue;27442745SVal StrVal = C.getSVal(Init);2746assert(StrVal.isValid() && "Initializer string is unknown or undefined");2747DefinedOrUnknownSVal strLength =2748getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();27492750state = 
state->set<CStringLength>(MR, strLength);2751}27522753C.addTransition(state);2754}27552756ProgramStateRef2757CStringChecker::checkRegionChanges(ProgramStateRef state,2758const InvalidatedSymbols *,2759ArrayRef<const MemRegion *> ExplicitRegions,2760ArrayRef<const MemRegion *> Regions,2761const LocationContext *LCtx,2762const CallEvent *Call) const {2763CStringLengthTy Entries = state->get<CStringLength>();2764if (Entries.isEmpty())2765return state;27662767llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;2768llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;27692770// First build sets for the changed regions and their super-regions.2771for (const MemRegion *MR : Regions) {2772Invalidated.insert(MR);27732774SuperRegions.insert(MR);2775while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {2776MR = SR->getSuperRegion();2777SuperRegions.insert(MR);2778}2779}27802781CStringLengthTy::Factory &F = state->get_context<CStringLength>();27822783// Then loop over the entries in the current state.2784for (const MemRegion *MR : llvm::make_first_range(Entries)) {2785// Is this entry for a super-region of a changed region?2786if (SuperRegions.count(MR)) {2787Entries = F.remove(Entries, MR);2788continue;2789}27902791// Is this entry for a sub-region of a changed region?2792const MemRegion *Super = MR;2793while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {2794Super = SR->getSuperRegion();2795if (Invalidated.count(Super)) {2796Entries = F.remove(Entries, MR);2797break;2798}2799}2800}28012802return state->set<CStringLength>(Entries);2803}28042805void CStringChecker::checkLiveSymbols(ProgramStateRef state,2806SymbolReaper &SR) const {2807// Mark all symbols in our string length map as valid.2808CStringLengthTy Entries = state->get<CStringLength>();28092810for (SVal Len : llvm::make_second_range(Entries)) {2811for (SymbolRef Sym : Len.symbols())2812SR.markInUse(Sym);2813}2814}28152816void CStringChecker::checkDeadSymbols(SymbolReaper &SR,2817CheckerContext &C) const 
{2818ProgramStateRef state = C.getState();2819CStringLengthTy Entries = state->get<CStringLength>();2820if (Entries.isEmpty())2821return;28222823CStringLengthTy::Factory &F = state->get_context<CStringLength>();2824for (auto [Reg, Len] : Entries) {2825if (SymbolRef Sym = Len.getAsSymbol()) {2826if (SR.isDead(Sym))2827Entries = F.remove(Entries, Reg);2828}2829}28302831state = state->set<CStringLength>(Entries);2832C.addTransition(state);2833}28342835void ento::registerCStringModeling(CheckerManager &Mgr) {2836Mgr.registerChecker<CStringChecker>();2837}28382839bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {2840return true;2841}28422843#define REGISTER_CHECKER(name) \2844void ento::register##name(CheckerManager &mgr) { \2845CStringChecker *checker = mgr.getChecker<CStringChecker>(); \2846checker->Filter.Check##name = true; \2847checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \2848} \2849\2850bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }28512852REGISTER_CHECKER(CStringNullArg)2853REGISTER_CHECKER(CStringOutOfBounds)2854REGISTER_CHECKER(CStringBufferOverlap)2855REGISTER_CHECKER(CStringNotNullTerm)2856REGISTER_CHECKER(CStringUninitializedRead)285728582859