Path: blob/main/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/CStringSyntaxChecker.cpp
35266 views
//== CStringSyntaxChecker.cpp - CoreFoundation containers API *- C++ -*-==//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// An AST checker that looks for common pitfalls when using C string APIs.9// - Identifies erroneous patterns in the last argument to strncat - the number10// of bytes to copy.11//12//===----------------------------------------------------------------------===//13#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"14#include "clang/AST/Expr.h"15#include "clang/AST/OperationKinds.h"16#include "clang/AST/StmtVisitor.h"17#include "clang/Analysis/AnalysisDeclContext.h"18#include "clang/Basic/TargetInfo.h"19#include "clang/Basic/TypeTraits.h"20#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"21#include "clang/StaticAnalyzer/Core/Checker.h"22#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"23#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"24#include "llvm/ADT/SmallString.h"25#include "llvm/Support/raw_ostream.h"2627using namespace clang;28using namespace ento;2930namespace {31class WalkAST: public StmtVisitor<WalkAST> {32const CheckerBase *Checker;33BugReporter &BR;34AnalysisDeclContext* AC;3536/// Check if two expressions refer to the same declaration.37bool sameDecl(const Expr *A1, const Expr *A2) {38if (const auto *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts()))39if (const auto *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts()))40return D1->getDecl() == D2->getDecl();41return false;42}4344/// Check if the expression E is a sizeof(WithArg).45bool isSizeof(const Expr *E, const Expr *WithArg) {46if (const auto *UE = dyn_cast<UnaryExprOrTypeTraitExpr>(E))47if (UE->getKind() == UETT_SizeOf && !UE->isArgumentType())48return sameDecl(UE->getArgumentExpr(), WithArg);49return false;50}5152/// Check if the expression E is a strlen(WithArg).53bool isStrlen(const Expr *E, const Expr *WithArg) {54if (const auto *CE = dyn_cast<CallExpr>(E)) {55const FunctionDecl *FD = CE->getDirectCallee();56if (!FD)57return false;58return (CheckerContext::isCLibraryFunction(FD, "strlen") &&59sameDecl(CE->getArg(0), WithArg));60}61return false;62}6364/// Check if the expression is an integer literal with value 1.65bool isOne(const Expr *E) {66if (const auto *IL = dyn_cast<IntegerLiteral>(E))67return (IL->getValue().isIntN(1));68return false;69}7071StringRef getPrintableName(const Expr *E) {72if (const auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))73return D->getDecl()->getName();74return StringRef();75}7677/// Identify erroneous patterns in the last argument to strncat - the number78/// of bytes to copy.79bool containsBadStrncatPattern(const CallExpr *CE);8081/// Identify erroneous patterns in the last argument to strlcpy - the number82/// of bytes to copy.83/// The bad pattern checked is when the size is known84/// to be larger than the destination can handle.85/// char dst[2];86/// size_t cpy = 4;87/// strlcpy(dst, "abcd", sizeof("abcd") - 1);88/// strlcpy(dst, "abcd", 4);89/// strlcpy(dst + 3, "abcd", 2);90/// strlcpy(dst, "abcd", cpy);91/// Identify erroneous patterns in the last argument to strlcat - the number92/// of bytes to copy.93/// The bad pattern checked is when the last argument is basically94/// pointing to the destination buffer size or argument larger or95/// equal to.96/// char dst[2];97/// strlcat(dst, src2, sizeof(dst));98/// strlcat(dst, src2, 2);99/// strlcat(dst, src2, 10);100bool containsBadStrlcpyStrlcatPattern(const CallExpr *CE);101102public:103WalkAST(const CheckerBase *Checker, BugReporter &BR, AnalysisDeclContext *AC)104: Checker(Checker), BR(BR), AC(AC) {}105106// Statement visitor methods.107void VisitChildren(Stmt *S);108void VisitStmt(Stmt *S) {109VisitChildren(S);110}111void VisitCallExpr(CallExpr *CE);112};113} // end anonymous namespace114115// The correct size argument should look like following:116// strncat(dst, src, sizeof(dst) - strlen(dest) - 1);117// We look for the following anti-patterns:118// - strncat(dst, src, sizeof(dst) - strlen(dst));119// - strncat(dst, src, sizeof(dst) - 1);120// - strncat(dst, src, sizeof(dst));121bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) {122if (CE->getNumArgs() != 3)123return false;124const Expr *DstArg = CE->getArg(0);125const Expr *SrcArg = CE->getArg(1);126const Expr *LenArg = CE->getArg(2);127128// Identify wrong size expressions, which are commonly used instead.129if (const auto *BE = dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) {130// - sizeof(dst) - strlen(dst)131if (BE->getOpcode() == BO_Sub) {132const Expr *L = BE->getLHS();133const Expr *R = BE->getRHS();134if (isSizeof(L, DstArg) && isStrlen(R, DstArg))135return true;136137// - sizeof(dst) - 1138if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts()))139return true;140}141}142// - sizeof(dst)143if (isSizeof(LenArg, DstArg))144return true;145146// - sizeof(src)147if (isSizeof(LenArg, SrcArg))148return true;149return false;150}151152bool WalkAST::containsBadStrlcpyStrlcatPattern(const CallExpr *CE) {153if (CE->getNumArgs() != 3)154return false;155const Expr *DstArg = CE->getArg(0);156const Expr *LenArg = CE->getArg(2);157158const auto *DstArgDRE = dyn_cast<DeclRefExpr>(DstArg->IgnoreParenImpCasts());159const auto *LenArgDRE =160dyn_cast<DeclRefExpr>(LenArg->IgnoreParenLValueCasts());161uint64_t DstOff = 0;162if (isSizeof(LenArg, DstArg))163return false;164165// - size_t dstlen = sizeof(dst)166if (LenArgDRE) {167const auto *LenArgVal = dyn_cast<VarDecl>(LenArgDRE->getDecl());168// If it's an EnumConstantDecl instead, then we're missing out on something.169if (!LenArgVal) {170assert(isa<EnumConstantDecl>(LenArgDRE->getDecl()));171return false;172}173if (LenArgVal->getInit())174LenArg = LenArgVal->getInit();175}176177// - integral value178// We try to figure out if the last argument is possibly longer179// than the destination can possibly handle if its size can be defined.180if (const auto *IL = dyn_cast<IntegerLiteral>(LenArg->IgnoreParenImpCasts())) {181uint64_t ILRawVal = IL->getValue().getZExtValue();182183// Case when there is pointer arithmetic on the destination buffer184// especially when we offset from the base decreasing the185// buffer length accordingly.186if (!DstArgDRE) {187if (const auto *BE =188dyn_cast<BinaryOperator>(DstArg->IgnoreParenImpCasts())) {189DstArgDRE = dyn_cast<DeclRefExpr>(BE->getLHS()->IgnoreParenImpCasts());190if (BE->getOpcode() == BO_Add) {191if ((IL = dyn_cast<IntegerLiteral>(BE->getRHS()->IgnoreParenImpCasts()))) {192DstOff = IL->getValue().getZExtValue();193}194}195}196}197if (DstArgDRE) {198if (const auto *Buffer =199dyn_cast<ConstantArrayType>(DstArgDRE->getType())) {200ASTContext &C = BR.getContext();201uint64_t BufferLen = C.getTypeSize(Buffer) / 8;202auto RemainingBufferLen = BufferLen - DstOff;203if (RemainingBufferLen < ILRawVal)204return true;205}206}207}208209return false;210}211212void WalkAST::VisitCallExpr(CallExpr *CE) {213const FunctionDecl *FD = CE->getDirectCallee();214if (!FD)215return;216217if (CheckerContext::isCLibraryFunction(FD, "strncat")) {218if (containsBadStrncatPattern(CE)) {219const Expr *DstArg = CE->getArg(0);220const Expr *LenArg = CE->getArg(2);221PathDiagnosticLocation Loc =222PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);223224StringRef DstName = getPrintableName(DstArg);225226SmallString<256> S;227llvm::raw_svector_ostream os(S);228os << "Potential buffer overflow. ";229if (!DstName.empty()) {230os << "Replace with 'sizeof(" << DstName << ") "231"- strlen(" << DstName <<") - 1'";232os << " or u";233} else234os << "U";235os << "se a safer 'strlcat' API";236237BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",238"C String API", os.str(), Loc,239LenArg->getSourceRange());240}241} else if (CheckerContext::isCLibraryFunction(FD, "strlcpy") ||242CheckerContext::isCLibraryFunction(FD, "strlcat")) {243if (containsBadStrlcpyStrlcatPattern(CE)) {244const Expr *DstArg = CE->getArg(0);245const Expr *LenArg = CE->getArg(2);246PathDiagnosticLocation Loc =247PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);248249StringRef DstName = getPrintableName(DstArg);250251SmallString<256> S;252llvm::raw_svector_ostream os(S);253os << "The third argument allows to potentially copy more bytes than it should. ";254os << "Replace with the value ";255if (!DstName.empty())256os << "sizeof(" << DstName << ")";257else258os << "sizeof(<destination buffer>)";259os << " or lower";260261BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",262"C String API", os.str(), Loc,263LenArg->getSourceRange());264}265}266267// Recurse and check children.268VisitChildren(CE);269}270271void WalkAST::VisitChildren(Stmt *S) {272for (Stmt *Child : S->children())273if (Child)274Visit(Child);275}276277namespace {278class CStringSyntaxChecker: public Checker<check::ASTCodeBody> {279public:280281void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,282BugReporter &BR) const {283WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D));284walker.Visit(D->getBody());285}286};287}288289void ento::registerCStringSyntaxChecker(CheckerManager &mgr) {290mgr.registerChecker<CStringSyntaxChecker>();291}292293bool ento::shouldRegisterCStringSyntaxChecker(const CheckerManager &mgr) {294return true;295}296297298