Path: blob/main/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
35269 views
//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This checker defines the attack surface for generic taint propagation.9//10// The taint information produced by it might be useful to other checkers. For11// example, checkers should report errors which involve tainted data more12// aggressively, even if the involved symbols are under constrained.13//14//===----------------------------------------------------------------------===//1516#include "Yaml.h"17#include "clang/AST/Attr.h"18#include "clang/Basic/Builtins.h"19#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"20#include "clang/StaticAnalyzer/Checkers/Taint.h"21#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"22#include "clang/StaticAnalyzer/Core/Checker.h"23#include "clang/StaticAnalyzer/Core/CheckerManager.h"24#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"25#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"26#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"27#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"28#include "llvm/ADT/StringExtras.h"29#include "llvm/ADT/StringRef.h"30#include "llvm/Support/YAMLTraits.h"3132#include <limits>33#include <memory>34#include <optional>35#include <utility>36#include <vector>3738#define DEBUG_TYPE "taint-checker"3940using namespace clang;41using namespace ento;42using namespace taint;4344using llvm::ImmutableSet;4546namespace {4748class GenericTaintChecker;4950/// Check for CWE-134: Uncontrolled Format String.51constexpr llvm::StringLiteral MsgUncontrolledFormatString =52"Untrusted data is used as a format string "53"(CWE-134: Uncontrolled Format String)";5455/// Check for:56/// CERT/STR02-C. "Sanitize data passed to complex subsystems"57/// CWE-78, "Failure to Sanitize Data into an OS Command"58constexpr llvm::StringLiteral MsgSanitizeSystemArgs =59"Untrusted data is passed to a system call "60"(CERT/STR02-C. Sanitize data passed to complex subsystems)";6162/// Check if tainted data is used as a custom sink's parameter.63constexpr llvm::StringLiteral MsgCustomSink =64"Untrusted data is passed to a user-defined sink";6566using ArgIdxTy = int;67using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;6869/// Denotes the return value.70constexpr ArgIdxTy ReturnValueIndex{-1};7172static ArgIdxTy fromArgumentCount(unsigned Count) {73assert(Count <=74static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) &&75"ArgIdxTy is not large enough to represent the number of arguments.");76return Count;77}7879/// Check if the region the expression evaluates to is the standard input,80/// and thus, is tainted.81/// FIXME: Move this to Taint.cpp.82bool isStdin(SVal Val, const ASTContext &ACtx) {83// FIXME: What if Val is NonParamVarRegion?8485// The region should be symbolic, we do not know it's value.86const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());87if (!SymReg)88return false;8990// Get it's symbol and find the declaration region it's pointing to.91const auto *DeclReg =92dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());93if (!DeclReg)94return false;9596// This region corresponds to a declaration, find out if it's a global/extern97// variable named stdin with the proper type.98if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {99D = D->getCanonicalDecl();100if (D->getName() == "stdin" && D->hasExternalStorage() && D->isExternC()) {101const QualType FILETy = ACtx.getFILEType().getCanonicalType();102const QualType Ty = D->getType().getCanonicalType();103104if (Ty->isPointerType())105return Ty->getPointeeType() == FILETy;106}107}108return false;109}110111SVal getPointeeOf(ProgramStateRef State, Loc LValue) {112const QualType ArgTy = LValue.getType(State->getStateManager().getContext());113if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())114return State->getSVal(LValue);115116// Do not dereference void pointers. Treat them as byte pointers instead.117// FIXME: we might want to consider more than just the first byte.118return State->getSVal(LValue, State->getStateManager().getContext().CharTy);119}120121/// Given a pointer/reference argument, return the value it refers to.122std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) {123if (auto LValue = Arg.getAs<Loc>())124return getPointeeOf(State, *LValue);125return std::nullopt;126}127128/// Given a pointer, return the SVal of its pointee or if it is tainted,129/// otherwise return the pointer's SVal if tainted.130/// Also considers stdin as a taint source.131std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State,132SVal Arg) {133if (auto Pointee = getPointeeOf(State, Arg))134if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;135return Pointee;136137if (isTainted(State, Arg))138return Arg;139return std::nullopt;140}141142bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) {143return getTaintedPointeeOrPointer(State, ExprSVal).has_value();144}145146/// Helps in printing taint diagnostics.147/// Marks the incoming parameters of a function interesting (to be printed)148/// when the return value, or the outgoing parameters are tainted.149const NoteTag *taintOriginTrackerTag(CheckerContext &C,150std::vector<SymbolRef> TaintedSymbols,151std::vector<ArgIdxTy> TaintedArgs,152const LocationContext *CallLocation) {153return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),154TaintedArgs = std::move(TaintedArgs), CallLocation](155PathSensitiveBugReport &BR) -> std::string {156SmallString<256> Msg;157// We give diagnostics only for taint related reports158if (!BR.isInteresting(CallLocation) ||159BR.getBugType().getCategory() != categories::TaintedData) {160return "";161}162if (TaintedSymbols.empty())163return "Taint originated here";164165for (auto Sym : TaintedSymbols) {166BR.markInteresting(Sym);167}168LLVM_DEBUG(for (auto Arg169: TaintedArgs) {170llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n";171});172return "";173});174}175176/// Helps in printing taint diagnostics.177/// Marks the function interesting (to be printed)178/// when the return value, or the outgoing parameters are tainted.179const NoteTag *taintPropagationExplainerTag(180CheckerContext &C, std::vector<SymbolRef> TaintedSymbols,181std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) {182assert(TaintedSymbols.size() == TaintedArgs.size());183return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),184TaintedArgs = std::move(TaintedArgs), CallLocation](185PathSensitiveBugReport &BR) -> std::string {186SmallString<256> Msg;187llvm::raw_svector_ostream Out(Msg);188// We give diagnostics only for taint related reports189if (TaintedSymbols.empty() ||190BR.getBugType().getCategory() != categories::TaintedData) {191return "";192}193int nofTaintedArgs = 0;194for (auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {195if (BR.isInteresting(Sym)) {196BR.markInteresting(CallLocation);197if (TaintedArgs[Idx] != ReturnValueIndex) {198LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument "199<< TaintedArgs[Idx] + 1 << "\n");200if (nofTaintedArgs == 0)201Out << "Taint propagated to the ";202else203Out << ", ";204Out << TaintedArgs[Idx] + 1205<< llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) << " argument";206nofTaintedArgs++;207} else {208LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n");209Out << "Taint propagated to the return value";210}211}212}213return std::string(Out.str());214});215}216217/// ArgSet is used to describe arguments relevant for taint detection or218/// taint application. A discrete set of argument indexes and a variadic219/// argument list signified by a starting index are supported.220class ArgSet {221public:222ArgSet() = default;223ArgSet(ArgVecTy &&DiscreteArgs,224std::optional<ArgIdxTy> VariadicIndex = std::nullopt)225: DiscreteArgs(std::move(DiscreteArgs)),226VariadicIndex(std::move(VariadicIndex)) {}227228bool contains(ArgIdxTy ArgIdx) const {229if (llvm::is_contained(DiscreteArgs, ArgIdx))230return true;231232return VariadicIndex && ArgIdx >= *VariadicIndex;233}234235bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; }236237private:238ArgVecTy DiscreteArgs;239std::optional<ArgIdxTy> VariadicIndex;240};241242/// A struct used to specify taint propagation rules for a function.243///244/// If any of the possible taint source arguments is tainted, all of the245/// destination arguments should also be tainted. If ReturnValueIndex is added246/// to the dst list, the return value will be tainted.247class GenericTaintRule {248/// Arguments which are taints sinks and should be checked, and a report249/// should be emitted if taint reaches these.250ArgSet SinkArgs;251/// Arguments which should be sanitized on function return.252ArgSet FilterArgs;253/// Arguments which can participate in taint propagation. If any of the254/// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should255/// be tainted.256ArgSet PropSrcArgs;257ArgSet PropDstArgs;258259/// A message that explains why the call is sensitive to taint.260std::optional<StringRef> SinkMsg;261262GenericTaintRule() = default;263264GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,265std::optional<StringRef> SinkMsg = std::nullopt)266: SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),267PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),268SinkMsg(SinkMsg) {}269270public:271/// Make a rule that reports a warning if taint reaches any of \p FilterArgs272/// arguments.273static GenericTaintRule Sink(ArgSet &&SinkArgs,274std::optional<StringRef> Msg = std::nullopt) {275return {std::move(SinkArgs), {}, {}, {}, Msg};276}277278/// Make a rule that sanitizes all FilterArgs arguments.279static GenericTaintRule Filter(ArgSet &&FilterArgs) {280return {{}, std::move(FilterArgs), {}, {}};281}282283/// Make a rule that unconditionally taints all Args.284/// If Func is provided, it must also return true for taint to propagate.285static GenericTaintRule Source(ArgSet &&SourceArgs) {286return {{}, {}, {}, std::move(SourceArgs)};287}288289/// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.290static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {291return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};292}293294/// Process a function which could either be a taint source, a taint sink, a295/// taint filter or a taint propagator.296void process(const GenericTaintChecker &Checker, const CallEvent &Call,297CheckerContext &C) const;298299/// Handles the resolution of indexes of type ArgIdxTy to Expr*-s.300static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) {301return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()302: Call.getArgExpr(ArgIdx);303};304305/// Functions for custom taintedness propagation.306static bool UntrustedEnv(CheckerContext &C);307};308309using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;310311/// Used to parse the configuration file.312struct TaintConfiguration {313using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;314enum class VariadicType { None, Src, Dst };315316struct Common {317std::string Name;318std::string Scope;319};320321struct Sink : Common {322ArgVecTy SinkArgs;323};324325struct Filter : Common {326ArgVecTy FilterArgs;327};328329struct Propagation : Common {330ArgVecTy SrcArgs;331ArgVecTy DstArgs;332VariadicType VarType;333ArgIdxTy VarIndex;334};335336std::vector<Propagation> Propagations;337std::vector<Filter> Filters;338std::vector<Sink> Sinks;339340TaintConfiguration() = default;341TaintConfiguration(const TaintConfiguration &) = default;342TaintConfiguration(TaintConfiguration &&) = default;343TaintConfiguration &operator=(const TaintConfiguration &) = default;344TaintConfiguration &operator=(TaintConfiguration &&) = default;345};346347struct GenericTaintRuleParser {348GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}349/// Container type used to gather call identification objects grouped into350/// pairs with their corresponding taint rules. It is temporary as it is used351/// to finally initialize RuleLookupTy, which is considered to be immutable.352using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;353RulesContTy parseConfiguration(const std::string &Option,354TaintConfiguration &&Config) const;355356private:357using NamePartsTy = llvm::SmallVector<StringRef, 2>;358359/// Validate part of the configuration, which contains a list of argument360/// indexes.361void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;362363template <typename Config> static NamePartsTy parseNameParts(const Config &C);364365// Takes the config and creates a CallDescription for it and associates a Rule366// with that.367template <typename Config>368static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,369RulesContTy &Rules);370371void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,372RulesContTy &Rules) const;373void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,374RulesContTy &Rules) const;375void parseConfig(const std::string &Option,376TaintConfiguration::Propagation &&P,377RulesContTy &Rules) const;378379CheckerManager &Mgr;380};381382class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {383public:384void checkPreCall(const CallEvent &Call, CheckerContext &C) const;385void checkPostCall(const CallEvent &Call, CheckerContext &C) const;386387void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,388const char *Sep) const override;389390/// Generate a report if the expression is tainted or points to tainted data.391bool generateReportIfTainted(const Expr *E, StringRef Msg,392CheckerContext &C) const;393394bool isTaintReporterCheckerEnabled = false;395std::optional<BugType> BT;396397private:398bool checkUncontrolledFormatString(const CallEvent &Call,399CheckerContext &C) const;400401void taintUnsafeSocketProtocol(const CallEvent &Call,402CheckerContext &C) const;403404/// The taint rules are initalized with the help of a CheckerContext to405/// access user-provided configuration.406void initTaintRules(CheckerContext &C) const;407408// TODO: The two separate `CallDescriptionMap`s were introduced when409// `CallDescription` was unable to restrict matches to the global namespace410// only. This limitation no longer exists, so the following two maps should411// be unified.412mutable std::optional<RuleLookupTy> StaticTaintRules;413mutable std::optional<RuleLookupTy> DynamicTaintRules;414};415} // end of anonymous namespace416417/// YAML serialization mapping.418LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)419LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)420LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)421422namespace llvm {423namespace yaml {424template <> struct MappingTraits<TaintConfiguration> {425static void mapping(IO &IO, TaintConfiguration &Config) {426IO.mapOptional("Propagations", Config.Propagations);427IO.mapOptional("Filters", Config.Filters);428IO.mapOptional("Sinks", Config.Sinks);429}430};431432template <> struct MappingTraits<TaintConfiguration::Sink> {433static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {434IO.mapRequired("Name", Sink.Name);435IO.mapOptional("Scope", Sink.Scope);436IO.mapRequired("Args", Sink.SinkArgs);437}438};439440template <> struct MappingTraits<TaintConfiguration::Filter> {441static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {442IO.mapRequired("Name", Filter.Name);443IO.mapOptional("Scope", Filter.Scope);444IO.mapRequired("Args", Filter.FilterArgs);445}446};447448template <> struct MappingTraits<TaintConfiguration::Propagation> {449static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {450IO.mapRequired("Name", Propagation.Name);451IO.mapOptional("Scope", Propagation.Scope);452IO.mapOptional("SrcArgs", Propagation.SrcArgs);453IO.mapOptional("DstArgs", Propagation.DstArgs);454IO.mapOptional("VariadicType", Propagation.VarType);455IO.mapOptional("VariadicIndex", Propagation.VarIndex);456}457};458459template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {460static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {461IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);462IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);463IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);464}465};466} // namespace yaml467} // namespace llvm468469/// A set which is used to pass information from call pre-visit instruction470/// to the call post-visit. The values are signed integers, which are either471/// ReturnValueIndex, or indexes of the pointer/reference argument, which472/// points to data, which should be tainted on return.473REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,474ImmutableSet<ArgIdxTy>)475REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)476477void GenericTaintRuleParser::validateArgVector(const std::string &Option,478const ArgVecTy &Args) const {479for (ArgIdxTy Arg : Args) {480if (Arg < ReturnValueIndex) {481Mgr.reportInvalidCheckerOptionValue(482Mgr.getChecker<GenericTaintChecker>(), Option,483"an argument number for propagation rules greater or equal to -1");484}485}486}487488template <typename Config>489GenericTaintRuleParser::NamePartsTy490GenericTaintRuleParser::parseNameParts(const Config &C) {491NamePartsTy NameParts;492if (!C.Scope.empty()) {493// If the Scope argument contains multiple "::" parts, those are considered494// namespace identifiers.495StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,496/*KeepEmpty*/ false);497}498NameParts.emplace_back(C.Name);499return NameParts;500}501502template <typename Config>503void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,504GenericTaintRule &&Rule,505RulesContTy &Rules) {506NamePartsTy NameParts = parseNameParts(C);507Rules.emplace_back(CallDescription(CDM::Unspecified, NameParts),508std::move(Rule));509}510511void GenericTaintRuleParser::parseConfig(const std::string &Option,512TaintConfiguration::Sink &&S,513RulesContTy &Rules) const {514validateArgVector(Option, S.SinkArgs);515consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),516Rules);517}518519void GenericTaintRuleParser::parseConfig(const std::string &Option,520TaintConfiguration::Filter &&S,521RulesContTy &Rules) const {522validateArgVector(Option, S.FilterArgs);523consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),524Rules);525}526527void GenericTaintRuleParser::parseConfig(const std::string &Option,528TaintConfiguration::Propagation &&P,529RulesContTy &Rules) const {530validateArgVector(Option, P.SrcArgs);531validateArgVector(Option, P.DstArgs);532bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;533bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;534std::optional<ArgIdxTy> JustVarIndex = P.VarIndex;535536ArgSet SrcDesc(std::move(P.SrcArgs),537IsSrcVariadic ? JustVarIndex : std::nullopt);538ArgSet DstDesc(std::move(P.DstArgs),539IsDstVariadic ? JustVarIndex : std::nullopt);540541consumeRulesFromConfig(542P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);543}544545GenericTaintRuleParser::RulesContTy546GenericTaintRuleParser::parseConfiguration(const std::string &Option,547TaintConfiguration &&Config) const {548549RulesContTy Rules;550551for (auto &F : Config.Filters)552parseConfig(Option, std::move(F), Rules);553554for (auto &S : Config.Sinks)555parseConfig(Option, std::move(S), Rules);556557for (auto &P : Config.Propagations)558parseConfig(Option, std::move(P), Rules);559560return Rules;561}562563void GenericTaintChecker::initTaintRules(CheckerContext &C) const {564// Check for exact name match for functions without builtin substitutes.565// Use qualified name, because these are C functions without namespace.566567if (StaticTaintRules || DynamicTaintRules)568return;569570using RulesConstructionTy =571std::vector<std::pair<CallDescription, GenericTaintRule>>;572using TR = GenericTaintRule;573574RulesConstructionTy GlobalCRules{575// Sources576{{CDM::CLibrary, {"fdopen"}}, TR::Source({{ReturnValueIndex}})},577{{CDM::CLibrary, {"fopen"}}, TR::Source({{ReturnValueIndex}})},578{{CDM::CLibrary, {"freopen"}}, TR::Source({{ReturnValueIndex}})},579{{CDM::CLibrary, {"getch"}}, TR::Source({{ReturnValueIndex}})},580{{CDM::CLibrary, {"getchar"}}, TR::Source({{ReturnValueIndex}})},581{{CDM::CLibrary, {"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},582{{CDM::CLibrary, {"gets"}}, TR::Source({{0, ReturnValueIndex}})},583{{CDM::CLibrary, {"gets_s"}}, TR::Source({{0, ReturnValueIndex}})},584{{CDM::CLibrary, {"scanf"}}, TR::Source({{}, 1})},585{{CDM::CLibrary, {"scanf_s"}}, TR::Source({{}, 1})},586{{CDM::CLibrary, {"wgetch"}}, TR::Source({{ReturnValueIndex}})},587// Sometimes the line between taint sources and propagators is blurry.588// _IO_getc is choosen to be a source, but could also be a propagator.589// This way it is simpler, as modeling it as a propagator would require590// to model the possible sources of _IO_FILE * values, which the _IO_getc591// function takes as parameters.592{{CDM::CLibrary, {"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},593{{CDM::CLibrary, {"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},594{{CDM::CLibrary, {"getwd"}}, TR::Source({{0, ReturnValueIndex}})},595{{CDM::CLibrary, {"readlink"}}, TR::Source({{1, ReturnValueIndex}})},596{{CDM::CLibrary, {"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},597{{CDM::CLibrary, {"get_current_dir_name"}},598TR::Source({{ReturnValueIndex}})},599{{CDM::CLibrary, {"gethostname"}}, TR::Source({{0}})},600{{CDM::CLibrary, {"getnameinfo"}}, TR::Source({{2, 4}})},601{{CDM::CLibrary, {"getseuserbyname"}}, TR::Source({{1, 2}})},602{{CDM::CLibrary, {"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},603{{CDM::CLibrary, {"getlogin"}}, TR::Source({{ReturnValueIndex}})},604{{CDM::CLibrary, {"getlogin_r"}}, TR::Source({{0}})},605606// Props607{{CDM::CLibrary, {"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},608{{CDM::CLibrary, {"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},609{{CDM::CLibrary, {"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},610{{CDM::CLibrary, {"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},611{{CDM::CLibrary, {"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},612{{CDM::CLibrary, {"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},613{{CDM::CLibraryMaybeHardened, {"fgets"}},614TR::Prop({{2}}, {{0, ReturnValueIndex}})},615{{CDM::CLibraryMaybeHardened, {"fgetws"}},616TR::Prop({{2}}, {{0, ReturnValueIndex}})},617{{CDM::CLibrary, {"fscanf"}}, TR::Prop({{0}}, {{}, 2})},618{{CDM::CLibrary, {"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})},619{{CDM::CLibrary, {"sscanf"}}, TR::Prop({{0}}, {{}, 2})},620{{CDM::CLibrary, {"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})},621622{{CDM::CLibrary, {"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},623{{CDM::CLibrary, {"getc_unlocked"}},624TR::Prop({{0}}, {{ReturnValueIndex}})},625{{CDM::CLibrary, {"getdelim"}}, TR::Prop({{3}}, {{0}})},626// TODO: this intends to match the C function `getline()`, but the call627// description also matches the C++ function `std::getline()`; it should628// be ruled out by some additional logic.629{{CDM::CLibrary, {"getline"}}, TR::Prop({{2}}, {{0}})},630{{CDM::CLibrary, {"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},631{{CDM::CLibraryMaybeHardened, {"pread"}},632TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},633{{CDM::CLibraryMaybeHardened, {"read"}},634TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},635{{CDM::CLibraryMaybeHardened, {"fread"}},636TR::Prop({{3}}, {{0, ReturnValueIndex}})},637{{CDM::CLibraryMaybeHardened, {"recv"}},638TR::Prop({{0}}, {{1, ReturnValueIndex}})},639{{CDM::CLibraryMaybeHardened, {"recvfrom"}},640TR::Prop({{0}}, {{1, ReturnValueIndex}})},641642{{CDM::CLibrary, {"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},643{{CDM::CLibrary, {"ttyname_r"}},644TR::Prop({{0}}, {{1, ReturnValueIndex}})},645646{{CDM::CLibrary, {"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},647{{CDM::CLibrary, {"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},648{{CDM::CLibrary, {"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},649650{{CDM::CLibrary, {"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},651{{CDM::CLibrary, {"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},652{{CDM::CLibrary, {"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},653654{{CDM::CLibrary, {"memcmp"}},655TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},656{{CDM::CLibraryMaybeHardened, {"memcpy"}},657TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},658{{CDM::CLibraryMaybeHardened, {"memmove"}},659TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},660{{CDM::CLibraryMaybeHardened, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})},661662// Note: "memmem" and its variants search for a byte sequence ("needle")663// in a larger area ("haystack"). Currently we only propagate taint from664// the haystack to the result, but in theory tampering with the needle665// could also produce incorrect results.666{{CDM::CLibrary, {"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},667{{CDM::CLibrary, {"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},668{{CDM::CLibrary, {"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},669670// Analogously, the following functions search for a byte within a buffer671// and we only propagate taint from the buffer to the result.672{{CDM::CLibraryMaybeHardened, {"memchr"}},673TR::Prop({{0}}, {{ReturnValueIndex}})},674{{CDM::CLibraryMaybeHardened, {"memrchr"}},675TR::Prop({{0}}, {{ReturnValueIndex}})},676{{CDM::CLibrary, {"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},677{{CDM::CLibraryMaybeHardened, {"strchr"}},678TR::Prop({{0}}, {{ReturnValueIndex}})},679{{CDM::CLibraryMaybeHardened, {"strrchr"}},680TR::Prop({{0}}, {{ReturnValueIndex}})},681{{CDM::CLibraryMaybeHardened, {"strchrnul"}},682TR::Prop({{0}}, {{ReturnValueIndex}})},683{{CDM::CLibrary, {"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},684{{CDM::CLibrary, {"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},685686// FIXME: In case of arrays, only the first element of the array gets687// tainted.688{{CDM::CLibrary, {"qsort"}}, TR::Prop({{0}}, {{0}})},689{{CDM::CLibrary, {"qsort_r"}}, TR::Prop({{0}}, {{0}})},690691{{CDM::CLibrary, {"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},692{{CDM::CLibrary, {"strcasecmp"}},693TR::Prop({{0, 1}}, {{ReturnValueIndex}})},694{{CDM::CLibrary, {"strncmp"}},695TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},696{{CDM::CLibrary, {"strncasecmp"}},697TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},698{{CDM::CLibrary, {"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},699{{CDM::CLibrary, {"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},700{{CDM::CLibrary, {"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},701702{{CDM::CLibrary, {"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},703{{CDM::CLibrary, {"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},704{{CDM::CLibrary, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},705{{CDM::CLibrary, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},706{{CDM::CLibrary, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},707708// strlen, wcslen, strnlen and alike intentionally don't propagate taint.709// See the details here: https://github.com/llvm/llvm-project/pull/66086710711{{CDM::CLibrary, {"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},712{{CDM::CLibrary, {"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},713{{CDM::CLibrary, {"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},714{{CDM::CLibrary, {"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},715716{{CDM::CLibrary, {"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},717{{CDM::CLibrary, {"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},718719{{CDM::CLibrary, {"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},720{{CDM::CLibrary, {"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},721{{CDM::CLibrary, {"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},722{{CDM::CLibrary, {"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},723{{CDM::CLibrary, {"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},724{{CDM::CLibrary, {"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},725{{CDM::CLibrary, {"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},726{{CDM::CLibrary, {"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},727{{CDM::CLibrary, {"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},728{{CDM::CLibrary, {"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},729{{CDM::CLibrary, {"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},730{{CDM::CLibrary, {"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},731{{CDM::CLibrary, {"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},732733{{CDM::CLibraryMaybeHardened, {"strcpy"}},734TR::Prop({{1}}, {{0, ReturnValueIndex}})},735{{CDM::CLibraryMaybeHardened, {"stpcpy"}},736TR::Prop({{1}}, {{0, ReturnValueIndex}})},737{{CDM::CLibraryMaybeHardened, {"strcat"}},738TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},739{{CDM::CLibraryMaybeHardened, {"wcsncat"}},740TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})},741{{CDM::CLibraryMaybeHardened, {"strncpy"}},742TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},743{{CDM::CLibraryMaybeHardened, {"strncat"}},744TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})},745{{CDM::CLibraryMaybeHardened, {"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})},746{{CDM::CLibraryMaybeHardened, {"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})},747748// Usually the matching mode `CDM::CLibraryMaybeHardened` is sufficient749// for unified handling of a function `FOO()` and its hardened variant750// `__FOO_chk()`, but in the "sprintf" family the extra parameters of the751// hardened variants are inserted into the middle of the parameter list,752// so that would not work in their case.753// int snprintf(char * str, size_t maxlen, const char * format, ...);754{{CDM::CLibrary, {"snprintf"}},755TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})},756// int sprintf(char * str, const char * format, ...);757{{CDM::CLibrary, {"sprintf"}},758TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},759// int __snprintf_chk(char * str, size_t maxlen, int flag, size_t strlen,760// const char * format, ...);761{{CDM::CLibrary, {"__snprintf_chk"}},762TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})},763// int __sprintf_chk(char * str, int flag, size_t strlen, const char *764// format, ...);765{{CDM::CLibrary, {"__sprintf_chk"}},766TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})},767768// Sinks769{{CDM::CLibrary, {"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},770{{CDM::CLibrary, {"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},771{{CDM::CLibrary, {"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},772{{CDM::CLibrary, {"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},773{{CDM::CLibrary, {"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)},774{{CDM::CLibrary, {"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},775{{CDM::CLibrary, {"execve"}},776TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},777{{CDM::CLibrary, {"fexecve"}},778TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},779{{CDM::CLibrary, {"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)},780{{CDM::CLibrary, {"execvpe"}},781TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)},782{{CDM::CLibrary, {"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},783784// malloc, calloc, alloca, realloc, memccpy785// are intentionally not marked as taint sinks because unconditional786// reporting for these functions generates many false positives.787// These taint sinks should be implemented in other checkers with more788// sophisticated sanitation heuristics.789790{{CDM::CLibrary, {"setproctitle"}},791TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},792{{CDM::CLibrary, {"setproctitle_fast"}},793TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}};794795if (TR::UntrustedEnv(C)) {796// void setproctitle_init(int argc, char *argv[], char *envp[])797// TODO: replace `MsgCustomSink` with a message that fits this situation.798GlobalCRules.push_back({{CDM::CLibrary, {"setproctitle_init"}},799TR::Sink({{1, 2}}, MsgCustomSink)});800801// `getenv` returns taint only in untrusted environments.802GlobalCRules.push_back(803{{CDM::CLibrary, {"getenv"}}, TR::Source({{ReturnValueIndex}})});804}805806StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),807std::make_move_iterator(GlobalCRules.end()));808809// User-provided taint configuration.810CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();811assert(Mgr);812GenericTaintRuleParser ConfigParser{*Mgr};813std::string Option{"Config"};814StringRef ConfigFile =815Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);816std::optional<TaintConfiguration> Config =817getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);818if (!Config) {819// We don't have external taint config, no parsing required.820DynamicTaintRules = RuleLookupTy{};821return;822}823824GenericTaintRuleParser::RulesContTy Rules{825ConfigParser.parseConfiguration(Option, std::move(*Config))};826827DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),828std::make_move_iterator(Rules.end()));829}830831void GenericTaintChecker::checkPreCall(const CallEvent &Call,832CheckerContext &C) const {833initTaintRules(C);834835// FIXME: this should be much simpler.836if (const auto *Rule =837Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)838Rule->process(*this, Call, C);839else if (const auto *Rule = DynamicTaintRules->lookup(Call))840Rule->process(*this, Call, C);841842// FIXME: These edge cases are to be eliminated from here eventually.843//844// Additional check that is not supported by CallDescription.845// TODO: Make CallDescription be able to match attributes such as printf-like846// arguments.847checkUncontrolledFormatString(Call, C);848849// TODO: Modeling sockets should be done in a specific checker.850// Socket is a source, which taints the return value.851taintUnsafeSocketProtocol(Call, C);852}853854void GenericTaintChecker::checkPostCall(const CallEvent &Call,855CheckerContext &C) const {856// Set the marked values as tainted. The return value only accessible from857// checkPostStmt.858ProgramStateRef State = C.getState();859const StackFrameContext *CurrentFrame = C.getStackFrame();860861// Depending on what was tainted at pre-visit, we determined a set of862// arguments which should be tainted after the function returns. These are863// stored in the state as TaintArgsOnPostVisit set.864TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();865866const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);867if (!TaintArgs)868return;869assert(!TaintArgs->isEmpty());870871LLVM_DEBUG(for (ArgIdxTy I872: *TaintArgs) {873llvm::dbgs() << "PostCall<";874Call.dump(llvm::dbgs());875llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';876});877878const NoteTag *InjectionTag = nullptr;879std::vector<SymbolRef> TaintedSymbols;880std::vector<ArgIdxTy> TaintedIndexes;881for (ArgIdxTy ArgNum : *TaintArgs) {882// Special handling for the tainted return value.883if (ArgNum == ReturnValueIndex) {884State = addTaint(State, Call.getReturnValue());885std::vector<SymbolRef> TaintedSyms =886getTaintedSymbols(State, Call.getReturnValue());887if (!TaintedSyms.empty()) {888TaintedSymbols.push_back(TaintedSyms[0]);889TaintedIndexes.push_back(ArgNum);890}891continue;892}893// The arguments are pointer arguments. The data they are pointing at is894// tainted after the call.895if (auto V = getPointeeOf(State, Call.getArgSVal(ArgNum))) {896State = addTaint(State, *V);897std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, *V);898if (!TaintedSyms.empty()) {899TaintedSymbols.push_back(TaintedSyms[0]);900TaintedIndexes.push_back(ArgNum);901}902}903}904// Create a NoteTag callback, which prints to the user where the taintedness905// was propagated to.906InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes,907Call.getCalleeStackFrame(0));908// Clear up the taint info from the state.909State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);910C.addTransition(State, InjectionTag);911}912913void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,914const char *NL, const char *Sep) const {915printTaint(State, Out, NL, Sep);916}917918void GenericTaintRule::process(const GenericTaintChecker &Checker,919const CallEvent &Call, CheckerContext &C) const {920ProgramStateRef State = C.getState();921const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());922923/// Iterate every call argument, and get their corresponding Expr and SVal.924const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {925for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {926const Expr *E = GetArgExpr(I, Call);927Fun(I, E, C.getSVal(E));928}929};930931/// Check for taint sinks.932ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {933// Add taintedness to stdin parameters934if (isStdin(C.getSVal(E), C.getASTContext())) {935State = addTaint(State, C.getSVal(E));936}937if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State, C.getSVal(E)))938Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);939});940941/// Check for taint filters.942ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) {943if (FilterArgs.contains(I)) {944State = removeTaint(State, S);945if (auto P = getPointeeOf(State, S))946State = removeTaint(State, *P);947}948});949950/// Check for taint propagation sources.951/// A rule will make the destination variables tainted if PropSrcArgs952/// is empty (taints the destination953/// arguments unconditionally), or if any of its signified954/// args are tainted in context of the current CallEvent.955bool IsMatching = PropSrcArgs.isEmpty();956std::vector<SymbolRef> TaintedSymbols;957std::vector<ArgIdxTy> TaintedIndexes;958ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols,959&TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) {960std::optional<SVal> TaintedSVal =961getTaintedPointeeOrPointer(State, C.getSVal(E));962IsMatching =963IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());964965// We track back tainted arguments except for stdin966if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) {967std::vector<SymbolRef> TaintedArgSyms =968getTaintedSymbols(State, *TaintedSVal);969if (!TaintedArgSyms.empty()) {970llvm::append_range(TaintedSymbols, TaintedArgSyms);971TaintedIndexes.push_back(I);972}973}974});975976// Early return for propagation rules which dont match.977// Matching propagations, Sinks and Filters will pass this point.978if (!IsMatching)979return;980981const auto WouldEscape = [](SVal V, QualType Ty) -> bool {982if (!isa<Loc>(V))983return false;984985const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();986const bool IsNonConstPtr =987Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();988989return IsNonConstRef || IsNonConstPtr;990};991992/// Propagate taint where it is necessary.993auto &F = State->getStateManager().get_context<ArgIdxFactory>();994ImmutableSet<ArgIdxTy> Result = F.getEmptySet();995ForEachCallArg(996[&](ArgIdxTy I, const Expr *E, SVal V) {997if (PropDstArgs.contains(I)) {998LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());999llvm::dbgs()1000<< "> prepares tainting arg index: " << I << '\n';);1001Result = F.add(Result, I);1002}10031004// Taint property gets lost if the variable is passed as a1005// non-const pointer or reference to a function which is1006// not inlined. For matching rules we want to preserve the taintedness.1007// TODO: We should traverse all reachable memory regions via the1008// escaping parameter. Instead of doing that we simply mark only the1009// referred memory region as tainted.1010if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, V)) {1011LLVM_DEBUG(if (!Result.contains(I)) {1012llvm::dbgs() << "PreCall<";1013Call.dump(llvm::dbgs());1014llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';1015});1016Result = F.add(Result, I);1017}1018});10191020if (!Result.isEmpty())1021State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);1022const NoteTag *InjectionTag = taintOriginTrackerTag(1023C, std::move(TaintedSymbols), std::move(TaintedIndexes),1024Call.getCalleeStackFrame(0));1025C.addTransition(State, InjectionTag);1026}10271028bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {1029return !C.getAnalysisManager()1030.getAnalyzerOptions()1031.ShouldAssumeControlledEnvironment;1032}10331034bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,1035CheckerContext &C) const {1036assert(E);1037if (!isTaintReporterCheckerEnabled)1038return false;1039std::optional<SVal> TaintedSVal =1040getTaintedPointeeOrPointer(C.getState(), C.getSVal(E));10411042if (!TaintedSVal)1043return false;10441045// Generate diagnostic.1046assert(BT);1047static CheckerProgramPointTag Tag(BT->getCheckerName(), Msg);1048if (ExplodedNode *N = C.generateNonFatalErrorNode(C.getState(), &Tag)) {1049auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);1050report->addRange(E->getSourceRange());1051for (auto TaintedSym : getTaintedSymbols(C.getState(), *TaintedSVal)) {1052report->markInteresting(TaintedSym);1053}1054C.emitReport(std::move(report));1055return true;1056}1057return false;1058}10591060/// TODO: remove checking for printf format attributes and socket whitelisting1061/// from GenericTaintChecker, and that means the following functions:1062/// getPrintfFormatArgumentNum,1063/// GenericTaintChecker::checkUncontrolledFormatString,1064/// GenericTaintChecker::taintUnsafeSocketProtocol10651066static bool getPrintfFormatArgumentNum(const CallEvent &Call,1067const CheckerContext &C,1068ArgIdxTy &ArgNum) {1069// Find if the function contains a format string argument.1070// Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,1071// vsnprintf, syslog, custom annotated functions.1072const Decl *CallDecl = Call.getDecl();1073if (!CallDecl)1074return false;1075const FunctionDecl *FDecl = CallDecl->getAsFunction();1076if (!FDecl)1077return false;10781079const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());10801081for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {1082ArgNum = Format->getFormatIdx() - 1;1083if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum)1084return true;1085}10861087return false;1088}10891090bool GenericTaintChecker::checkUncontrolledFormatString(1091const CallEvent &Call, CheckerContext &C) const {1092// Check if the function contains a format string argument.1093ArgIdxTy ArgNum = 0;1094if (!getPrintfFormatArgumentNum(Call, C, ArgNum))1095return false;10961097// If either the format string content or the pointer itself are tainted,1098// warn.1099return generateReportIfTainted(Call.getArgExpr(ArgNum),1100MsgUncontrolledFormatString, C);1101}11021103void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,1104CheckerContext &C) const {1105if (Call.getNumArgs() < 1)1106return;1107const IdentifierInfo *ID = Call.getCalleeIdentifier();1108if (!ID)1109return;1110if (ID->getName() != "socket")1111return;11121113SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();1114StringRef DomName = C.getMacroNameOrSpelling(DomLoc);1115// Allow internal communication protocols.1116bool SafeProtocol = DomName == "AF_SYSTEM" || DomName == "AF_LOCAL" ||1117DomName == "AF_UNIX" || DomName == "AF_RESERVED_36";1118if (SafeProtocol)1119return;11201121ProgramStateRef State = C.getState();1122auto &F = State->getStateManager().get_context<ArgIdxFactory>();1123ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);1124State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);1125C.addTransition(State);1126}11271128/// Checker registration1129void ento::registerTaintPropagationChecker(CheckerManager &Mgr) {1130Mgr.registerChecker<GenericTaintChecker>();1131}11321133bool ento::shouldRegisterTaintPropagationChecker(const CheckerManager &mgr) {1134return true;1135}11361137void ento::registerGenericTaintChecker(CheckerManager &Mgr) {1138GenericTaintChecker *checker = Mgr.getChecker<GenericTaintChecker>();1139checker->isTaintReporterCheckerEnabled = true;1140checker->BT.emplace(Mgr.getCurrentCheckerName(), "Use of Untrusted Data",1141categories::TaintedData);1142}11431144bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {1145return true;1146}114711481149