llvm-project/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Borsik Gabor 273e674252 [analyzer] Add support for namespaces to GenericTaintChecker
This patch introduces the namespaces for the configured functions and
also enables the use of the member functions.

I added an optional Scope field for every configured function. Functions
without Scope match for every function regardless of the namespace.
Functions with Scope will match if the full name of the function starts
with the Scope.
Multiple functions can exist with the same name.

Differential Revision: https://reviews.llvm.org/D70878
2019-12-15 12:11:22 +01:00

948 lines
35 KiB
C++

//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under constrained.
//
//===----------------------------------------------------------------------===//
#include "Taint.h"
#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <limits>
#include <unordered_map>
#include <utility>
using namespace clang;
using namespace ento;
using namespace taint;
namespace {
class GenericTaintChecker
: public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
public:
static void *getTag() {
static int Tag;
return &Tag;
}
void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
const char *Sep) const override;
using ArgVector = SmallVector<unsigned, 2>;
using SignedArgVector = SmallVector<int, 2>;
enum class VariadicType { None, Src, Dst };
/// Used to parse the configuration file.
struct TaintConfiguration {
using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
struct Propagation {
std::string Name;
std::string Scope;
ArgVector SrcArgs;
SignedArgVector DstArgs;
VariadicType VarType;
unsigned VarIndex;
};
std::vector<Propagation> Propagations;
std::vector<NameScopeArgs> Filters;
std::vector<NameScopeArgs> Sinks;
TaintConfiguration() = default;
TaintConfiguration(const TaintConfiguration &) = default;
TaintConfiguration(TaintConfiguration &&) = default;
TaintConfiguration &operator=(const TaintConfiguration &) = default;
TaintConfiguration &operator=(TaintConfiguration &&) = default;
};
/// Convert SignedArgVector to ArgVector.
ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
SignedArgVector Args);
/// Parse the config.
void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
TaintConfiguration &&Config);
static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
/// Denotes the return vale.
static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
1};
private:
mutable std::unique_ptr<BugType> BT;
void initBugType() const {
if (!BT)
BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
}
struct FunctionData {
FunctionData() = delete;
FunctionData(const FunctionData &) = default;
FunctionData(FunctionData &&) = default;
FunctionData &operator=(const FunctionData &) = delete;
FunctionData &operator=(FunctionData &&) = delete;
static Optional<FunctionData> create(const CallExpr *CE,
const CheckerContext &C) {
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl || (FDecl->getKind() != Decl::Function &&
FDecl->getKind() != Decl::CXXMethod))
return None;
StringRef Name = C.getCalleeName(FDecl);
std::string FullName = FDecl->getQualifiedNameAsString();
if (Name.empty() || FullName.empty())
return None;
return FunctionData{FDecl, Name, FullName};
}
bool isInScope(StringRef Scope) const {
return StringRef(FullName).startswith(Scope);
}
const FunctionDecl *const FDecl;
const StringRef Name;
const std::string FullName;
};
/// Catch taint related bugs. Check if tainted data is passed to a
/// system call etc. Returns true on matching.
bool checkPre(const CallExpr *CE, const FunctionData &FData,
CheckerContext &C) const;
/// Add taint sources on a pre-visit. Returns true on matching.
bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
CheckerContext &C) const;
/// Mark filter's arguments not tainted on a pre-visit. Returns true on
/// matching.
bool addFiltersPre(const CallExpr *CE, const FunctionData &FData,
CheckerContext &C) const;
/// Propagate taint generated at pre-visit. Returns true on matching.
bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
/// Check if the region the expression evaluates to is the standard input,
/// and thus, is tainted.
static bool isStdin(const Expr *E, CheckerContext &C);
/// Given a pointer argument, return the value it points to.
static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
/// Check for CWE-134: Uncontrolled Format String.
static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
"Untrusted data is used as a format string "
"(CWE-134: Uncontrolled Format String)";
bool checkUncontrolledFormatString(const CallExpr *CE,
CheckerContext &C) const;
/// Check for:
/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
/// CWE-78, "Failure to Sanitize Data into an OS Command"
static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
"Untrusted data is passed to a system call "
"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
bool checkSystemCall(const CallExpr *CE, StringRef Name,
CheckerContext &C) const;
/// Check if tainted data is used as a buffer size ins strn.. functions,
/// and allocators.
static constexpr llvm::StringLiteral MsgTaintedBufferSize =
"Untrusted data is used to specify the buffer size "
"(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
"for character data and the null terminator)";
bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
CheckerContext &C) const;
/// Check if tainted data is used as a custom sink's parameter.
static constexpr llvm::StringLiteral MsgCustomSink =
"Untrusted data is passed to a user-defined sink";
bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData,
CheckerContext &C) const;
/// Generate a report if the expression is tainted or points to tainted data.
bool generateReportIfTainted(const Expr *E, StringRef Msg,
CheckerContext &C) const;
struct TaintPropagationRule;
template <typename T>
using ConfigDataMap =
std::unordered_multimap<std::string, std::pair<std::string, T>>;
using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
using NameArgMap = ConfigDataMap<ArgVector>;
/// Find a function with the given name and scope. Returns the first match
/// or the end of the map.
template <typename T>
static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
const FunctionData &FData);
/// A struct used to specify taint propagation rules for a function.
///
/// If any of the possible taint source arguments is tainted, all of the
/// destination arguments should also be tainted. Use InvalidArgIndex in the
/// src list to specify that all of the arguments can introduce taint. Use
/// InvalidArgIndex in the dst arguments to signify that all the non-const
/// pointer and reference arguments might be tainted on return. If
/// ReturnValueIndex is added to the dst list, the return value will be
/// tainted.
struct TaintPropagationRule {
using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
CheckerContext &C);
/// List of arguments which can be taint sources and should be checked.
ArgVector SrcArgs;
/// List of arguments which should be tainted on function return.
ArgVector DstArgs;
/// Index for the first variadic parameter if exist.
unsigned VariadicIndex;
/// Show when a function has variadic parameters. If it has, it marks all
/// of them as source or destination.
VariadicType VarType;
/// Special function for tainted source determination. If defined, it can
/// override the default behavior.
PropagationFuncType PropagationFunc;
TaintPropagationRule()
: VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
PropagationFunc(nullptr) {}
TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
VariadicType Var = VariadicType::None,
unsigned VarIndex = InvalidArgIndex,
PropagationFuncType Func = nullptr)
: SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
/// Get the propagation rule for a given function.
static TaintPropagationRule
getTaintPropagationRule(const NameRuleMap &CustomPropagations,
const FunctionData &FData, CheckerContext &C);
void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
void addDstArg(unsigned A) { DstArgs.push_back(A); }
bool isNull() const {
return SrcArgs.empty() && DstArgs.empty() &&
VariadicType::None == VarType;
}
bool isDestinationArgument(unsigned ArgNum) const {
return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
}
static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
CheckerContext &C) {
if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
return true;
if (!E->getType().getTypePtr()->isPointerType())
return false;
Optional<SVal> V = getPointedToSVal(C, E);
return (V && isTainted(State, *V));
}
/// Pre-process a function which propagates taint according to the
/// taint rule.
ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
// Functions for custom taintedness propagation.
static bool postSocket(bool IsTainted, const CallExpr *CE,
CheckerContext &C);
};
/// Defines a map between the propagation function's name, scope
/// and TaintPropagationRule.
NameRuleMap CustomPropagations;
/// Defines a map between the filter function's name, scope and filtering
/// args.
NameArgMap CustomFilters;
/// Defines a map between the sink function's name, scope and sinking args.
NameArgMap CustomSinks;
};
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;
// FIXME: these lines can be removed in C++17
constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
} // end of anonymous namespace
using TaintConfig = GenericTaintChecker::TaintConfiguration;
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
namespace llvm {
namespace yaml {
template <> struct MappingTraits<TaintConfig> {
static void mapping(IO &IO, TaintConfig &Config) {
IO.mapOptional("Propagations", Config.Propagations);
IO.mapOptional("Filters", Config.Filters);
IO.mapOptional("Sinks", Config.Sinks);
}
};
template <> struct MappingTraits<TaintConfig::Propagation> {
static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
IO.mapRequired("Name", Propagation.Name);
IO.mapOptional("Scope", Propagation.Scope);
IO.mapOptional("SrcArgs", Propagation.SrcArgs);
IO.mapOptional("DstArgs", Propagation.DstArgs);
IO.mapOptional("VariadicType", Propagation.VarType,
GenericTaintChecker::VariadicType::None);
IO.mapOptional("VariadicIndex", Propagation.VarIndex,
GenericTaintChecker::InvalidArgIndex);
}
};
template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
}
};
template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
IO.mapRequired("Name", std::get<0>(NSA));
IO.mapOptional("Scope", std::get<1>(NSA));
IO.mapRequired("Args", std::get<2>(NSA));
}
};
} // namespace yaml
} // namespace llvm
/// A set which is used to pass information from call pre-visit instruction
/// to the call post-visit. The values are unsigned integers, which are either
/// ReturnValueIndex, or indexes of the pointer/reference argument, which
/// points to data, which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
ArgVector Result;
for (int Arg : Args) {
if (Arg == -1)
Result.push_back(ReturnValueIndex);
else if (Arg < -1) {
Result.push_back(InvalidArgIndex);
Mgr.reportInvalidCheckerOptionValue(
this, Option,
"an argument number for propagation rules greater or equal to -1");
} else
Result.push_back(static_cast<unsigned>(Arg));
}
return Result;
}
void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
const std::string &Option,
TaintConfiguration &&Config) {
for (auto &P : Config.Propagations) {
GenericTaintChecker::CustomPropagations.emplace(
P.Name,
std::make_pair(P.Scope, TaintPropagationRule{
std::move(P.SrcArgs),
convertToArgVector(Mgr, Option, P.DstArgs),
P.VarType, P.VarIndex}));
}
for (auto &F : Config.Filters) {
GenericTaintChecker::CustomFilters.emplace(
std::get<0>(F),
std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
}
for (auto &S : Config.Sinks) {
GenericTaintChecker::CustomSinks.emplace(
std::get<0>(S),
std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
}
}
template <typename T>
auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
const FunctionData &FData) {
auto Range = Map.equal_range(FData.Name);
auto It =
std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
const auto &Value = Entry.second;
StringRef Scope = Value.first;
return Scope.empty() || FData.isInScope(Scope);
});
return It != Range.second ? It : Map.end();
}
GenericTaintChecker::TaintPropagationRule
GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
const NameRuleMap &CustomPropagations, const FunctionData &FData,
CheckerContext &C) {
// TODO: Currently, we might lose precision here: we always mark a return
// value as tainted even if it's just a pointer, pointing to tainted data.
// Check for exact name match for functions without builtin substitutes.
// Use qualified name, because these are C functions without namespace.
TaintPropagationRule Rule =
llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
// Source functions
// TODO: Add support for vfscanf & family.
.Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getchar_unlocked",
TaintPropagationRule({}, {ReturnValueIndex}))
.Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
.Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
.Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
.Case("socket",
TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
InvalidArgIndex,
&TaintPropagationRule::postSocket))
.Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
// Propagating functions
.Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
.Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
.Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("getdelim", TaintPropagationRule({3}, {0}))
.Case("getline", TaintPropagationRule({2}, {0}))
.Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("pread",
TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
.Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
.Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
.Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
.Default(TaintPropagationRule());
if (!Rule.isNull())
return Rule;
// Check if it's one of the memory setting/copying functions.
// This check is specialized but faster then calling isCLibraryFunction.
const FunctionDecl *FDecl = FData.FDecl;
unsigned BId = 0;
if ((BId = FDecl->getMemoryFunctionKind()))
switch (BId) {
case Builtin::BImemcpy:
case Builtin::BImemmove:
case Builtin::BIstrncpy:
case Builtin::BIstrncat:
return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
case Builtin::BIstrlcpy:
case Builtin::BIstrlcat:
return TaintPropagationRule({1, 2}, {0});
case Builtin::BIstrndup:
return TaintPropagationRule({0, 1}, {ReturnValueIndex});
default:
break;
};
// Process all other functions which could be defined as builtins.
if (Rule.isNull()) {
if (C.isCLibraryFunction(FDecl, "snprintf"))
return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
3);
else if (C.isCLibraryFunction(FDecl, "sprintf"))
return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
2);
else if (C.isCLibraryFunction(FDecl, "strcpy") ||
C.isCLibraryFunction(FDecl, "stpcpy") ||
C.isCLibraryFunction(FDecl, "strcat"))
return TaintPropagationRule({1}, {0, ReturnValueIndex});
else if (C.isCLibraryFunction(FDecl, "bcopy"))
return TaintPropagationRule({0, 2}, {1});
else if (C.isCLibraryFunction(FDecl, "strdup") ||
C.isCLibraryFunction(FDecl, "strdupa"))
return TaintPropagationRule({0}, {ReturnValueIndex});
else if (C.isCLibraryFunction(FDecl, "wcsdup"))
return TaintPropagationRule({0}, {ReturnValueIndex});
}
// Skipping the following functions, since they might be used for cleansing
// or smart memory copy:
// - memccpy - copying until hitting a special character.
auto It = findFunctionInConfig(CustomPropagations, FData);
if (It != CustomPropagations.end()) {
const auto &Value = It->second;
return Value.second;
}
return TaintPropagationRule();
}
void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
CheckerContext &C) const {
Optional<FunctionData> FData = FunctionData::create(CE, C);
if (!FData)
return;
// Check for taintedness related errors first: system call, uncontrolled
// format string, tainted buffer size.
if (checkPre(CE, *FData, C))
return;
// Marks the function's arguments and/or return value tainted if it present in
// the list.
if (addSourcesPre(CE, *FData, C))
return;
addFiltersPre(CE, *FData, C);
}
void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
CheckerContext &C) const {
// Set the marked values as tainted. The return value only accessible from
// checkPostStmt.
propagateFromPre(CE, C);
}
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
const char *NL, const char *Sep) const {
printTaint(State, Out, NL, Sep);
}
bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
const FunctionData &FData,
CheckerContext &C) const {
// First, try generating a propagation rule for this function.
TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
this->CustomPropagations, FData, C);
if (!Rule.isNull()) {
ProgramStateRef State = Rule.process(CE, C);
if (State) {
C.addTransition(State);
return true;
}
}
return false;
}
bool GenericTaintChecker::addFiltersPre(const CallExpr *CE,
const FunctionData &FData,
CheckerContext &C) const {
auto It = findFunctionInConfig(CustomFilters, FData);
if (It == CustomFilters.end())
return false;
ProgramStateRef State = C.getState();
const auto &Value = It->second;
const ArgVector &Args = Value.second;
for (unsigned ArgNum : Args) {
if (ArgNum >= CE->getNumArgs())
continue;
const Expr *Arg = CE->getArg(ArgNum);
Optional<SVal> V = getPointedToSVal(C, Arg);
if (V)
State = removeTaint(State, *V);
}
if (State != C.getState()) {
C.addTransition(State);
return true;
}
return false;
}
bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
// Depending on what was tainted at pre-visit, we determined a set of
// arguments which should be tainted after the function returns. These are
// stored in the state as TaintArgsOnPostVisit set.
TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
if (TaintArgs.isEmpty())
return false;
for (unsigned ArgNum : TaintArgs) {
// Special handling for the tainted return value.
if (ArgNum == ReturnValueIndex) {
State = addTaint(State, CE, C.getLocationContext());
continue;
}
// The arguments are pointer arguments. The data they are pointing at is
// tainted after the call.
if (CE->getNumArgs() < (ArgNum + 1))
return false;
const Expr *Arg = CE->getArg(ArgNum);
Optional<SVal> V = getPointedToSVal(C, Arg);
if (V)
State = addTaint(State, *V);
}
// Clear up the taint info from the state.
State = State->remove<TaintArgsOnPostVisit>();
if (State != C.getState()) {
C.addTransition(State);
return true;
}
return false;
}
bool GenericTaintChecker::checkPre(const CallExpr *CE,
const FunctionData &FData,
CheckerContext &C) const {
if (checkUncontrolledFormatString(CE, C))
return true;
if (checkSystemCall(CE, FData.Name, C))
return true;
if (checkTaintedBufferSize(CE, FData.FDecl, C))
return true;
if (checkCustomSinks(CE, FData, C))
return true;
return false;
}
Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
const Expr *Arg) {
ProgramStateRef State = C.getState();
SVal AddrVal = C.getSVal(Arg->IgnoreParens());
if (AddrVal.isUnknownOrUndef())
return None;
Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
if (!AddrLoc)
return None;
QualType ArgTy = Arg->getType().getCanonicalType();
if (!ArgTy->isPointerType())
return State->getSVal(*AddrLoc);
QualType ValTy = ArgTy->getPointeeType();
// Do not dereference void pointers. Treat them as byte pointers instead.
// FIXME: we might want to consider more than just the first byte.
if (ValTy->isVoidType())
ValTy = C.getASTContext().CharTy;
return State->getSVal(*AddrLoc, ValTy);
}
ProgramStateRef
GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
CheckerContext &C) const {
ProgramStateRef State = C.getState();
// Check for taint in arguments.
bool IsTainted = true;
for (unsigned ArgNum : SrcArgs) {
if (ArgNum >= CE->getNumArgs())
continue;
if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
break;
}
// Check for taint in variadic arguments.
if (!IsTainted && VariadicType::Src == VarType) {
// Check if any of the arguments is tainted
for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
break;
}
}
if (PropagationFunc)
IsTainted = PropagationFunc(IsTainted, CE, C);
if (!IsTainted)
return State;
// Mark the arguments which should be tainted after the function returns.
for (unsigned ArgNum : DstArgs) {
// Should mark the return value?
if (ArgNum == ReturnValueIndex) {
State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
continue;
}
if (ArgNum >= CE->getNumArgs())
continue;
// Mark the given argument.
State = State->add<TaintArgsOnPostVisit>(ArgNum);
}
// Mark all variadic arguments tainted if present.
if (VariadicType::Dst == VarType) {
// For all pointer and references that were passed in:
// If they are not pointing to const data, mark data as tainted.
// TODO: So far we are just going one level down; ideally we'd need to
// recurse here.
for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
const Expr *Arg = CE->getArg(i);
// Process pointer argument.
const Type *ArgTy = Arg->getType().getTypePtr();
QualType PType = ArgTy->getPointeeType();
if ((!PType.isNull() && !PType.isConstQualified()) ||
(ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
State = State->add<TaintArgsOnPostVisit>(i);
}
}
return State;
}
// If argument 0(protocol domain) is network, the return value should get taint.
bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
const CallExpr *CE,
CheckerContext &C) {
SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
// White list the internal communication protocols.
if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
return false;
return true;
}
bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
ProgramStateRef State = C.getState();
SVal Val = C.getSVal(E);
// stdin is a pointer, so it would be a region.
const MemRegion *MemReg = Val.getAsRegion();
// The region should be symbolic, we do not know it's value.
const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
if (!SymReg)
return false;
// Get it's symbol and find the declaration region it's pointing to.
const SymbolRegionValue *Sm =
dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
if (!Sm)
return false;
const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
if (!DeclReg)
return false;
// This region corresponds to a declaration, find out if it's a global/extern
// variable named stdin with the proper type.
if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
D = D->getCanonicalDecl();
if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
C.getASTContext().getFILEType().getCanonicalType())
return true;
}
}
return false;
}
static bool getPrintfFormatArgumentNum(const CallExpr *CE,
const CheckerContext &C,
unsigned &ArgNum) {
// Find if the function contains a format string argument.
// Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
// vsnprintf, syslog, custom annotated functions.
const FunctionDecl *FDecl = C.getCalleeDecl(CE);
if (!FDecl)
return false;
for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
ArgNum = Format->getFormatIdx() - 1;
if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
return true;
}
// Or if a function is named setproctitle (this is a heuristic).
if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
ArgNum = 0;
return true;
}
return false;
}
bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
CheckerContext &C) const {
assert(E);
// Check for taint.
ProgramStateRef State = C.getState();
Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
SVal TaintedSVal;
if (PointedToSVal && isTainted(State, *PointedToSVal))
TaintedSVal = *PointedToSVal;
else if (isTainted(State, E, C.getLocationContext()))
TaintedSVal = C.getSVal(E);
else
return false;
// Generate diagnostic.
if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
initBugType();
auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
report->addRange(E->getSourceRange());
report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
C.emitReport(std::move(report));
return true;
}
return false;
}
bool GenericTaintChecker::checkUncontrolledFormatString(
const CallExpr *CE, CheckerContext &C) const {
// Check if the function contains a format string argument.
unsigned ArgNum = 0;
if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
return false;
// If either the format string content or the pointer itself are tainted,
// warn.
return generateReportIfTainted(CE->getArg(ArgNum),
MsgUncontrolledFormatString, C);
}
bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
CheckerContext &C) const {
// TODO: It might make sense to run this check on demand. In some cases,
// we should check if the environment has been cleansed here. We also might
// need to know if the user was reset before these calls(seteuid).
unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
.Case("system", 0)
.Case("popen", 0)
.Case("execl", 0)
.Case("execle", 0)
.Case("execlp", 0)
.Case("execv", 0)
.Case("execvp", 0)
.Case("execvP", 0)
.Case("execve", 0)
.Case("dlopen", 0)
.Default(InvalidArgIndex);
if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
return false;
return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
}
// TODO: Should this check be a part of the CString checker?
// If yes, should taint be a global setting?
bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
const FunctionDecl *FDecl,
CheckerContext &C) const {
// If the function has a buffer size argument, set ArgNum.
unsigned ArgNum = InvalidArgIndex;
unsigned BId = 0;
if ((BId = FDecl->getMemoryFunctionKind()))
switch (BId) {
case Builtin::BImemcpy:
case Builtin::BImemmove:
case Builtin::BIstrncpy:
ArgNum = 2;
break;
case Builtin::BIstrndup:
ArgNum = 1;
break;
default:
break;
};
if (ArgNum == InvalidArgIndex) {
if (C.isCLibraryFunction(FDecl, "malloc") ||
C.isCLibraryFunction(FDecl, "calloc") ||
C.isCLibraryFunction(FDecl, "alloca"))
ArgNum = 0;
else if (C.isCLibraryFunction(FDecl, "memccpy"))
ArgNum = 3;
else if (C.isCLibraryFunction(FDecl, "realloc"))
ArgNum = 1;
else if (C.isCLibraryFunction(FDecl, "bcopy"))
ArgNum = 2;
}
return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
}
bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
const FunctionData &FData,
CheckerContext &C) const {
auto It = findFunctionInConfig(CustomSinks, FData);
if (It == CustomSinks.end())
return false;
const auto &Value = It->second;
const GenericTaintChecker::ArgVector &Args = Value.second;
for (unsigned ArgNum : Args) {
if (ArgNum >= CE->getNumArgs())
continue;
if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
return true;
}
return false;
}
void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
std::string Option{"Config"};
StringRef ConfigFile =
Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
llvm::Optional<TaintConfig> Config =
getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
if (Config)
Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
}
bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
return true;
}