[TySan] Add initial Type Sanitizer support to Clang (#76260)

This patch introduces the Clang components of type sanitizer: a
sanitizer for type-based aliasing violations.

It is based on Hal Finkel's https://reviews.llvm.org/D32198.

The Clang changes are mostly formulaic; the one notable change is that
when the type sanitizer is enabled, TBAA metadata is always generated,
even at -O0.

It goes together with the corresponding LLVM changes
(https://github.com/llvm/llvm-project/pull/76259) and compiler-rt
changes (https://github.com/llvm/llvm-project/pull/76261).

PR: https://github.com/llvm/llvm-project/pull/76260
This commit is contained in:
Florian Hahn 2024-12-17 15:13:42 +00:00 committed by GitHub
parent cf4375d107
commit c135f6ffe2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 183 additions and 13 deletions

View File

@ -1176,6 +1176,10 @@ Sanitizers
<https://clang.llvm.org/docs/SanitizerSpecialCaseList.html>`_. See that link
for examples.
- Introduced an experimental Type Sanitizer, activated by using the
``-fsanitize=type`` flag. This sanitizer detects violations of C/C++ type-based
aliasing rules.
Python Binding Changes
----------------------
- Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``.

View File

@ -102,6 +102,7 @@ FEATURE(numerical_stability_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Nume
FEATURE(memory_sanitizer,
LangOpts.Sanitize.hasOneOf(SanitizerKind::Memory |
SanitizerKind::KernelMemory))
FEATURE(type_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Type))
FEATURE(thread_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Thread))
FEATURE(dataflow_sanitizer, LangOpts.Sanitize.has(SanitizerKind::DataFlow))
FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))

View File

@ -73,6 +73,9 @@ SANITIZER("fuzzer", Fuzzer)
// libFuzzer-required instrumentation, no linking.
SANITIZER("fuzzer-no-link", FuzzerNoLink)
// TypeSanitizer
SANITIZER("type", Type)
// ThreadSanitizer
SANITIZER("thread", Thread)

View File

@ -87,6 +87,7 @@ public:
bool needsHwasanAliasesRt() const {
return needsHwasanRt() && HwasanUseAliases;
}
// Reports whether `Sanitizers` contains SanitizerKind::Type.
bool needsTysanRt() const { return Sanitizers.has(SanitizerKind::Type); }
bool needsTsanRt() const { return Sanitizers.has(SanitizerKind::Thread); }
bool needsMsanRt() const { return Sanitizers.has(SanitizerKind::Memory); }
bool needsFuzzer() const { return Sanitizers.has(SanitizerKind::Fuzzer); }

View File

@ -77,6 +77,7 @@
#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Instrumentation/TypeSanitizer.h"
#include "llvm/Transforms/ObjCARC.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/GVN.h"
@ -735,6 +736,11 @@ static void addSanitizers(const Triple &TargetTriple,
MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
}
if (LangOpts.Sanitize.has(SanitizerKind::Type)) {
MPM.addPass(ModuleTypeSanitizerPass());
MPM.addPass(createModuleToFunctionPassAdaptor(TypeSanitizerPass()));
}
if (LangOpts.Sanitize.has(SanitizerKind::NumericalStability))
MPM.addPass(NumericalStabilitySanitizerPass());

View File

@ -479,6 +479,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction(
!isInNoSanitizeList(SanitizerKind::MemtagStack, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeMemTag);
if (getLangOpts().Sanitize.has(SanitizerKind::Type) &&
!isInNoSanitizeList(SanitizerKind::Type, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeType);
if (getLangOpts().Sanitize.has(SanitizerKind::Thread) &&
!isInNoSanitizeList(SanitizerKind::Thread, Fn, Loc))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);

View File

@ -837,6 +837,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
Fn->addFnAttr(llvm::Attribute::SanitizeMemTag);
if (SanOpts.has(SanitizerKind::Thread))
Fn->addFnAttr(llvm::Attribute::SanitizeThread);
if (SanOpts.has(SanitizerKind::Type))
Fn->addFnAttr(llvm::Attribute::SanitizeType);
if (SanOpts.has(SanitizerKind::NumericalStability))
Fn->addFnAttr(llvm::Attribute::SanitizeNumericalStability);
if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory))

View File

@ -397,8 +397,8 @@ CodeGenModule::CodeGenModule(ASTContext &C,
if (LangOpts.HLSL)
createHLSLRuntime();
// Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0.
if (LangOpts.Sanitize.has(SanitizerKind::Thread) ||
// Enable TBAA unless it's suppressed. TSan and TySan need TBAA even at O0.
if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Thread | SanitizerKind::Type) ||
(!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0))
TBAA.reset(new CodeGenTBAA(Context, getTypes(), TheModule, CodeGenOpts,
getLangOpts()));

View File

@ -314,8 +314,10 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
}
llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
// At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
// At -O0 or relaxed aliasing, TBAA is not emitted for regular types (unless
// we're running TypeSanitizer).
if (!Features.Sanitize.has(SanitizerKind::Type) &&
(CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing))
return nullptr;
// If the type has the may_alias attribute (even on a typedef), it is

View File

@ -19,9 +19,10 @@ using namespace CodeGen;
SanitizerMetadata::SanitizerMetadata(CodeGenModule &CGM) : CGM(CGM) {}
static bool isAsanHwasanOrMemTag(const SanitizerSet &SS) {
static bool isAsanHwasanMemTagOrTysan(const SanitizerSet &SS) {
return SS.hasOneOf(SanitizerKind::Address | SanitizerKind::KernelAddress |
SanitizerKind::HWAddress | SanitizerKind::MemTag);
SanitizerKind::HWAddress | SanitizerKind::MemTag |
SanitizerKind::Type);
}
static SanitizerMask expandKernelSanitizerMasks(SanitizerMask Mask) {
@ -68,7 +69,7 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
SanitizerMask NoSanitizeAttrMask,
bool IsDynInit) {
SanitizerSet FsanitizeArgument = CGM.getLangOpts().Sanitize;
if (!isAsanHwasanOrMemTag(FsanitizeArgument))
if (!isAsanHwasanMemTagOrTysan(FsanitizeArgument))
return;
FsanitizeArgument.Mask = expandKernelSanitizerMasks(FsanitizeArgument.Mask);
@ -105,11 +106,32 @@ void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV,
GV, Loc, Ty, "init");
GV->setSanitizerMetadata(Meta);
if (Ty.isNull() || !CGM.getLangOpts().Sanitize.has(SanitizerKind::Type) ||
NoSanitizeAttrMask & SanitizerKind::Type)
return;
llvm::MDNode *TBAAInfo = CGM.getTBAATypeInfo(Ty);
if (!TBAAInfo || TBAAInfo == CGM.getTBAATypeInfo(CGM.getContext().CharTy))
return;
llvm::Metadata *GlobalMetadata[] = {llvm::ConstantAsMetadata::get(GV),
TBAAInfo};
// Metadata for the global already registered.
if (llvm::MDNode::getIfExists(CGM.getLLVMContext(), GlobalMetadata))
return;
llvm::MDNode *ThisGlobal =
llvm::MDNode::get(CGM.getLLVMContext(), GlobalMetadata);
llvm::NamedMDNode *TysanGlobals =
CGM.getModule().getOrInsertNamedMetadata("llvm.tysan.globals");
TysanGlobals->addOperand(ThisGlobal);
}
void SanitizerMetadata::reportGlobal(llvm::GlobalVariable *GV, const VarDecl &D,
bool IsDynInit) {
if (!isAsanHwasanOrMemTag(CGM.getLangOpts().Sanitize))
if (!isAsanHwasanMemTagOrTysan(CGM.getLangOpts().Sanitize))
return;
std::string QualName;
llvm::raw_string_ostream OS(QualName);

View File

@ -37,15 +37,15 @@ static const SanitizerMask NotAllowedWithMinimalRuntime = SanitizerKind::Vptr;
static const SanitizerMask NotAllowedWithExecuteOnly =
SanitizerKind::Function | SanitizerKind::KCFI;
static const SanitizerMask NeedsUnwindTables =
SanitizerKind::Address | SanitizerKind::HWAddress | SanitizerKind::Thread |
SanitizerKind::Memory | SanitizerKind::DataFlow |
SanitizerKind::Address | SanitizerKind::HWAddress | SanitizerKind::Type |
SanitizerKind::Thread | SanitizerKind::Memory | SanitizerKind::DataFlow |
SanitizerKind::NumericalStability;
static const SanitizerMask SupportsCoverage =
SanitizerKind::Address | SanitizerKind::HWAddress |
SanitizerKind::KernelAddress | SanitizerKind::KernelHWAddress |
SanitizerKind::MemtagStack | SanitizerKind::MemtagHeap |
SanitizerKind::MemtagGlobals | SanitizerKind::Memory |
SanitizerKind::KernelMemory | SanitizerKind::Leak |
SanitizerKind::Type | SanitizerKind::MemtagStack |
SanitizerKind::MemtagHeap | SanitizerKind::MemtagGlobals |
SanitizerKind::Memory | SanitizerKind::KernelMemory | SanitizerKind::Leak |
SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::Bounds |
SanitizerKind::ImplicitConversion | SanitizerKind::Nullability |
SanitizerKind::DataFlow | SanitizerKind::Fuzzer |
@ -182,6 +182,7 @@ static void addDefaultIgnorelists(const Driver &D, SanitizerMask Kinds,
{"msan_ignorelist.txt", SanitizerKind::Memory},
{"nsan_ignorelist.txt", SanitizerKind::NumericalStability},
{"tsan_ignorelist.txt", SanitizerKind::Thread},
{"tysan_blacklist.txt", SanitizerKind::Type},
{"dfsan_abilist.txt", SanitizerKind::DataFlow},
{"cfi_ignorelist.txt", SanitizerKind::CFI},
{"ubsan_ignorelist.txt",
@ -542,6 +543,10 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
std::pair<SanitizerMask, SanitizerMask> IncompatibleGroups[] = {
std::make_pair(SanitizerKind::Address,
SanitizerKind::Thread | SanitizerKind::Memory),
std::make_pair(SanitizerKind::Type,
SanitizerKind::Address | SanitizerKind::KernelAddress |
SanitizerKind::Memory | SanitizerKind::Leak |
SanitizerKind::Thread | SanitizerKind::KernelAddress),
std::make_pair(SanitizerKind::Thread, SanitizerKind::Memory),
std::make_pair(SanitizerKind::Leak,
SanitizerKind::Thread | SanitizerKind::Memory),

View File

@ -1480,6 +1480,8 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
}
if (SanArgs.needsTsanRt())
SharedRuntimes.push_back("tsan");
if (SanArgs.needsTysanRt())
SharedRuntimes.push_back("tysan");
if (SanArgs.needsHwasanRt()) {
if (SanArgs.needsHwasanAliasesRt())
SharedRuntimes.push_back("hwasan_aliases");
@ -1552,6 +1554,8 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("tsan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsTysanRt())
StaticRuntimes.push_back("tysan");
if (!SanArgs.needsSharedRt() && SanArgs.needsUbsanRt()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("ubsan_minimal");

View File

@ -1600,6 +1600,8 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args,
"Static sanitizer runtimes not supported");
AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan");
}
if (Sanitize.needsTysanRt())
AddLinkSanitizerLibArgs(Args, CmdArgs, "tysan");
if (Sanitize.needsFuzzer() && !Args.hasArg(options::OPT_dynamiclib)) {
AddLinkSanitizerLibArgs(Args, CmdArgs, "fuzzer", /*shared=*/false);
@ -3603,6 +3605,10 @@ SanitizerMask Darwin::getSupportedSanitizers() const {
Res |= SanitizerKind::Thread;
}
if ((IsX86_64 || IsAArch64) && isTargetMacOSBased()) {
Res |= SanitizerKind::Type;
}
if (IsX86_64)
Res |= SanitizerKind::NumericalStability;

View File

@ -837,6 +837,8 @@ SanitizerMask Linux::getSupportedSanitizers() const {
if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ ||
IsLoongArch64 || IsRISCV64)
Res |= SanitizerKind::Thread;
if (IsX86_64 || IsAArch64)
Res |= SanitizerKind::Type;
if (IsX86_64 || IsSystemZ || IsPowerPC64)
Res |= SanitizerKind::KernelMemory;
if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch ||

View File

@ -0,0 +1,85 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefix=WITHOUT %s
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=type | FileCheck -check-prefix=TYSAN %s
// RUN: echo "src:%s" | sed -e 's/\\/\\\\/g' > %t
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=type -fsanitize-blacklist=%t | FileCheck -check-prefix=BL %s
// The sanitize_type attribute should be attached to functions
// when TypeSanitizer is enabled, unless no_sanitize("type") attribute
// is present.
// WITHOUT: NoTYSAN1{{.*}}) [[NOATTR:#[0-9]+]]
// BL: NoTYSAN1{{.*}}) [[NOATTR:#[0-9]+]]
// TYSAN: NoTYSAN1{{.*}}) [[NOATTR:#[0-9]+]]
__attribute__((no_sanitize("type"))) int NoTYSAN1(int *a) { return *a; }
// WITHOUT: NoTYSAN2{{.*}}) [[NOATTR]]
// BL: NoTYSAN2{{.*}}) [[NOATTR]]
// TYSAN: NoTYSAN2{{.*}}) [[NOATTR]]
// no_sanitize("type") appears only on this forward declaration; the
// definition on the following line does not repeat it. The checks above
// expect the same attribute group as NoTYSAN1 in every run configuration.
__attribute__((no_sanitize("type"))) int NoTYSAN2(int *a);
int NoTYSAN2(int *a) { return *a; }
// WITHOUT: NoTYSAN3{{.*}}) [[NOATTR:#[0-9]+]]
// BL: NoTYSAN3{{.*}}) [[NOATTR:#[0-9]+]]
// TYSAN: NoTYSAN3{{.*}}) [[NOATTR:#[0-9]+]]
__attribute__((no_sanitize("type"))) int NoTYSAN3(int *a) { return *a; }
// WITHOUT: TYSANOk{{.*}}) [[NOATTR]]
// BL: TYSANOk{{.*}}) [[NOATTR]]
// TYSAN: TYSANOk{{.*}}) [[WITH:#[0-9]+]]
int TYSANOk(int *a) { return *a; }
// WITHOUT: TemplateTYSANOk{{.*}}) [[NOATTR]]
// BL: TemplateTYSANOk{{.*}}) [[NOATTR]]
// TYSAN: TemplateTYSANOk{{.*}}) [[WITH]]
template <int i>
int TemplateTYSANOk() { return i; }
// WITHOUT: TemplateNoTYSAN{{.*}}) [[NOATTR]]
// BL: TemplateNoTYSAN{{.*}}) [[NOATTR]]
// TYSAN: TemplateNoTYSAN{{.*}}) [[NOATTR]]
template <int i>
__attribute__((no_sanitize("type"))) int TemplateNoTYSAN() { return i; }
// Calls both templates with argument 42 so that each one is instantiated and
// shows up in the emitted IR matched by the checks above. The variable itself
// is also expected in the llvm.tysan.globals list checked at the end of this
// file.
int force_instance = TemplateTYSANOk<42>() + TemplateNoTYSAN<42>();
// Check that __cxx_global_var_init* get the sanitize_type attribute.
int global1 = 0;
int global2 = *(int *)((char *)&global1 + 1);
// WITHOUT: @__cxx_global_var_init{{.*}}[[NOATTR:#[0-9]+]]
// BL: @__cxx_global_var_init{{.*}}[[NOATTR:#[0-9]+]]
// TYSAN: @__cxx_global_var_init{{.*}}[[WITH:#[0-9]+]]
// Make sure that we don't add globals to the list for which we don't have a
// specific type description.
// FIXME: We now have a type description for this type and a global is added. Should it?
struct SX {
int a, b;
};
SX sx;
void consumer(const char *);
// Passes a string literal to consumer(). With -fsanitize=type, the checks in
// the body require the call to follow the entry label immediately and to be
// followed directly by the return, i.e. no other instructions may be emitted
// in this function.
void char_caller() {
// TYSAN: void @_Z11char_callerv()
// TYSAN-NEXT: entry:
// TYSAN-NEXT: call void @_Z8consumerPKc(ptr noundef @.str)
// TYSAN-NEXT: ret void
consumer("foo");
}
// WITHOUT: attributes [[NOATTR]] = { noinline nounwind{{.*}} }
// BL: attributes [[NOATTR]] = { noinline nounwind{{.*}} }
// TYSAN: attributes [[NOATTR]] = { mustprogress noinline nounwind{{.*}} }
// TYSAN: attributes [[WITH]] = { noinline nounwind sanitize_type{{.*}} }
// TYSAN-DAG: !llvm.tysan.globals = !{[[G1MD:![0-9]+]], [[G2MD:![0-9]+]], [[G3MD:![0-9]+]], [[SXMD:![0-9]+]]}
// TYSAN-DAG: [[G1MD]] = !{ptr @force_instance, [[INTMD:![0-9]+]]}
// TYSAN-DAG: [[INTMD]] = !{!"int",
// TYSAN-DAG: [[G2MD]] = !{ptr @global1, [[INTMD]]}
// TYSAN-DAG: [[G3MD]] = !{ptr @global2, [[INTMD]]}
// TYSAN-DAG: [[SXMD]] = !{ptr @sx, [[SXTYMD:![0-9]+]]}
// TYSAN-DAG: [[SXTYMD]] = !{!"_ZTS2SX", [[INTMD]], i64 0, !1, i64 4}
// TYSAN-DAG: Simple C++ TBAA

View File

@ -274,6 +274,29 @@
// CHECK-ASAN-ANDROID-SHARED-NOT: "-lpthread"
// CHECK-ASAN-ANDROID-SHARED-NOT: "-lresolv"
// RUN: %clangxx %s -### -o %t.o 2>&1 \
// RUN: --target=x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
// RUN: -fsanitize=type \
// RUN: -resource-dir=%S/Inputs/resource_dir \
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
// RUN: | FileCheck --check-prefix=CHECK-TYSAN-LINUX-CXX %s
//
// CHECK-TYSAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}"
// CHECK-TYSAN-LINUX-CXX-NOT: stdc++
// CHECK-TYSAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.tysan{{[^.]*}}.a" "--no-whole-archive"
// CHECK-TYSAN-LINUX-CXX: stdc++
// RUN: %clangxx -fsanitize=type -### %s 2>&1 \
// RUN: -mmacosx-version-min=10.6 \
// RUN: --target=x86_64-apple-darwin13.4.0 -fuse-ld=ld -stdlib=platform \
// RUN: -resource-dir=%S/Inputs/resource_dir \
// RUN: --sysroot=%S/Inputs/basic_linux_tree \
// RUN: | FileCheck --check-prefix=CHECK-TYSAN-DARWIN-CXX %s
// CHECK-TYSAN-DARWIN-CXX: "{{.*}}ld{{(.exe)?}}"
// CHECK-TYSAN-DARWIN-CXX: libclang_rt.tysan_osx_dynamic.dylib
// CHECK-TYSAN-DARWIN-CXX-NOT: -lc++abi
// RUN: %clangxx -### %s 2>&1 \
// RUN: --target=x86_64-unknown-linux -fuse-ld=ld -stdlib=platform -lstdc++ \
// RUN: -fsanitize=thread \