This patch introduces the runtime components for type sanitizer: a sanitizer for type-based aliasing violations. It is based on Hal Finkel's https://reviews.llvm.org/D32197. C/C++ have type-based aliasing rules, and LLVM's optimizer can exploit these given TBAA metadata added by Clang. Roughly, a pointer of given type cannot be used to access an object of a different type (with, of course, certain exceptions). Unfortunately, there's a lot of code in the wild that violates these rules (e.g. for type punning), and such code often must be built with -fno-strict-aliasing. Performance is often sacrificed as a result. Part of the problem is the difficulty of finding TBAA violations. Hopefully, this sanitizer will help. For each TBAA type-access descriptor, encoded in LLVM's IR using metadata, the corresponding instrumentation pass generates descriptor tables. Thus, for each type (and access descriptor), we have a unique pointer representation. Excepting anonymous-namespace types, these tables are comdat, so the pointer values should be unique across the program. The descriptors refer to other descriptors to form a type aliasing tree (just like LLVM's TBAA metadata does). The instrumentation handles the "fast path" (where the types match exactly and no partial-overlaps are detected), and defers to the runtime to handle all of the more-complicated cases. The runtime, of course, is also responsible for reporting errors when those are detected. The runtime uses essentially the same shadow memory region as tsan, and we use 8 bytes of shadow memory, the size of the pointer to the type descriptor, for every byte of accessed data in the program. The value 0 is used to represent an unknown type. The value -1 is used to represent an interior byte (a byte that is part of a type, but not the first byte). The instrumentation first checks for an exact match between the type of the current access and the type for that address recorded in the shadow memory. 
If it matches, it then checks the shadow for the remainder of the bytes in the type to make sure that they're all -1. If not, we call the runtime. If the exact match fails, we next check if the value is 0 (i.e. unknown). If it is, then we check the shadow for the remainder of the bytes in the type (to make sure they're all 0). If they're not, we call the runtime. We then set the shadow for the access address and set the shadow for the remaining bytes in the type to -1 (i.e. marking them as interior bytes). If the type indicated by the shadow memory for the access address is neither an exact match nor 0, we call the runtime. For memory updated by memset, memcpy, and memmove, as well as for allocas/byval (and for lifetime.start/end), the instrumentation pass inserts calls to the memset intrinsic that reset the shadow memory to reflect that the type is now unknown. The runtime intercepts memset, memcpy, etc. to perform the same function for the library calls. The runtime essentially repeats these checks, but uses the full TBAA algorithm, just as the compiler does, to determine when two types are permitted to alias. In a situation where access overlap has occurred and aliasing is not permitted, an error is generated. As a note, this implementation does not use the compressed shadow-memory scheme discussed previously (http://lists.llvm.org/pipermail/llvm-dev/2017-April/111766.html). That scheme would not handle the struct-path (i.e. structure offset) information that our TBAA represents. I expect we'll want to work further on compressing the shadow-memory representation, but I think it makes sense to do that as follow-up work. This includes build fixes for Linux from Mingjie Xu. Depends on #76260 (Clang support), #76259 (LLVM support) PR: https://github.com/llvm/llvm-project/pull/76261
251 lines
7.0 KiB
C++
251 lines
7.0 KiB
C++
//===-- tysan_interceptors.cpp --------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is a part of TypeSanitizer.
|
|
//
|
|
// Interceptors for standard library functions.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "interception/interception.h"
|
|
#include "sanitizer_common/sanitizer_common.h"
|
|
#include "tysan/tysan.h"
|
|
|
|
// Controls whether the __strdup interceptor below is compiled and installed:
// enabled (1) on Linux targets that are not Android, disabled (0) elsewhere.
#if !SANITIZER_LINUX || SANITIZER_ANDROID
#define TYSAN_INTERCEPT___STRDUP 0
#else
#define TYSAN_INTERCEPT___STRDUP 1
#endif
|
|
|
|
#if SANITIZER_LINUX
// Hand-written declaration of mallopt, which InitializeInterceptors() calls
// to tune the libc allocator on Linux.
extern "C" {
int mallopt(int param, int value);
}
#endif
|
|
|
|
using namespace __sanitizer;
|
|
using namespace __tysan;
|
|
|
|
static const uptr early_alloc_buf_size = 16384;
|
|
static uptr allocated_bytes;
|
|
static char early_alloc_buf[early_alloc_buf_size];
|
|
|
|
static bool isInEarlyAllocBuf(const void *ptr) {
|
|
return ((uptr)ptr >= (uptr)early_alloc_buf &&
|
|
((uptr)ptr - (uptr)early_alloc_buf) < sizeof(early_alloc_buf));
|
|
}
|
|
|
|
// Handle allocation requests early (before all interceptors are setup). dlsym,
|
|
// for example, calls calloc.
|
|
static void *handleEarlyAlloc(uptr size) {
|
|
void *mem = (void *)&early_alloc_buf[allocated_bytes];
|
|
allocated_bytes += size;
|
|
CHECK_LT(allocated_bytes, early_alloc_buf_size);
|
|
return mem;
|
|
}
|
|
|
|
INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
|
|
if (!tysan_inited && REAL(memset) == nullptr)
|
|
return internal_memset(dst, v, size);
|
|
|
|
void *res = REAL(memset)(dst, v, size);
|
|
tysan_set_type_unknown(dst, size);
|
|
return res;
|
|
}
|
|
|
|
INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
|
|
if (!tysan_inited && REAL(memmove) == nullptr)
|
|
return internal_memmove(dst, src, size);
|
|
|
|
void *res = REAL(memmove)(dst, src, size);
|
|
tysan_copy_types(dst, src, size);
|
|
return res;
|
|
}
|
|
|
|
INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
|
|
if (!tysan_inited && REAL(memcpy) == nullptr) {
|
|
// memmove is used here because on some platforms this will also
|
|
// intercept the memmove implementation.
|
|
return internal_memmove(dst, src, size);
|
|
}
|
|
|
|
void *res = REAL(memcpy)(dst, src, size);
|
|
tysan_copy_types(dst, src, size);
|
|
return res;
|
|
}
|
|
|
|
INTERCEPTOR(void *, mmap, void *addr, SIZE_T length, int prot, int flags,
|
|
int fd, OFF_T offset) {
|
|
void *res = REAL(mmap)(addr, length, prot, flags, fd, offset);
|
|
if (res != (void *)-1)
|
|
tysan_set_type_unknown(res, RoundUpTo(length, GetPageSize()));
|
|
return res;
|
|
}
|
|
|
|
#if !SANITIZER_APPLE
|
|
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T length, int prot, int flags,
|
|
int fd, OFF64_T offset) {
|
|
void *res = REAL(mmap64)(addr, length, prot, flags, fd, offset);
|
|
if (res != (void *)-1)
|
|
tysan_set_type_unknown(res, RoundUpTo(length, GetPageSize()));
|
|
return res;
|
|
}
|
|
#endif
|
|
|
|
INTERCEPTOR(char *, strdup, const char *s) {
|
|
char *res = REAL(strdup)(s);
|
|
if (res)
|
|
tysan_copy_types(res, const_cast<char *>(s), internal_strlen(s));
|
|
return res;
|
|
}
|
|
|
|
#if TYSAN_INTERCEPT___STRDUP
// __strdup interceptor; handled exactly like strdup: forward to the real
// function and, on success, propagate the source string's type information
// for internal_strlen(s) bytes.
INTERCEPTOR(char *, __strdup, const char *s) {
  char *const copy = REAL(__strdup)(s);
  if (!copy)
    return copy;

  tysan_copy_types(copy, const_cast<char *>(s), internal_strlen(s));
  return copy;
}
#endif // TYSAN_INTERCEPT___STRDUP
|
|
|
|
INTERCEPTOR(void *, malloc, uptr size) {
|
|
if (tysan_init_is_running && REAL(malloc) == nullptr)
|
|
return handleEarlyAlloc(size);
|
|
|
|
void *res = REAL(malloc)(size);
|
|
if (res)
|
|
tysan_set_type_unknown(res, size);
|
|
return res;
|
|
}
|
|
|
|
INTERCEPTOR(void *, realloc, void *ptr, uptr size) {
|
|
void *res = REAL(realloc)(ptr, size);
|
|
// We might want to copy the types from the original allocation (although
|
|
// that would require that we knew its size).
|
|
if (res)
|
|
tysan_set_type_unknown(res, size);
|
|
return res;
|
|
}
|
|
|
|
INTERCEPTOR(void *, calloc, uptr nmemb, uptr size) {
|
|
if (tysan_init_is_running && REAL(calloc) == nullptr)
|
|
return handleEarlyAlloc(nmemb * size);
|
|
|
|
void *res = REAL(calloc)(nmemb, size);
|
|
if (res)
|
|
tysan_set_type_unknown(res, nmemb * size);
|
|
return res;
|
|
}
|
|
|
|
INTERCEPTOR(void, free, void *p) {
|
|
// There are only a few early allocation requests,
|
|
// so we simply skip the free.
|
|
if (isInEarlyAllocBuf(p))
|
|
return;
|
|
REAL(free)(p);
|
|
}
|
|
|
|
INTERCEPTOR(void *, valloc, uptr size) {
|
|
void *res = REAL(valloc)(size);
|
|
if (res)
|
|
tysan_set_type_unknown(res, size);
|
|
return res;
|
|
}
|
|
|
|
// memalign is only intercepted when SANITIZER_INTERCEPT_MEMALIGN is set;
// TYSAN_MAYBE_INTERCEPT_MEMALIGN expands to the matching installation
// statement, or to nothing.
#if SANITIZER_INTERCEPT_MEMALIGN
// Forwards to the real memalign and, on success, resets the type recorded for
// the first `size` bytes of the block.
INTERCEPTOR(void *, memalign, uptr alignment, uptr size) {
  void *const block = REAL(memalign)(alignment, size);
  if (block != nullptr)
    tysan_set_type_unknown(block, size);
  return block;
}
#define TYSAN_MAYBE_INTERCEPT_MEMALIGN INTERCEPT_FUNCTION(memalign)
#else
#define TYSAN_MAYBE_INTERCEPT_MEMALIGN
#endif // SANITIZER_INTERCEPT_MEMALIGN
|
|
|
|
// __libc_memalign is only intercepted when
// SANITIZER_INTERCEPT___LIBC_MEMALIGN is set;
// TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN expands to the matching installation
// statement, or to nothing.
#if SANITIZER_INTERCEPT___LIBC_MEMALIGN
// Forwards to the real __libc_memalign and, on success, resets the type
// recorded for the first `size` bytes of the block.
INTERCEPTOR(void *, __libc_memalign, uptr alignment, uptr size) {
  void *const block = REAL(__libc_memalign)(alignment, size);
  if (block != nullptr)
    tysan_set_type_unknown(block, size);
  return block;
}
#define TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN                                  \
  INTERCEPT_FUNCTION(__libc_memalign)
#else
#define TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN
#endif // SANITIZER_INTERCEPT___LIBC_MEMALIGN
|
|
|
|
// pvalloc is only intercepted when SANITIZER_INTERCEPT_PVALLOC is set;
// TYSAN_MAYBE_INTERCEPT_PVALLOC expands to the matching installation
// statement, or to nothing.
#if SANITIZER_INTERCEPT_PVALLOC
// Forwards to the real pvalloc and, on success, resets the type recorded for
// the first `size` bytes of the block (the requested size, as passed in).
INTERCEPTOR(void *, pvalloc, uptr size) {
  void *const block = REAL(pvalloc)(size);
  if (block != nullptr)
    tysan_set_type_unknown(block, size);
  return block;
}
#define TYSAN_MAYBE_INTERCEPT_PVALLOC INTERCEPT_FUNCTION(pvalloc)
#else
#define TYSAN_MAYBE_INTERCEPT_PVALLOC
#endif // SANITIZER_INTERCEPT_PVALLOC
|
|
|
|
// aligned_alloc is only intercepted when SANITIZER_INTERCEPT_ALIGNED_ALLOC is
// set; TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC expands to the matching
// installation statement, or to nothing.
#if SANITIZER_INTERCEPT_ALIGNED_ALLOC
// Forwards to the real aligned_alloc and, on success, resets the type
// recorded for the first `size` bytes of the block.
INTERCEPTOR(void *, aligned_alloc, uptr alignment, uptr size) {
  void *const block = REAL(aligned_alloc)(alignment, size);
  if (block != nullptr)
    tysan_set_type_unknown(block, size);
  return block;
}
#define TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC INTERCEPT_FUNCTION(aligned_alloc)
#else
#define TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC
#endif
|
|
|
|
INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
|
|
int res = REAL(posix_memalign)(memptr, alignment, size);
|
|
if (res == 0 && *memptr)
|
|
tysan_set_type_unknown(*memptr, size);
|
|
return res;
|
|
}
|
|
|
|
namespace __tysan {
|
|
void InitializeInterceptors() {
|
|
static int inited = 0;
|
|
CHECK_EQ(inited, 0);
|
|
|
|
// Instruct libc malloc to consume less memory.
|
|
#if SANITIZER_LINUX
|
|
mallopt(1, 0); // M_MXFAST
|
|
mallopt(-3, 32 * 1024); // M_MMAP_THRESHOLD
|
|
#endif
|
|
|
|
INTERCEPT_FUNCTION(mmap);
|
|
|
|
INTERCEPT_FUNCTION(mmap64);
|
|
|
|
INTERCEPT_FUNCTION(strdup);
|
|
#if TYSAN_INTERCEPT___STRDUP
|
|
INTERCEPT_FUNCTION(__strdup);
|
|
#endif
|
|
|
|
INTERCEPT_FUNCTION(malloc);
|
|
INTERCEPT_FUNCTION(calloc);
|
|
INTERCEPT_FUNCTION(free);
|
|
INTERCEPT_FUNCTION(realloc);
|
|
INTERCEPT_FUNCTION(valloc);
|
|
TYSAN_MAYBE_INTERCEPT_MEMALIGN;
|
|
TYSAN_MAYBE_INTERCEPT___LIBC_MEMALIGN;
|
|
TYSAN_MAYBE_INTERCEPT_PVALLOC;
|
|
TYSAN_MAYBE_INTERCEPT_ALIGNED_ALLOC
|
|
INTERCEPT_FUNCTION(posix_memalign);
|
|
|
|
INTERCEPT_FUNCTION(memset);
|
|
INTERCEPT_FUNCTION(memmove);
|
|
INTERCEPT_FUNCTION(memcpy);
|
|
|
|
inited = 1;
|
|
}
|
|
} // namespace __tysan
|