From 5ec319119f52aa7a8ab384bf58d341e7b69cdd59 Mon Sep 17 00:00:00 2001 From: Jake Egan Date: Fri, 2 Jan 2026 17:31:26 -0500 Subject: [PATCH] [asan] Adjust interception compatibility for AIX (#131870) Adjust asan interceptor compatibility for AIX. AIX uses dlsym to retrieve addresses of exported functions. However, some functions in libc.a, such as memcpy, are not exported, so we currently have a limitation in retrieving these addresses. Issue: https://github.com/llvm/llvm-project/issues/138916 --- compiler-rt/lib/asan/asan_interceptors.cpp | 9 ++++ compiler-rt/lib/asan/asan_interceptors.h | 28 ++++++++-- compiler-rt/lib/asan/asan_malloc_linux.cpp | 2 +- .../lib/interception/interception_aix.cpp | 51 +++++++++++++++++-- .../lib/sanitizer_common/sanitizer_libc.cpp | 8 +++ .../lib/sanitizer_common/sanitizer_libc.h | 1 + .../sanitizer_platform_interceptors.h | 6 +-- 7 files changed, 95 insertions(+), 10 deletions(-) diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index dab05e2e8d75..02f0ffde7204 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -282,7 +282,12 @@ INTERCEPTOR(int, pthread_create, void* thread, void* attr, # endif asanThreadArgRetval().Create(detached, {start_routine, arg}, [&]() -> uptr { result = REAL(pthread_create)(thread, attr, asan_thread_start, t); +// AIX pthread_t is unsigned int. +# if SANITIZER_AIX + return result ? 0 : *(unsigned*)(thread); +# else return result ? 
0 : *(uptr*)(thread); +# endif }); } if (result != 0) { @@ -439,10 +444,12 @@ INTERCEPTOR(int, swapcontext, struct ucontext_t* oucp, struct ucontext_t* ucp) { # define siglongjmp __siglongjmp14 # endif +# if ASAN_INTERCEPT_LONGJMP INTERCEPTOR(void, longjmp, void* env, int val) { __asan_handle_no_return(); REAL(longjmp)(env, val); } +# endif # if ASAN_INTERCEPT__LONGJMP INTERCEPTOR(void, _longjmp, void* env, int val) { @@ -867,7 +874,9 @@ void InitializeAsanInterceptors() { # endif // Intercept jump-related functions. +# if ASAN_INTERCEPT_LONGJMP ASAN_INTERCEPT_FUNC(longjmp); +# endif # if ASAN_INTERCEPT_SWAPCONTEXT ASAN_INTERCEPT_FUNC(swapcontext); diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 7081e8bbb2af..2d806b7df8ff 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -31,10 +31,26 @@ void InitializePlatformInterceptors(); // really defined to replace libc functions. #if !SANITIZER_FUCHSIA +// Sanitizer on AIX is currently unable to retrieve the address +// of the real longjmp (or an alternative thereto). +// TODO: Consider intercepting longjmpx on AIX. +# if !SANITIZER_AIX +# define ASAN_INTERCEPT_LONGJMP 1 +# else +# define ASAN_INTERCEPT_LONGJMP 0 +# endif + // Use macro to describe if specific function should be // intercepted on a given platform. # if !SANITIZER_WINDOWS -# define ASAN_INTERCEPT__LONGJMP 1 + // Sanitizer on AIX is currently unable to retrieve the address + // of the real _longjmp (or an alternative thereto). + // TODO: Consider intercepting _longjmpx on AIX. 
+# if !SANITIZER_AIX +# define ASAN_INTERCEPT__LONGJMP 1 +# else +# define ASAN_INTERCEPT__LONGJMP 0 +# endif # define ASAN_INTERCEPT_INDEX 1 # define ASAN_INTERCEPT_PTHREAD_CREATE 1 # else @@ -56,7 +72,10 @@ void InitializePlatformInterceptors(); # define ASAN_INTERCEPT_SWAPCONTEXT 0 # endif -# if !SANITIZER_WINDOWS +// Sanitizer on AIX is currently unable to retrieve the address +// of the real siglongjmp (or an alternative thereto). +// TODO: Consider intercepting sigsetjmpx on AIX. +# if !SANITIZER_WINDOWS && !SANITIZER_AIX # define ASAN_INTERCEPT_SIGLONGJMP 1 # else # define ASAN_INTERCEPT_SIGLONGJMP 0 @@ -84,7 +103,10 @@ void InitializePlatformInterceptors(); # define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 0 # endif -# if !SANITIZER_WINDOWS +// Clang on AIX neither uses `__cxa_atexit` nor links against a library with +// such. +// TODO: Consider intercepting `atexit` and `unatexit` on AIX. +# if !SANITIZER_WINDOWS && !SANITIZER_AIX # define ASAN_INTERCEPT___CXA_ATEXIT 1 # else # define ASAN_INTERCEPT___CXA_ATEXIT 0 diff --git a/compiler-rt/lib/asan/asan_malloc_linux.cpp b/compiler-rt/lib/asan/asan_malloc_linux.cpp index add57318785b..b54e38baf0b5 100644 --- a/compiler-rt/lib/asan/asan_malloc_linux.cpp +++ b/compiler-rt/lib/asan/asan_malloc_linux.cpp @@ -15,7 +15,7 @@ #include "sanitizer_common/sanitizer_platform.h" #if SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX || \ - SANITIZER_NETBSD || SANITIZER_SOLARIS || SANITIZER_HAIKU + SANITIZER_NETBSD || SANITIZER_SOLARIS || SANITIZER_HAIKU || SANITIZER_AIX # include "asan_allocator.h" # include "asan_interceptors.h" diff --git a/compiler-rt/lib/interception/interception_aix.cpp b/compiler-rt/lib/interception/interception_aix.cpp index 953bbad96eb4..a6cc4f7498f4 100644 --- a/compiler-rt/lib/interception/interception_aix.cpp +++ b/compiler-rt/lib/interception/interception_aix.cpp @@ -17,16 +17,61 @@ #if SANITIZER_AIX # include <dlfcn.h> // for dlsym() +# include <stddef.h> // for size_t + +# if SANITIZER_WORDSIZE == 64 
+# define STRCPY_STR "___strcpy64" +# define MEMCPY_STR "___memcpy64" +# define MEMMOVE_STR "___memmove64" +# else +# define STRCPY_STR "___strcpy" +# define MEMCPY_STR "___memcpy" +# define MEMMOVE_STR "___memmove" +# endif namespace __interception { -static void *GetFuncAddr(const char *name, uptr wrapper_addr) { - // AIX dlsym can only defect the functions that are exported, so - // on AIX, we can not intercept some basic functions like memcpy. +// These symbols cannot be used for indirect calls. +char* ___strcpy(char*, const char*) __asm__(STRCPY_STR); +char* ___memcpy(char*, const char*, size_t) __asm__(MEMCPY_STR); +char* ___memmove(char*, const char*, size_t) __asm__(MEMMOVE_STR); + +static char* real_strcpy_wrapper(char* s1, const char* s2) { + return (char*)___strcpy(s1, s2); +} + +static char* real_memcpy_wrapper(char* s1, const char* s2, size_t n) { + return (char*)___memcpy(s1, s2, n); +} + +static char* real_memmove_wrapper(char* s1, const char* s2, size_t n) { + return (char*)___memmove(s1, s2, n); +} + +static void* GetFuncAddr(const char* name, uptr wrapper_addr) { // FIXME: if we are going to ship dynamic asan library, we may need to search // all the loaded modules with RTLD_DEFAULT if RTLD_NEXT failed. void *addr = dlsym(RTLD_NEXT, name); + // AIX dlsym can only detect functions that are exported, so + // some basic functions like memcpy return null. In this case, we fall back + // to a corresponding internal libc symbol (for example, ___memcpy) if it's + // available and, otherwise, to the internal sanitizer function. 
+ if (!addr) { + if (internal_strcmp(name, "strcpy") == 0) + addr = (void*)real_strcpy_wrapper; + else if (internal_strcmp(name, "strncpy") == 0) + addr = (void*)internal_strncpy; + else if (internal_strcmp(name, "strcat") == 0) + addr = (void*)internal_strcat; + else if (internal_strcmp(name, "strncat") == 0) + addr = (void*)internal_strncat; + else if (internal_strcmp(name, "memcpy") == 0) + addr = (void*)real_memcpy_wrapper; + else if (internal_strcmp(name, "memmove") == 0) + addr = (void*)real_memmove_wrapper; + } + // In case `name' is not loaded, dlsym ends up finding the actual wrapper. // We don't want to intercept the wrapper and have it point to itself. if ((uptr)addr == wrapper_addr) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp index 9318066afed2..ece768ec8dab 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cpp @@ -190,6 +190,14 @@ uptr internal_strlcat(char *dst, const char *src, uptr maxlen) { return dstlen + srclen; } +char* internal_strcat(char* dst, const char* src) { + uptr len = internal_strlen(dst); + uptr i; + for (i = 0; src[i]; i++) dst[len + i] = src[i]; + dst[len + i] = 0; + return dst; +} + char *internal_strncat(char *dst, const char *src, uptr n) { uptr len = internal_strlen(dst); uptr i; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_libc.h b/compiler-rt/lib/sanitizer_common/sanitizer_libc.h index 1906569e2a5f..2f7ec9249e34 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.h @@ -59,6 +59,7 @@ char *internal_strdup(const char *s); uptr internal_strlen(const char *s); uptr internal_strlcat(char *dst, const char *src, uptr maxlen); char *internal_strncat(char *dst, const char *src, uptr n); +char* internal_strcat(char* dst, const char* src); int internal_strncmp(const char *s1, const char *s2, uptr n); uptr 
internal_strlcpy(char *dst, const char *src, uptr maxlen); char *internal_strncpy(char *dst, const char *src, uptr n); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index ed60671014d5..1b300bc7533b 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -167,7 +167,7 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment, #define SANITIZER_INTERCEPT_STRLEN SI_NOT_FUCHSIA #define SANITIZER_INTERCEPT_STRNLEN (SI_NOT_MAC && SI_NOT_FUCHSIA) -#define SANITIZER_INTERCEPT_STRCMP (SI_NOT_FUCHSIA && SI_NOT_AIX) +#define SANITIZER_INTERCEPT_STRCMP SI_NOT_FUCHSIA #define SANITIZER_INTERCEPT_STRSTR SI_NOT_FUCHSIA #define SANITIZER_INTERCEPT_STRCASESTR (SI_POSIX && SI_NOT_AIX) #define SANITIZER_INTERCEPT_STRTOK SI_NOT_FUCHSIA @@ -179,8 +179,8 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment, #define SANITIZER_INTERCEPT_TEXTDOMAIN SI_LINUX_NOT_ANDROID || SI_SOLARIS #define SANITIZER_INTERCEPT_STRCASECMP SI_POSIX #define SANITIZER_INTERCEPT_MEMSET 1 -#define SANITIZER_INTERCEPT_MEMMOVE SI_NOT_AIX -#define SANITIZER_INTERCEPT_MEMCPY SI_NOT_AIX +#define SANITIZER_INTERCEPT_MEMMOVE 1 +#define SANITIZER_INTERCEPT_MEMCPY 1 #define SANITIZER_INTERCEPT_MEMCMP SI_NOT_FUCHSIA #define SANITIZER_INTERCEPT_BCMP \ SANITIZER_INTERCEPT_MEMCMP && \