[BOLT][Instrumentation] AArch64 instrumentation support in runtime

This commit adds AArch64 support to the instrumentation runtime library,
including AArch64 system calls.
It also splits the syscall wrappers into target-specific files.

Reviewed By: rafauler, yota9

Differential Revision: https://reviews.llvm.org/D151942
This commit is contained in:
Elvina Yakubova 2023-08-24 12:39:11 +03:00
parent 70405a0bf7
commit 87e9c42495
6 changed files with 857 additions and 400 deletions

View File

@ -32,10 +32,10 @@ foreach (tgt ${BOLT_TARGETS_TO_BUILD})
endforeach()
set(BOLT_ENABLE_RUNTIME_default OFF)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64"
if ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64"
OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
AND (CMAKE_SYSTEM_NAME STREQUAL "Linux"
OR CMAKE_SYSTEM_NAME STREQUAL "Darwin")
AND "X86" IN_LIST BOLT_TARGETS_TO_BUILD)
OR CMAKE_SYSTEM_NAME STREQUAL "Darwin"))
set(BOLT_ENABLE_RUNTIME_default ON)
endif()
option(BOLT_ENABLE_RUNTIME "Enable BOLT runtime" ${BOLT_ENABLE_RUNTIME_default})

View File

@ -27,8 +27,11 @@ set(BOLT_RT_FLAGS
-fno-exceptions
-fno-rtti
-fno-stack-protector
-mno-sse
-fPIC)
-fPIC
-mgeneral-regs-only)
# -mno-sse is only added when the processor name matches x86_64.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
  list(APPEND BOLT_RT_FLAGS "-mno-sse")
endif()
# Don't let the compiler think it can create calls to standard libs
target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS})
@ -39,7 +42,7 @@ target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
install(TARGETS bolt_rt_instr DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
install(TARGETS bolt_rt_hugify DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
add_library(bolt_rt_instr_osx STATIC
instr.cpp
${CMAKE_CURRENT_BINARY_DIR}/config.h

View File

@ -6,10 +6,6 @@
//
//===----------------------------------------------------------------------===//
#if !defined(__x86_64__)
#error "For x86_64 only"
#endif
#if defined(__linux__)
#include <cstddef>
@ -44,44 +40,6 @@ typedef int int32_t;
#error "For Linux or MacOS only"
#endif
// Save all registers while keeping 16B stack alignment
#define SAVE_ALL \
"push %%rax\n" \
"push %%rbx\n" \
"push %%rcx\n" \
"push %%rdx\n" \
"push %%rdi\n" \
"push %%rsi\n" \
"push %%rbp\n" \
"push %%r8\n" \
"push %%r9\n" \
"push %%r10\n" \
"push %%r11\n" \
"push %%r12\n" \
"push %%r13\n" \
"push %%r14\n" \
"push %%r15\n" \
"sub $8, %%rsp\n"
// Mirrors SAVE_ALL
#define RESTORE_ALL \
"add $8, %%rsp\n" \
"pop %%r15\n" \
"pop %%r14\n" \
"pop %%r13\n" \
"pop %%r12\n" \
"pop %%r11\n" \
"pop %%r10\n" \
"pop %%r9\n" \
"pop %%r8\n" \
"pop %%rbp\n" \
"pop %%rsi\n" \
"pop %%rdi\n" \
"pop %%rdx\n" \
"pop %%rcx\n" \
"pop %%rbx\n" \
"pop %%rax\n"
#define PROT_READ 0x1 /* Page can be read. */
#define PROT_WRITE 0x2 /* Page can be written. */
#define PROT_EXEC 0x4 /* Page can be executed. */
@ -165,142 +123,42 @@ int memcmp(const void *s1, const void *s2, size_t n) {
// Anonymous namespace covering everything but our library entry point
namespace {
// Get the difference between runtime address of .text section and
// static address in section header table. Can be extracted from arbitrary
// pc value recorded at runtime to get the corresponding static address, which
// in turn can be used to search for indirect call description. Needed because
// indirect call descriptions are read-only non-relocatable data.
uint64_t getTextBaseAddress() {
uint64_t DynAddr;
uint64_t StaticAddr;
__asm__ volatile("leaq __hot_end(%%rip), %0\n\t"
"movabsq $__hot_end, %1\n\t"
: "=r"(DynAddr), "=r"(StaticAddr));
return DynAddr - StaticAddr;
}
struct dirent64 {
uint64_t d_ino; /* Inode number */
int64_t d_off; /* Offset to next linux_dirent */
unsigned short d_reclen; /* Length of this linux_dirent */
unsigned char d_type;
char d_name[]; /* Filename (null-terminated) */
/* length is actually (d_reclen - 2 -
offsetof(struct linux_dirent, d_name)) */
};
/* Length of the entries in `struct utsname' is 65. */
#define _UTSNAME_LENGTH 65
struct UtsNameTy {
char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */
char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined
network" */
char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */
char version[_UTSNAME_LENGTH]; /* Operating system version */
char machine[_UTSNAME_LENGTH]; /* Hardware identifier */
char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */
};
struct timespec {
uint64_t tv_sec; /* seconds */
uint64_t tv_nsec; /* nanoseconds */
};
#if defined(__aarch64__)
#include "sys_aarch64.h"
#else
#include "sys_x86_64.h"
#endif
constexpr uint32_t BufSize = 10240;
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
#if defined(__APPLE__)
#define READ_SYSCALL 0x2000003
#else
#define READ_SYSCALL 0
#endif
__asm__ __volatile__("movq $" STRINGIFY(READ_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(buf), "d"(count)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
#if defined(__APPLE__)
#define WRITE_SYSCALL 0x2000004
#else
#define WRITE_SYSCALL 1
#endif
__asm__ __volatile__("movq $" STRINGIFY(WRITE_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(buf), "d"(count)
: "cc", "rcx", "r11", "memory");
return ret;
}
void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
uint64_t fd, uint64_t offset) {
#if defined(__APPLE__)
#define MMAP_SYSCALL 0x20000c5
#else
#define MMAP_SYSCALL 9
#endif
void *ret;
register uint64_t r8 asm("r8") = fd;
register uint64_t r9 asm("r9") = offset;
register uint64_t r10 asm("r10") = flags;
__asm__ __volatile__("movq $" STRINGIFY(MMAP_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8),
"r"(r9)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __munmap(void *addr, uint64_t size) {
#if defined(__APPLE__)
#define MUNMAP_SYSCALL 0x2000049
#else
#define MUNMAP_SYSCALL 11
#endif
uint64_t ret;
__asm__ __volatile__("movq $" STRINGIFY(MUNMAP_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(size)
: "cc", "rcx", "r11", "memory");
return ret;
}
#define SIG_BLOCK 0
#define SIG_UNBLOCK 1
#define SIG_SETMASK 2
static const uint64_t MaskAllSignals[] = {-1ULL};
uint64_t __sigprocmask(int how, const void *set, void *oldset) {
#if defined(__APPLE__)
#define SIGPROCMASK_SYSCALL 0x2000030
#else
#define SIGPROCMASK_SYSCALL 14
#endif
uint64_t ret;
register long r10 asm("r10") = sizeof(uint64_t);
__asm__ __volatile__("movq $" STRINGIFY(SIGPROCMASK_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(how), "S"(set), "d"(oldset), "r"(r10)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __getpid() {
uint64_t ret;
#if defined(__APPLE__)
#define GETPID_SYSCALL 20
#else
#define GETPID_SYSCALL 39
#endif
__asm__ __volatile__("movq $" STRINGIFY(GETPID_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
:
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __exit(uint64_t code) {
#if defined(__APPLE__)
#define EXIT_SYSCALL 0x2000001
#else
#define EXIT_SYSCALL 231
#endif
uint64_t ret;
__asm__ __volatile__("movq $" STRINGIFY(EXIT_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(code)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Helper functions for writing strings to the .fdata file. We intentionally
// avoid using libc names to make it clear it is our impl.
@ -415,219 +273,6 @@ static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) {
return false;
}
#if !defined(__APPLE__)
// We use a stack-allocated buffer for string manipulation in many pieces of
// this code, including the code that prints each line of the fdata file. This
// buffer needs to accommodate large function names, but shouldn't be arbitrarily
// large (dynamically allocated) for simplicity of our memory space usage.
// Declare some syscall wrappers we use throughout this code to avoid linking
// against system libc.
uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
uint64_t ret;
__asm__ __volatile__("movq $2, %%rax\n"
"syscall"
: "=a"(ret)
: "D"(pathname), "S"(flags), "d"(mode)
: "cc", "rcx", "r11", "memory");
return ret;
}
struct dirent {
unsigned long d_ino; /* Inode number */
unsigned long d_off; /* Offset to next linux_dirent */
unsigned short d_reclen; /* Length of this linux_dirent */
char d_name[]; /* Filename (null-terminated) */
/* length is actually (d_reclen - 2 -
offsetof(struct linux_dirent, d_name)) */
};
long __getdents(unsigned int fd, dirent *dirp, size_t count) {
long ret;
__asm__ __volatile__("movq $78, %%rax\n"
"syscall"
: "=a"(ret)
: "D"(fd), "S"(dirp), "d"(count)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) {
uint64_t ret;
__asm__ __volatile__("movq $89, %%rax\n"
"syscall"
: "=a"(ret)
: "D"(pathname), "S"(buf), "d"(bufsize)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
uint64_t ret;
__asm__ __volatile__("movq $8, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(pos), "d"(whence)
: "cc", "rcx", "r11", "memory");
return ret;
}
int __ftruncate(uint64_t fd, uint64_t length) {
int ret;
__asm__ __volatile__("movq $77, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(length)
: "cc", "rcx", "r11", "memory");
return ret;
}
int __close(uint64_t fd) {
uint64_t ret;
__asm__ __volatile__("movq $3, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd)
: "cc", "rcx", "r11", "memory");
return ret;
}
int __madvise(void *addr, size_t length, int advice) {
int ret;
__asm__ __volatile__("movq $28, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(length), "d"(advice)
: "cc", "rcx", "r11", "memory");
return ret;
}
#define _UTSNAME_LENGTH 65
struct UtsNameTy {
char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */
char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined
network" */
char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */
char version[_UTSNAME_LENGTH]; /* Operating system version */
char machine[_UTSNAME_LENGTH]; /* Hardware identifier */
char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */
};
int __uname(struct UtsNameTy *Buf) {
int Ret;
__asm__ __volatile__("movq $63, %%rax\n"
"syscall\n"
: "=a"(Ret)
: "D"(Buf)
: "cc", "rcx", "r11", "memory");
return Ret;
}
struct timespec {
uint64_t tv_sec; /* seconds */
uint64_t tv_nsec; /* nanoseconds */
};
uint64_t __nanosleep(const timespec *req, timespec *rem) {
uint64_t ret;
__asm__ __volatile__("movq $35, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(req), "S"(rem)
: "cc", "rcx", "r11", "memory");
return ret;
}
int64_t __fork() {
uint64_t ret;
__asm__ __volatile__("movq $57, %%rax\n"
"syscall\n"
: "=a"(ret)
:
: "cc", "rcx", "r11", "memory");
return ret;
}
int __mprotect(void *addr, size_t len, int prot) {
int ret;
__asm__ __volatile__("movq $10, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(len), "d"(prot)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __getppid() {
uint64_t ret;
__asm__ __volatile__("movq $110, %%rax\n"
"syscall\n"
: "=a"(ret)
:
: "cc", "rcx", "r11", "memory");
return ret;
}
int __setpgid(uint64_t pid, uint64_t pgid) {
int ret;
__asm__ __volatile__("movq $109, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(pid), "S"(pgid)
: "cc", "rcx", "r11", "memory");
return ret;
}
uint64_t __getpgid(uint64_t pid) {
uint64_t ret;
__asm__ __volatile__("movq $121, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(pid)
: "cc", "rcx", "r11", "memory");
return ret;
}
int __kill(uint64_t pid, int sig) {
int ret;
__asm__ __volatile__("movq $62, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(pid), "S"(sig)
: "cc", "rcx", "r11", "memory");
return ret;
}
int __fsync(int fd) {
int ret;
__asm__ __volatile__("movq $74, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd)
: "cc", "rcx", "r11", "memory");
return ret;
}
// %rdi %rsi %rdx %r10 %r8
// sys_prctl int option unsigned unsigned unsigned unsigned
// long arg2 long arg3 long arg4 long arg5
int __prctl(int Option, unsigned long Arg2, unsigned long Arg3,
unsigned long Arg4, unsigned long Arg5) {
int Ret;
register long rdx asm("rdx") = Arg3;
register long r8 asm("r8") = Arg5;
register long r10 asm("r10") = Arg4;
__asm__ __volatile__("movq $157, %%rax\n"
"syscall\n"
: "=a"(Ret)
: "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8)
:);
return Ret;
}
#endif
void reportError(const char *Msg, uint64_t Size) {
__write(2, Msg, Size);
__exit(1);
@ -644,6 +289,12 @@ void assert(bool Assertion, const char *Msg) {
reportError(Buf, Ptr - Buf);
}
#define SIG_BLOCK 0
#define SIG_UNBLOCK 1
#define SIG_SETMASK 2
static const uint64_t MaskAllSignals[] = {-1ULL};
class Mutex {
volatile bool InUse{false};

View File

@ -40,7 +40,6 @@
//
//===----------------------------------------------------------------------===//
#if defined (__x86_64__)
#include "common.h"
// Enables a very verbose logging to stderr useful when debugging
@ -695,12 +694,12 @@ static char *getBinaryPath() {
assert(static_cast<int64_t>(FDdir) >= 0,
"failed to open /proc/self/map_files");
while (long Nread = __getdents(FDdir, (struct dirent *)Buf, BufSize)) {
while (long Nread = __getdents64(FDdir, (struct dirent64 *)Buf, BufSize)) {
assert(static_cast<int64_t>(Nread) != -1, "failed to get folder entries");
struct dirent *d;
struct dirent64 *d;
for (long Bpos = 0; Bpos < Nread; Bpos += d->d_reclen) {
d = (struct dirent *)(Buf + Bpos);
d = (struct dirent64 *)(Buf + Bpos);
uint64_t StartAddress, EndAddress;
if (!parseAddressRange(d->d_name, StartAddress, EndAddress))
@ -1668,6 +1667,17 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
/// as well as the target address for the call
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
{
#if defined(__aarch64__)
// clang-format off
__asm__ __volatile__(SAVE_ALL
"ldp x0, x1, [sp, #288]\n"
"bl instrumentIndirectCall\n"
RESTORE_ALL
"ret\n"
:::);
// clang-format on
#else
// clang-format off
__asm__ __volatile__(SAVE_ALL
"mov 0xa0(%%rsp), %%rdi\n"
"mov 0x98(%%rsp), %%rsi\n"
@ -1675,10 +1685,23 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
RESTORE_ALL
"ret\n"
:::);
// clang-format on
#endif
}
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
{
#if defined(__aarch64__)
// clang-format off
__asm__ __volatile__(SAVE_ALL
"ldp x0, x1, [sp, #288]\n"
"bl instrumentIndirectCall\n"
RESTORE_ALL
"ret\n"
:::);
// clang-format on
#else
// clang-format off
__asm__ __volatile__(SAVE_ALL
"mov 0x98(%%rsp), %%rdi\n"
"mov 0x90(%%rsp), %%rsi\n"
@ -1686,21 +1709,48 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
RESTORE_ALL
"ret\n"
:::);
// clang-format on
#endif
}
/// This is hooking ELF's entry, it needs to save all machine state.
extern "C" __attribute((naked)) void __bolt_instr_start()
{
#if defined(__aarch64__)
// clang-format off
__asm__ __volatile__(SAVE_ALL
"bl __bolt_instr_setup\n"
RESTORE_ALL
"adrp x16, __bolt_start_trampoline\n"
"add x16, x16, #:lo12:__bolt_start_trampoline\n"
"br x16\n"
:::);
// clang-format on
#else
// clang-format off
__asm__ __volatile__(SAVE_ALL
"call __bolt_instr_setup\n"
RESTORE_ALL
"jmp __bolt_start_trampoline\n"
:::);
// clang-format on
#endif
}
/// This is hooking into ELF's DT_FINI
extern "C" void __bolt_instr_fini() {
__bolt_fini_trampoline();
#if defined(__aarch64__)
// clang-format off
__asm__ __volatile__(SAVE_ALL
"adrp x16, __bolt_fini_trampoline\n"
"add x16, x16, #:lo12:__bolt_fini_trampoline\n"
"blr x16\n"
RESTORE_ALL
:::);
// clang-format on
#else
__asm__ __volatile__("call __bolt_fini_trampoline\n" :::);
#endif
if (__bolt_instr_sleep_time == 0) {
int FD = openProfile();
__bolt_instr_data_dump(FD);
@ -1752,4 +1802,3 @@ void _bolt_instr_fini() {
}
#endif
#endif

394
bolt/runtime/sys_aarch64.h Normal file
View File

@ -0,0 +1,394 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_SYS_AARCH64
#define LLVM_TOOLS_LLVM_BOLT_SYS_AARCH64
// Save all registers while keeping 16B stack alignment.
// Stores x0-x30 with pre-indexed writes: each of the 16 stores first moves sp
// down by 16 bytes (x30 gets a 16-byte slot to itself), so the macro grows the
// stack by 16 * 16 = 256 bytes. Meant to be spliced into an inline-asm
// template and paired with RESTORE_ALL.
#define SAVE_ALL \
"stp x0, x1, [sp, #-16]!\n" \
"stp x2, x3, [sp, #-16]!\n" \
"stp x4, x5, [sp, #-16]!\n" \
"stp x6, x7, [sp, #-16]!\n" \
"stp x8, x9, [sp, #-16]!\n" \
"stp x10, x11, [sp, #-16]!\n" \
"stp x12, x13, [sp, #-16]!\n" \
"stp x14, x15, [sp, #-16]!\n" \
"stp x16, x17, [sp, #-16]!\n" \
"stp x18, x19, [sp, #-16]!\n" \
"stp x20, x21, [sp, #-16]!\n" \
"stp x22, x23, [sp, #-16]!\n" \
"stp x24, x25, [sp, #-16]!\n" \
"stp x26, x27, [sp, #-16]!\n" \
"stp x28, x29, [sp, #-16]!\n" \
"str x30, [sp,#-16]!\n"
// Mirrors SAVE_ALL.
// Reloads x30 first and then the register pairs from x28/x29 down to x0/x1,
// i.e. in the reverse of the order SAVE_ALL stored them. Every load is
// post-indexed and moves sp back up by 16 bytes.
#define RESTORE_ALL \
"ldr x30, [sp], #16\n" \
"ldp x28, x29, [sp], #16\n" \
"ldp x26, x27, [sp], #16\n" \
"ldp x24, x25, [sp], #16\n" \
"ldp x22, x23, [sp], #16\n" \
"ldp x20, x21, [sp], #16\n" \
"ldp x18, x19, [sp], #16\n" \
"ldp x16, x17, [sp], #16\n" \
"ldp x14, x15, [sp], #16\n" \
"ldp x12, x13, [sp], #16\n" \
"ldp x10, x11, [sp], #16\n" \
"ldp x8, x9, [sp], #16\n" \
"ldp x6, x7, [sp], #16\n" \
"ldp x4, x5, [sp], #16\n" \
"ldp x2, x3, [sp], #16\n" \
"ldp x0, x1, [sp], #16\n"
// Anonymous namespace covering everything but our library entry point
namespace {
// Get the difference between runtime address of .text section and
// static address in section header table. Can be extracted from arbitrary
// pc value recorded at runtime to get the corresponding static address, which
// in turn can be used to search for indirect call description. Needed because
// indirect call descriptions are read-only non-relocatable data.
//
// The asm branches over a literal emitted in place with `.dword __hot_end`,
// loads that literal into StaticAddr with a label-relative `ldr`, and builds
// the address of __hot_end into DynAddr with an adrp/add pair. The `%=`
// suffix gives each expansion of the template its own label names.
uint64_t getTextBaseAddress() {
uint64_t DynAddr;
uint64_t StaticAddr;
__asm__ volatile("b .instr%=\n\t"
".StaticAddr%=:\n\t"
".dword __hot_end\n\t"
".instr%=:\n\t"
"ldr %0, .StaticAddr%=\n\t"
"adrp %1, __hot_end\n\t"
"add %1, %1, :lo12:__hot_end\n\t"
: "=r"(StaticAddr), "=r"(DynAddr));
return DynAddr - StaticAddr;
}
// Raw system call wrapper: fd, buf and count are pinned to x0-x2 and w8
// carries syscall number 63 (read in the Linux generic syscall table).
// After `svc #0` the value left in x0 is copied to `ret` and returned as is;
// no errno translation is done here.
uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
register uint64_t x0 __asm__("x0") = fd;
register const void *x1 __asm__("x1") = buf;
register uint64_t x2 __asm__("x2") = count;
register uint32_t w8 __asm__("w8") = 63;
// x0 and x1 are declared read-write ("+r"); x2 and w8 are inputs only.
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper: fd, buf and count are pinned to x0-x2 and w8
// carries syscall number 64 (write in the Linux generic syscall table).
// Returns whatever the kernel left in x0 after `svc #0`, untranslated.
uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
register uint64_t x0 __asm__("x0") = fd;
register const void *x1 __asm__("x1") = buf;
register uint64_t x2 __asm__("x2") = count;
register uint32_t w8 __asm__("w8") = 64;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 222 (mmap in the Linux generic
// syscall table). The six arguments are pinned to x0-x5 in declaration order
// (addr, size, prot, flags, fd, offset). The value left in x0 is returned as
// a pointer without any check, so callers must test for error values
// themselves.
void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
uint64_t fd, uint64_t offset) {
void *ret;
register uint64_t x0 __asm__("x0") = addr;
register uint64_t x1 __asm__("x1") = size;
register uint64_t x2 __asm__("x2") = prot;
register uint64_t x3 __asm__("x3") = flags;
register uint64_t x4 __asm__("x4") = fd;
register uint64_t x5 __asm__("x5") = offset;
register uint32_t w8 __asm__("w8") = 222;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 215 (munmap in the Linux generic
// syscall table): addr in x0, size in x1. Returns the value left in x0.
uint64_t __munmap(void *addr, uint64_t size) {
uint64_t ret;
register void *x0 __asm__("x0") = addr;
register uint64_t x1 __asm__("x1") = size;
register uint32_t w8 __asm__("w8") = 215;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 94 (exit_group in the Linux
// generic syscall table) with the exit code in x0. The function still has a
// return path that hands back x0 in case the call returns.
uint64_t __exit(uint64_t code) {
uint64_t ret;
register uint64_t x0 __asm__("x0") = code;
register uint32_t w8 __asm__("w8") = 94;
// x1 is not an operand here, so it is listed as a clobber instead.
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0)
: "r"(w8)
: "cc", "memory", "x1");
return ret;
}
// Opens `pathname` through syscall number 56 (openat in the Linux generic
// syscall table). x0 is fixed to -100 (AT_FDCWD in the Linux headers), and
// pathname, flags and mode follow in x1-x3. Returns the value left in x0.
uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
uint64_t ret;
register int x0 __asm__("x0") = -100;
register const char *x1 __asm__("x1") = pathname;
register uint64_t x2 __asm__("x2") = flags;
register uint64_t x3 __asm__("x3") = mode;
register uint32_t w8 __asm__("w8") = 56;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(x3), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 61 (getdents64 in the Linux
// generic syscall table): fd in x0, the dirent64 buffer in x1 and its size in
// x2. Returns the value left in x0 as a long.
long __getdents64(unsigned int fd, dirent64 *dirp, size_t count) {
long ret;
register unsigned int x0 __asm__("x0") = fd;
register dirent64 *x1 __asm__("x1") = dirp;
register size_t x2 __asm__("x2") = count;
register uint32_t w8 __asm__("w8") = 61;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(w8)
: "cc", "memory");
return ret;
}
// Reads a symbolic link through syscall number 78 (readlinkat). x0 is fixed
// to -100 (AT_FDCWD in the Linux headers), and pathname, buf and bufsize
// follow in x1-x3. Returns the value left in x0.
uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) {
uint64_t ret;
register int x0 __asm__("x0") = -100;
register const char *x1 __asm__("x1") = pathname;
register char *x2 __asm__("x2") = buf;
register size_t x3 __asm__("x3") = bufsize;
register uint32_t w8 __asm__("w8") = 78; // readlinkat
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(x3), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 62 (lseek in the Linux generic
// syscall table): fd, pos and whence in x0-x2. Returns the value left in x0.
uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
uint64_t ret;
register uint64_t x0 __asm__("x0") = fd;
register uint64_t x1 __asm__("x1") = pos;
register uint64_t x2 __asm__("x2") = whence;
register uint32_t w8 __asm__("w8") = 62;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 46 (ftruncate in the Linux
// generic syscall table): fd in x0, length in x1. Only the low 32 bits of the
// result are kept (`mov %w0, w0`) and returned as an int.
int __ftruncate(uint64_t fd, uint64_t length) {
int ret;
register uint64_t x0 __asm__("x0") = fd;
register uint64_t x1 __asm__("x1") = length;
register uint32_t w8 __asm__("w8") = 46;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 57 (close in the Linux generic
// syscall table) with fd in x0. Returns the low 32 bits of the result as an
// int; x1 is not an operand and is listed as a clobber.
int __close(uint64_t fd) {
int ret;
register uint64_t x0 __asm__("x0") = fd;
register uint32_t w8 __asm__("w8") = 57;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0)
: "r"(w8)
: "cc", "memory", "x1");
return ret;
}
// Raw system call wrapper for syscall number 233 (madvise in the Linux
// generic syscall table): addr, length and advice in x0-x2. Returns the low
// 32 bits of the result as an int.
int __madvise(void *addr, size_t length, int advice) {
int ret;
register void *x0 __asm__("x0") = addr;
register size_t x1 __asm__("x1") = length;
register int x2 __asm__("x2") = advice;
register uint32_t w8 __asm__("w8") = 233;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 160 (uname in the Linux generic
// syscall table) with the UtsNameTy buffer in x0. Returns the low 32 bits of
// the result as an int; x1 is not an operand and is listed as a clobber.
int __uname(struct UtsNameTy *buf) {
int ret;
register UtsNameTy *x0 __asm__("x0") = buf;
register uint32_t w8 __asm__("w8") = 160;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0)
: "r"(w8)
: "cc", "memory", "x1");
return ret;
}
// Raw system call wrapper for syscall number 101 (nanosleep in the Linux
// generic syscall table): req in x0, rem in x1. Returns the value left in x0.
uint64_t __nanosleep(const timespec *req, timespec *rem) {
uint64_t ret;
register const timespec *x0 __asm__("x0") = req;
register timespec *x1 __asm__("x1") = rem;
register uint32_t w8 __asm__("w8") = 101;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(w8)
: "cc", "memory");
return ret;
}
// Creates a child process. There is no fork entry used here; the wrapper
// issues syscall number 220 (clone in the Linux generic syscall table) with
// the flag word 0x1200011 in x0 and zeros in x1-x4. The value left in x0 is
// held in an unsigned local and converted to int64_t on return.
int64_t __fork() {
uint64_t ret;
// clone instead of fork with flags
// "CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD"
register uint64_t x0 __asm__("x0") = 0x1200011;
register uint64_t x1 __asm__("x1") = 0;
register uint64_t x2 __asm__("x2") = 0;
register uint64_t x3 __asm__("x3") = 0;
register uint64_t x4 __asm__("x4") = 0;
register uint32_t w8 __asm__("w8") = 220;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(x3), "r"(x4), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 226 (mprotect in the Linux
// generic syscall table): addr, len and prot in x0-x2. Returns the low 32
// bits of the result as an int.
int __mprotect(void *addr, size_t len, int prot) {
int ret;
register void *x0 __asm__("x0") = addr;
register size_t x1 __asm__("x1") = len;
register int x2 __asm__("x2") = prot;
register uint32_t w8 __asm__("w8") = 226;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 172 (getpid in the Linux generic
// syscall table). It takes no arguments, so x0 and x1 appear only in the
// clobber list; the result is copied out of x0 inside the asm template.
uint64_t __getpid() {
uint64_t ret;
register uint32_t w8 __asm__("w8") = 172;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret)
: "r"(w8)
: "cc", "memory", "x0", "x1");
return ret;
}
// Raw system call wrapper for syscall number 173 (getppid in the Linux
// generic syscall table). It takes no arguments, so x0 and x1 appear only in
// the clobber list; the result is copied out of x0 inside the asm template.
uint64_t __getppid() {
uint64_t ret;
register uint32_t w8 __asm__("w8") = 173;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret)
: "r"(w8)
: "cc", "memory", "x0", "x1");
return ret;
}
// Raw system call wrapper for syscall number 154 (setpgid in the Linux
// generic syscall table): pid in x0, pgid in x1. Returns the low 32 bits of
// the result as an int.
int __setpgid(uint64_t pid, uint64_t pgid) {
int ret;
register uint64_t x0 __asm__("x0") = pid;
register uint64_t x1 __asm__("x1") = pgid;
register uint32_t w8 __asm__("w8") = 154;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 155 (getpgid in the Linux
// generic syscall table) with pid in x0. Returns the value left in x0; x1 is
// not an operand and is listed as a clobber.
uint64_t __getpgid(uint64_t pid) {
uint64_t ret;
register uint64_t x0 __asm__("x0") = pid;
register uint32_t w8 __asm__("w8") = 155;
__asm__ __volatile__("svc #0\n"
"mov %0, x0"
: "=r"(ret), "+r"(x0)
: "r"(w8)
: "cc", "memory", "x1");
return ret;
}
// Raw system call wrapper for syscall number 129 (kill in the Linux generic
// syscall table): pid in x0, sig in x1. Returns the low 32 bits of the result
// as an int.
int __kill(uint64_t pid, int sig) {
int ret;
register uint64_t x0 __asm__("x0") = pid;
register int x1 __asm__("x1") = sig;
register uint32_t w8 __asm__("w8") = 129;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(w8)
: "cc", "memory");
return ret;
}
// Raw system call wrapper for syscall number 82 (fsync in the Linux generic
// syscall table) with fd in x0. Returns the low 32 bits of the result as an
// int; x1 is not an operand and is listed as a clobber.
int __fsync(int fd) {
int ret;
register int x0 __asm__("x0") = fd;
register uint32_t w8 __asm__("w8") = 82;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0)
: "r"(w8)
: "cc", "memory", "x1");
return ret;
}
// Raw system call wrapper for syscall number 135 (rt_sigprocmask in the Linux
// generic syscall table): how, set and oldset go in x0-x2 and the size of the
// signal mask in bytes goes in x3. Returns the value left in x0, untranslated.
uint64_t __sigprocmask(int how, const void *set, void *oldset) {
  uint64_t ret;
  register int x0 __asm__("x0") = how;
  register const void *x1 __asm__("x1") = set;
  register void *x2 __asm__("x2") = oldset;
  // Mask size argument: one 64-bit word, spelled the same way as in the
  // x86-64 wrapper instead of a bare 8.
  register long x3 __asm__("x3") = sizeof(uint64_t);
  register uint32_t w8 __asm__("w8") = 135;
  __asm__ __volatile__("svc #0\n"
                       "mov %0, x0"
                       : "=r"(ret), "+r"(x0), "+r"(x1)
                       : "r"(x2), "r"(x3), "r"(w8)
                       : "cc", "memory");
  return ret;
}
// Raw system call wrapper for syscall number 167 (prctl in the Linux generic
// syscall table): option and arg2-arg5 are pinned to x0-x4 in order. Returns
// the low 32 bits of the result as an int.
int __prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5) {
int ret;
register int x0 __asm__("x0") = option;
register unsigned long x1 __asm__("x1") = arg2;
register unsigned long x2 __asm__("x2") = arg3;
register unsigned long x3 __asm__("x3") = arg4;
register unsigned long x4 __asm__("x4") = arg5;
register uint32_t w8 __asm__("w8") = 167;
__asm__ __volatile__("svc #0\n"
"mov %w0, w0"
: "=r"(ret), "+r"(x0), "+r"(x1)
: "r"(x2), "r"(x3), "r"(x4), "r"(w8)
: "cc", "memory");
return ret;
}
} // anonymous namespace
#endif

360
bolt/runtime/sys_x86_64.h Normal file
View File

@ -0,0 +1,360 @@
#ifndef LLVM_TOOLS_LLVM_BOLT_SYS_X86_64
#define LLVM_TOOLS_LLVM_BOLT_SYS_X86_64
// Save all registers while keeping 16B stack alignment.
// Pushes the 15 registers listed below (rsp itself is not pushed) and then
// lowers rsp by another 8 bytes, so the macro moves rsp down by
// 15 * 8 + 8 = 128 bytes in total. The doubled `%%` is the escape for a
// literal `%` in an extended-asm template. Paired with RESTORE_ALL.
#define SAVE_ALL \
"push %%rax\n" \
"push %%rbx\n" \
"push %%rcx\n" \
"push %%rdx\n" \
"push %%rdi\n" \
"push %%rsi\n" \
"push %%rbp\n" \
"push %%r8\n" \
"push %%r9\n" \
"push %%r10\n" \
"push %%r11\n" \
"push %%r12\n" \
"push %%r13\n" \
"push %%r14\n" \
"push %%r15\n" \
"sub $8, %%rsp\n"
// Mirrors SAVE_ALL.
// First gives back the 8 padding bytes, then pops the registers in the exact
// reverse of the order SAVE_ALL pushed them (r15 first, rax last).
#define RESTORE_ALL \
"add $8, %%rsp\n" \
"pop %%r15\n" \
"pop %%r14\n" \
"pop %%r13\n" \
"pop %%r12\n" \
"pop %%r11\n" \
"pop %%r10\n" \
"pop %%r9\n" \
"pop %%r8\n" \
"pop %%rbp\n" \
"pop %%rsi\n" \
"pop %%rdi\n" \
"pop %%rdx\n" \
"pop %%rcx\n" \
"pop %%rbx\n" \
"pop %%rax\n"
namespace {
// Get the difference between runtime address of .text section and
// static address in section header table. Can be extracted from arbitrary
// pc value recorded at runtime to get the corresponding static address, which
// in turn can be used to search for indirect call description. Needed because
// indirect call descriptions are read-only non-relocatable data.
//
// DynAddr is computed with a rip-relative `leaq` of __hot_end, StaticAddr is
// the absolute value of the same symbol loaded with `movabsq`; the function
// returns their difference.
uint64_t getTextBaseAddress() {
uint64_t DynAddr;
uint64_t StaticAddr;
__asm__ volatile("leaq __hot_end(%%rip), %0\n\t"
"movabsq $__hot_end, %1\n\t"
: "=r"(DynAddr), "=r"(StaticAddr));
return DynAddr - StaticAddr;
}
// Two-level stringification: STRINGIFY(x) expands x first and only then turns
// the result into a string literal. Used below to splice the *_SYSCALL number
// macros into the asm templates.
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
// Raw read system call. The number (0x2000003 on macOS, 0 otherwise) is moved
// into rax, the arguments are bound to rdi/rsi/rdx through the "D"/"S"/"d"
// constraints, and the value left in rax is returned untranslated.
// rcx and r11 are declared clobbered together with the flags and memory.
uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
#if defined(__APPLE__)
#define READ_SYSCALL 0x2000003
#else
#define READ_SYSCALL 0
#endif
__asm__ __volatile__("movq $" STRINGIFY(READ_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(buf), "d"(count)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw write system call. The number (0x2000004 on macOS, 1 otherwise) is
// moved into rax, fd/buf/count are bound to rdi/rsi/rdx, and the value left
// in rax is returned untranslated.
uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
uint64_t ret;
#if defined(__APPLE__)
#define WRITE_SYSCALL 0x2000004
#else
#define WRITE_SYSCALL 1
#endif
__asm__ __volatile__("movq $" STRINGIFY(WRITE_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(buf), "d"(count)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw mmap system call (number 0x20000c5 on macOS, 9 otherwise).
// addr/size/prot are bound to rdi/rsi/rdx by constraint letters; flags, fd
// and offset are pinned to r10, r8 and r9 through explicit-register
// variables. The value left in rax is returned as a pointer without any
// check.
void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
uint64_t fd, uint64_t offset) {
#if defined(__APPLE__)
#define MMAP_SYSCALL 0x20000c5
#else
#define MMAP_SYSCALL 9
#endif
void *ret;
register uint64_t r8 asm("r8") = fd;
register uint64_t r9 asm("r9") = offset;
register uint64_t r10 asm("r10") = flags;
__asm__ __volatile__("movq $" STRINGIFY(MMAP_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8),
"r"(r9)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw munmap system call (number 0x2000049 on macOS, 11 otherwise) with addr
// in rdi and size in rsi. Returns the value left in rax.
uint64_t __munmap(void *addr, uint64_t size) {
#if defined(__APPLE__)
#define MUNMAP_SYSCALL 0x2000049
#else
#define MUNMAP_SYSCALL 11
#endif
uint64_t ret;
__asm__ __volatile__("movq $" STRINGIFY(MUNMAP_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(size)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw signal-mask system call (number 0x2000030 on macOS, 14 otherwise; 14 is
// rt_sigprocmask in the x86-64 Linux syscall table). how/set/oldset are bound
// to rdi/rsi/rdx and r10 carries sizeof(uint64_t) as a fourth argument.
// Returns the value left in rax.
uint64_t __sigprocmask(int how, const void *set, void *oldset) {
#if defined(__APPLE__)
#define SIGPROCMASK_SYSCALL 0x2000030
#else
#define SIGPROCMASK_SYSCALL 14
#endif
uint64_t ret;
register long r10 asm("r10") = sizeof(uint64_t);
__asm__ __volatile__("movq $" STRINGIFY(SIGPROCMASK_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(how), "S"(set), "d"(oldset), "r"(r10)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw getpid system call: loads the platform's syscall number into rax,
// executes `syscall` and returns the value left in rax. Takes no arguments.
uint64_t __getpid() {
  uint64_t ret;
#if defined(__APPLE__)
// getpid is BSD syscall 20; it needs the same 0x2000000 class prefix that
// every other macOS syscall number in this file carries.
#define GETPID_SYSCALL 0x2000014
#else
#define GETPID_SYSCALL 39
#endif
  __asm__ __volatile__("movq $" STRINGIFY(GETPID_SYSCALL) ", %%rax\n"
                       "syscall\n"
                       : "=a"(ret)
                       :
                       : "cc", "rcx", "r11", "memory");
  return ret;
}
// Raw process-exit system call (number 0x2000001 on macOS, 231 otherwise;
// 231 is exit_group in the x86-64 Linux syscall table) with the exit code in
// rdi. The function still has a return path that hands back rax in case the
// call returns.
uint64_t __exit(uint64_t code) {
#if defined(__APPLE__)
#define EXIT_SYSCALL 0x2000001
#else
#define EXIT_SYSCALL 231
#endif
uint64_t ret;
__asm__ __volatile__("movq $" STRINGIFY(EXIT_SYSCALL) ", %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(code)
: "cc", "rcx", "r11", "memory");
return ret;
}
#if !defined(__APPLE__)
// We use a stack-allocated buffer for string manipulation in many pieces of
// this code, including the code that prints each line of the fdata file. This
// buffer needs to accommodate large function names, but shouldn't be arbitrarily
// large (dynamically allocated) for simplicity of our memory space usage.
// Declare some syscall wrappers we use throughout this code to avoid linking
// against system libc.
// Raw system call 2 (open in the x86-64 Linux syscall table): pathname, flags
// and mode are bound to rdi/rsi/rdx. Returns the value left in rax,
// untranslated.
uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
uint64_t ret;
__asm__ __volatile__("movq $2, %%rax\n"
"syscall"
: "=a"(ret)
: "D"(pathname), "S"(flags), "d"(mode)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 217 (getdents64 in the x86-64 Linux syscall table): fd, the
// dirent64 buffer and its size are bound to rdi/rsi/rdx. Returns the value
// left in rax as a long.
long __getdents64(unsigned int fd, dirent64 *dirp, size_t count) {
long ret;
__asm__ __volatile__("movq $217, %%rax\n"
"syscall"
: "=a"(ret)
: "D"(fd), "S"(dirp), "d"(count)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 89 (readlink in the x86-64 Linux syscall table): pathname,
// buf and bufsize are bound to rdi/rsi/rdx. Returns the value left in rax.
uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) {
uint64_t ret;
__asm__ __volatile__("movq $89, %%rax\n"
"syscall"
: "=a"(ret)
: "D"(pathname), "S"(buf), "d"(bufsize)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 8 (lseek in the x86-64 Linux syscall table): fd, pos and
// whence are bound to rdi/rsi/rdx. Returns the value left in rax.
uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
uint64_t ret;
__asm__ __volatile__("movq $8, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(pos), "d"(whence)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 77 (ftruncate in the x86-64 Linux syscall table): fd in
// rdi, length in rsi. The result is read through the "a" constraint into an
// int and returned.
int __ftruncate(uint64_t fd, uint64_t length) {
int ret;
__asm__ __volatile__("movq $77, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd), "S"(length)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 3 (close in the x86-64 Linux syscall table) with fd in rdi.
// Note that `ret` is declared uint64_t although the function returns int, so
// the result is narrowed on return.
int __close(uint64_t fd) {
uint64_t ret;
__asm__ __volatile__("movq $3, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 28 (madvise in the x86-64 Linux syscall table): addr,
// length and advice are bound to rdi/rsi/rdx. Returns the result as an int.
int __madvise(void *addr, size_t length, int advice) {
int ret;
__asm__ __volatile__("movq $28, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(length), "d"(advice)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 63 (uname in the x86-64 Linux syscall table) with the
// UtsNameTy buffer in rdi. Returns the result as an int.
int __uname(struct UtsNameTy *Buf) {
int Ret;
__asm__ __volatile__("movq $63, %%rax\n"
"syscall\n"
: "=a"(Ret)
: "D"(Buf)
: "cc", "rcx", "r11", "memory");
return Ret;
}
// Raw system call 35 (nanosleep in the x86-64 Linux syscall table): req in
// rdi, rem in rsi. Returns the value left in rax.
uint64_t __nanosleep(const timespec *req, timespec *rem) {
uint64_t ret;
__asm__ __volatile__("movq $35, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(req), "S"(rem)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 57 (fork in the x86-64 Linux syscall table); takes no
// arguments. The value left in rax is held in an unsigned local and converted
// to int64_t on return.
int64_t __fork() {
uint64_t ret;
__asm__ __volatile__("movq $57, %%rax\n"
"syscall\n"
: "=a"(ret)
:
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 10 (mprotect in the x86-64 Linux syscall table): addr, len
// and prot are bound to rdi/rsi/rdx. Returns the result as an int.
int __mprotect(void *addr, size_t len, int prot) {
int ret;
__asm__ __volatile__("movq $10, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(addr), "S"(len), "d"(prot)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 110 (getppid in the x86-64 Linux syscall table); takes no
// arguments and returns the value left in rax.
uint64_t __getppid() {
uint64_t ret;
__asm__ __volatile__("movq $110, %%rax\n"
"syscall\n"
: "=a"(ret)
:
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 109 (setpgid in the x86-64 Linux syscall table): pid in
// rdi, pgid in rsi. Returns the result as an int.
int __setpgid(uint64_t pid, uint64_t pgid) {
int ret;
__asm__ __volatile__("movq $109, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(pid), "S"(pgid)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 121 (getpgid in the x86-64 Linux syscall table) with pid in
// rdi. Returns the value left in rax.
uint64_t __getpgid(uint64_t pid) {
uint64_t ret;
__asm__ __volatile__("movq $121, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(pid)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 62 (kill in the x86-64 Linux syscall table): pid in rdi,
// sig in rsi. Returns the result as an int.
int __kill(uint64_t pid, int sig) {
int ret;
__asm__ __volatile__("movq $62, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(pid), "S"(sig)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 74 (fsync in the x86-64 Linux syscall table) with fd in
// rdi. Returns the result as an int.
int __fsync(int fd) {
int ret;
__asm__ __volatile__("movq $74, %%rax\n"
"syscall\n"
: "=a"(ret)
: "D"(fd)
: "cc", "rcx", "r11", "memory");
return ret;
}
// Raw system call 157 (prctl in the x86-64 Linux syscall table).
//
//             %rdi        %rsi       %rdx       %r10       %r8
// sys_prctl   int option  unsigned   unsigned   unsigned   unsigned
//                         long arg2  long arg3  long arg4  long arg5
//
// Returns the value left in eax as an int, untranslated.
int __prctl(int Option, unsigned long Arg2, unsigned long Arg3,
            unsigned long Arg4, unsigned long Arg5) {
  int Ret;
  register long rdx asm("rdx") = Arg3;
  register long r8 asm("r8") = Arg5;
  register long r10 asm("r10") = Arg4;
  // The `syscall` instruction overwrites rcx and r11, and several prctl
  // options make the kernel read or write user memory through Arg2..Arg5, so
  // the clobber list must match the one used by every other wrapper here.
  __asm__ __volatile__("movq $157, %%rax\n"
                       "syscall\n"
                       : "=a"(Ret)
                       : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8)
                       : "cc", "rcx", "r11", "memory");
  return Ret;
}
#endif
} // anonymous namespace
#endif