From c6799a689df719ca7ceee53774691bf6ac839157 Mon Sep 17 00:00:00 2001 From: Rafael Auler Date: Mon, 27 Jul 2020 16:52:51 -0700 Subject: [PATCH] [BOLT] Fix stack alignment for runtime lib Summary: Right now, the SAVE_ALL sequence executed upon entry of both of our runtime libs (hugify and instrumentation) will cause the stack to not be aligned at a 16B boundary because it saves 15 8-byte regs. Change the code sequence to adjust for that. The compiler may generate code that assumes the stack is aligned by using movaps instructions, which will crash. (cherry picked from FBD22744307) --- bolt/runtime/common.h | 6 +++++- bolt/runtime/hugify.cpp | 8 +++++--- bolt/runtime/instr.cpp | 8 ++++---- bolt/test/X86/user-func-reorder.c | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 816ad87ab042..d6f240469739 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -6,6 +6,7 @@ #include #endif +// Save all registers while keeping 16B stack alignment #define SAVE_ALL \ "push %%rax\n" \ "push %%rbx\n" \ @@ -21,9 +22,12 @@ "push %%r12\n" \ "push %%r13\n" \ "push %%r14\n" \ - "push %%r15\n" + "push %%r15\n" \ + "sub $8, %%rsp\n" +// Mirrors SAVE_ALL #define RESTORE_ALL \ + "add $8, %%rsp\n" \ "pop %%r15\n" \ "pop %%r14\n" \ "pop %%r13\n" \ diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index 09468a27e7a9..0a770a144d80 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -166,7 +166,9 @@ extern "C" void __bolt_hugify_self_impl() { /// This is hooking ELF's entry, it needs to save all machine state. extern "C" __attribute((naked)) void __bolt_hugify_self() { - __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL - "jmp *__bolt_hugify_init_ptr(%%rip)\n" :: - :); + __asm__ __volatile__(SAVE_ALL + "call __bolt_hugify_self_impl\n" + RESTORE_ALL + "jmp *__bolt_hugify_init_ptr(%%rip)\n" + :::); } diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index bf1fad8f0d04..c1401bcf0a25 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -1419,8 +1419,8 @@ extern "C" void instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) { extern "C" __attribute((naked)) void __bolt_instr_indirect_call() { __asm__ __volatile__(SAVE_ALL - "mov 0x88(%%rsp), %%rdi\n" - "mov 0x80(%%rsp), %%rsi\n" + "mov 0x90(%%rsp), %%rdi\n" + "mov 0x88(%%rsp), %%rsi\n" "call instrumentIndirectCall\n" RESTORE_ALL "pop %%rdi\n" @@ -1433,8 +1433,8 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call() extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall() { __asm__ __volatile__(SAVE_ALL - "mov 0x80(%%rsp), %%rdi\n" - "mov 0x78(%%rsp), %%rsi\n" + "mov 0x88(%%rsp), %%rdi\n" + "mov 0x80(%%rsp), %%rsi\n" "call instrumentIndirectCall\n" RESTORE_ALL "add $16, %%rsp\n" diff --git a/bolt/test/X86/user-func-reorder.c b/bolt/test/X86/user-func-reorder.c index d39eda56bf0d..580f5aa662dc 100644 --- a/bolt/test/X86/user-func-reorder.c +++ b/bolt/test/X86/user-func-reorder.c @@ -30,7 +30,7 @@ REQUIRES: system-linux RUN: %host_cc %s -o %t.exe -Wl,-q RUN: llvm-bolt %t.exe -relocs=1 -lite -reorder-functions=user \ -RUN: -function-order=%p/Inputs/user_func_order.txt -o %t +RUN: -hugify -function-order=%p/Inputs/user_func_order.txt -o %t RUN: nm -ns %t | FileCheck %s -check-prefix=CHECK-NM RUN: %t 1 2 3 | FileCheck %s -check-prefix=CHECK-OUTPUT