From 4cd16f2a0ccbcd3421a787260cf873353bf9292e Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Mon, 10 Nov 2025 09:50:44 -0800 Subject: [PATCH] [BOLT][AArch64] Add more heuristics on epilogue determination (#167077) Add more heuristics to check if a basic block is an AArch64 epilogue. We assume instructions that load from stack or adjust stack pointer as valid epilogue code sequence if and only if they immediately precede the branch instruction that ends the basic block. --- bolt/include/bolt/Core/MCPlusBuilder.h | 5 ++ bolt/lib/Core/BinaryFunction.cpp | 11 ++--- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 46 +++++++++++++++++- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 6 +++ bolt/test/AArch64/epilogue-determination.s | 48 +++++++++++++++++++ 5 files changed, 107 insertions(+), 9 deletions(-) create mode 100644 bolt/test/AArch64/epilogue-determination.s diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 295558c0d3af..69ae4fb8ddcc 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -784,6 +784,11 @@ public: virtual bool isPop(const MCInst &Inst) const { return false; } + /// Determine if a basic block looks like an epilogue. For now it is only + /// called at the final stage of building CFG to check basic block ending + /// with an indirect call that has unknown control flow attribute. + virtual bool isEpilogue(const BinaryBasicBlock &BB) const { return false; } + /// Return true if the instruction is used to terminate an indirect branch. virtual bool isTerminateBranch(const MCInst &Inst) const { llvm_unreachable("not implemented"); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 2fd1ab13008b..a0d8385aa382 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2167,13 +2167,10 @@ bool BinaryFunction::postProcessIndirectBranches( continue; } - // If this block contains an epilogue code and has an indirect branch, - // then most likely it's a tail call. Otherwise, we cannot tell for sure - // what it is and conservatively reject the function's CFG. - bool IsEpilogue = llvm::any_of(BB, [&](const MCInst &Instr) { - return BC.MIB->isLeave(Instr) || BC.MIB->isPop(Instr); - }); - if (IsEpilogue) { + // If this block contains epilogue code and has an indirect branch, + // then most likely it's a tail call. Otherwise, we cannot tell for + // sure what it is and conservatively reject the function's CFG. + if (BC.MIB->isEpilogue(BB)) { BC.MIB->convertJmpToTailCall(Instr); BB.removeAllSuccessors(); continue; diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 3c77091d91eb..db3989d6b0b5 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -164,11 +164,53 @@ public: bool isPush(const MCInst &Inst) const override { return isStoreToStack(Inst); - }; + } bool isPop(const MCInst &Inst) const override { return isLoadFromStack(Inst); - }; + } + + // We look for instructions that load from stack or make stack pointer + // adjustment, and assume the basic block is an epilogue if and only if + // such instructions are present and also immediately precede the branch + // instruction that ends the basic block. + bool isEpilogue(const BinaryBasicBlock &BB) const override { + if (BB.succ_size()) + return false; + + bool SeenLoadFromStack = false; + bool SeenStackPointerAdjustment = false; + for (const MCInst &Instr : BB) { + // Skip CFI pseudo instruction. + if (isCFI(Instr)) + continue; + + bool IsPop = isPop(Instr); + // A load from stack instruction could do SP adjustment in pre-index or + // post-index form, which we can skip to check for epilogue recognition + // purpose. + bool IsSPAdj = (isADD(Instr) || isMOVW(Instr)) && + Instr.getOperand(0).isReg() && + Instr.getOperand(0).getReg() == AArch64::SP; + SeenLoadFromStack |= IsPop; + SeenStackPointerAdjustment |= IsSPAdj; + + if (!SeenLoadFromStack && !SeenStackPointerAdjustment) + continue; + if (IsPop || IsSPAdj || isPAuthOnLR(Instr)) + continue; + if (isReturn(Instr)) + return true; + if (isBranch(Instr)) + break; + + // Any previously seen load from stack or stack adjustment instruction + // is definitely not part of epilogue code sequence, so reset these two. + SeenLoadFromStack = false; + SeenStackPointerAdjustment = false; + } + return SeenLoadFromStack || SeenStackPointerAdjustment; + } void createCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx) override { diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 5fca5e813515..7c24c2ce136f 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -219,6 +219,12 @@ public: return getPopSize(Inst) == 0 ? false : true; } + bool isEpilogue(const BinaryBasicBlock &BB) const override { + return ::llvm::any_of(BB, [&](const MCInst &Instr) { + return isLeave(Instr) || isPop(Instr); + }); + } + bool isTerminateBranch(const MCInst &Inst) const override { return Inst.getOpcode() == X86::ENDBR32 || Inst.getOpcode() == X86::ENDBR64; } diff --git a/bolt/test/AArch64/epilogue-determination.s b/bolt/test/AArch64/epilogue-determination.s new file mode 100644 index 000000000000..437d8149c0d6 --- /dev/null +++ b/bolt/test/AArch64/epilogue-determination.s @@ -0,0 +1,48 @@ +# Test that we will not incorrectly take the first basic block in function +# `_foo` as epilogue due to the first load from stack instruction. + +# RUN: %clang %cflags %s -o %t.so -Wl,-q +# RUN: llvm-bolt %t.so -o %t.bolt --print-cfg | FileCheck %s + + .text + .global _foo + .type _foo, %function +_foo: + ldr w8, [sp] + adr x10, _jmptbl + ldrsw x9, [x10, x9, lsl #2] + add x10, x10, x9 + br x10 +# CHECK-NOT: x10 # TAILCALL +# CHECK: x10 # UNKNOWN CONTROL FLOW + mov x0, 0 + ret + mov x0, 1 + ret + + .balign 4 +_jmptbl: + .long -16 + .long -8 + + .global _bar + .type _bar, %function +_bar: + stp x29, x30, [sp, #-0x10]! + mov x29, sp + sub sp, sp, #0x10 + ldr x8, [x29, #0x30] + blr x8 + add sp, sp, #0x10 + ldp x29, x30, [sp], #0x10 + br x2 +# CHECK-NOT: x2 # UNKNOWN CONTROL FLOW +# CHECK: x2 # TAILCALL + + .global _start + .type _start, %function +_start: + ret + + # Dummy relocation to force relocation mode + .reloc 0, R_AARCH64_NONE