diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 69ae4fb8ddcc..f543d2e5d79a 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -632,6 +632,12 @@ public: return false; } + /// Generate the matching pointer authentication instruction from a fused + /// pauth-and-return instruction. + virtual void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) { + llvm_unreachable("not implemented"); + } + /// Returns the register used as a return address. Returns std::nullopt if /// not applicable, such as reading the return address from a system register /// or from the stack. diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp index 9b28c7efde5b..5a7d02a34b4d 100644 --- a/bolt/lib/Passes/Inliner.cpp +++ b/bolt/lib/Passes/Inliner.cpp @@ -195,6 +195,13 @@ InliningInfo getInliningInfo(const BinaryFunction &BF) { if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) continue; + // Pointer signing and authentication instructions are used around + // Push and Pop. These are also straightforward to handle. + if (BC.isAArch64() && + (BC.MIB->isPSignOnLR(Inst) || BC.MIB->isPAuthOnLR(Inst) || + BC.MIB->isPAuthAndRet(Inst))) + continue; + DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) || BC.MIB->hasUseOfPhysReg(Inst, SPReg); } @@ -338,6 +345,18 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB, BC.Ctx.get()); } + // Handling fused authentication and return instructions (Armv8.3-A): + // if the call site is not a tail call, the return will be removed + // from the inlined block. If that return is RETA(A|B), we have to keep + // the authentication part. 
+ // RETAA -> AUTIASP + // RETAB -> AUTIBSP + if (!CSIsTailCall && BC.isAArch64() && BC.MIB->isPAuthAndRet(Inst)) { + MCInst Auth; + BC.MIB->createMatchingAuth(Inst, Auth); + InsertII = + std::next(InlinedBB->insertInstruction(InsertII, std::move(Auth))); + } if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) { InsertII = std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst))); diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index db3989d6b0b5..d4fd4b78f382 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -313,6 +313,33 @@ public: Inst.getOpcode() == AArch64::RETABSPPCr; } + void createMatchingAuth(const MCInst &AuthAndRet, MCInst &Auth) override { + Auth.clear(); + Auth.setOperands(AuthAndRet.getOperands()); + switch (AuthAndRet.getOpcode()) { + case AArch64::RETAA: + Auth.setOpcode(AArch64::AUTIASP); + break; + case AArch64::RETAB: + Auth.setOpcode(AArch64::AUTIBSP); + break; + case AArch64::RETAASPPCi: + Auth.setOpcode(AArch64::AUTIASPPCi); + break; + case AArch64::RETABSPPCi: + Auth.setOpcode(AArch64::AUTIBSPPCi); + break; + case AArch64::RETAASPPCr: + Auth.setOpcode(AArch64::AUTIASPPCr); + break; + case AArch64::RETABSPPCr: + Auth.setOpcode(AArch64::AUTIBSPPCr); + break; + default: + llvm_unreachable("Unhandled fused pauth-and-return instruction"); + } + } + std::optional getSignedReg(const MCInst &Inst) const override { switch (Inst.getOpcode()) { case AArch64::PACIA: diff --git a/bolt/test/AArch64/inline-armv8.3-returns.s b/bolt/test/AArch64/inline-armv8.3-returns.s new file mode 100644 index 000000000000..055b589476ca --- /dev/null +++ b/bolt/test/AArch64/inline-armv8.3-returns.s @@ -0,0 +1,45 @@ +# This test checks that inlining functions with fused pointer-auth-and-return +# instructions is properly handled by BOLT. 
+ +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o +# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \ +# RUN: %t.exe -o %t.bolt | FileCheck %s + +# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 8 bytes. +# CHECK: Binary Function "_Z3barP1A" after inlining { +# CHECK-NOT: bl _Z3fooP1A +# CHECK: ldr x8, [x0] +# CHECK-NEXT: ldr w0, [x8] +# CHECK-NEXT: autiasp + + .text + .globl _Z3fooP1A + .type _Z3fooP1A,@function +_Z3fooP1A: + paciasp + ldr x8, [x0] + ldr w0, [x8] + retaa + .size _Z3fooP1A, .-_Z3fooP1A + + .globl _Z3barP1A + .type _Z3barP1A,@function +_Z3barP1A: + stp x29, x30, [sp, #-16]! + mov x29, sp + bl _Z3fooP1A + mul w0, w0, w0 + ldp x29, x30, [sp], #16 + ret + .size _Z3barP1A, .-_Z3barP1A + + .globl main + .p2align 2 + .type main,@function +main: + mov w0, wzr + ret + .size main, .-main diff --git a/bolt/test/AArch64/inline-armv8.3-tailcall.s b/bolt/test/AArch64/inline-armv8.3-tailcall.s new file mode 100644 index 000000000000..78e7285fbf10 --- /dev/null +++ b/bolt/test/AArch64/inline-armv8.3-tailcall.s @@ -0,0 +1,46 @@ +# This test checks that inlining functions with fused pointer-auth-and-return +# instructions into a location with a tailcall is properly handled by BOLT. +# Because _Z3barP1A ends in a tailcall, we don't remove the return instruction +# from the inlined block. Therefore, we should see a retaa, and not an autiasp. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown -mattr=+v8.3a %s -o %t.o +# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \ +# RUN: %t.exe -o %t.bolt | FileCheck %s + +# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 12 bytes. 
+# CHECK: Binary Function "_Z3barP1A" after inlining { +# CHECK-NOT: bl _Z3fooP1A +# CHECK: mov x29, sp +# CHECK-NEXT: paciasp +# CHECK-NEXT: ldr x8, [x0] +# CHECK-NEXT: ldr w0, [x8] +# CHECK-NEXT: retaa + + .text + .globl _Z3fooP1A + .type _Z3fooP1A,@function +_Z3fooP1A: + paciasp + ldr x8, [x0] + ldr w0, [x8] + retaa + .size _Z3fooP1A, .-_Z3fooP1A + + .globl _Z3barP1A + .type _Z3barP1A,@function +_Z3barP1A: + stp x29, x30, [sp, #-16]! + mov x29, sp + b _Z3fooP1A // tailcall + .size _Z3barP1A, .-_Z3barP1A + + .globl main + .p2align 2 + .type main,@function +main: + mov w0, wzr + ret + .size main, .-main diff --git a/bolt/test/AArch64/inline-pauth-lr.s b/bolt/test/AArch64/inline-pauth-lr.s new file mode 100644 index 000000000000..34f05721d5ea --- /dev/null +++ b/bolt/test/AArch64/inline-pauth-lr.s @@ -0,0 +1,61 @@ +# This test checks that inlining functions with the pauth-lr variants of +# fused pointer-auth-and-return instructions is properly handled by BOLT. + +# REQUIRES: system-linux + +# RUN: %clang %cflags -march=armv9.5-a+pauth-lr -O0 %s -o %t.exe -Wl,-q +# RUN: llvm-bolt --inline-all --print-inline --print-only=_Z3barP1A \ +# RUN: %t.exe -o %t.bolt | FileCheck %s + +# CHECK: BOLT-INFO: inlined 0 calls at 2 call sites in 2 iteration(s). Change in binary size: 16 bytes. 
+# CHECK: Binary Function "_Z3barP1A" after inlining { +# CHECK-NOT: bl _Z3fooP1A +# CHECK: paciasppc +# CHECK-NEXT: ldr x8, [x0] +# CHECK-NEXT: ldr w0, [x8] +# CHECK-NEXT: autiasppcr x28 +# CHECK-NEXT: paciasppc +# CHECK-NEXT: ldr x7, [x0] +# CHECK-NEXT: ldr w0, [x7] +# CHECK-NEXT: autiasppc _Z3bazP1A + + .text + .globl _Z3fooP1A + .type _Z3fooP1A,@function +_Z3fooP1A: + paciasppc + ldr x8, [x0] + ldr w0, [x8] + retaasppcr x28 + .size _Z3fooP1A, .-_Z3fooP1A + + .text + .globl _Z3bazP1A + .type _Z3bazP1A,@function +_Z3bazP1A: +0: + paciasppc + ldr x7, [x0] + ldr w0, [x7] + retaasppc 0b + .size _Z3bazP1A, .-_Z3bazP1A + + .globl _Z3barP1A + .type _Z3barP1A,@function +_Z3barP1A: + stp x29, x30, [sp, #-16]! + mov x29, sp + bl _Z3fooP1A + bl _Z3bazP1A + mul w0, w0, w0 + ldp x29, x30, [sp], #16 + ret + .size _Z3barP1A, .-_Z3barP1A + + .globl main + .p2align 2 + .type main,@function +main: + mov w0, wzr + ret + .size main, .-main