[Hexagon][XRay] Fix sled layout and trampoline to preserve retaddr (#188784)
The Hexagon XRay sled was 5 words (20 bytes) and the patched sequence clobbered r31 (the link register) via callr without saving it first. When the trampoline returned, the instrumented function's own allocframe would then save the wrong return address, causing a crash or misrouted return.

Expand the sled to 7 words (28 bytes) and wrap the call with allocframe(#0)/deallocframe so the caller's r31:30 are preserved across the trampoline call.

Detailed fixes:
- HexagonAsmPrinter: emit 6 nop words after the jump (7 words total)
- xray_hexagon.cpp: patch allocframe(#0) as first word, immext+r7 (func ID), immext+r6 (trampoline), callr r6, deallocframe; write the first word last for atomicity
- xray_trampoline_hexagon.S: complete rewrite -- properly load and dereference the global handler pointer, save/restore r0-r5 and r31, add stack frame with correct 8-byte alignment, add jumpr r31 to actually return from trampolines
- xray_interface.cpp: update Hexagon cSledLength from 20 to 28
- Update lit tests for 6-nop sled
This commit is contained in:
parent
3c1b9f278d
commit
5e7c66013b
@ -21,11 +21,13 @@ namespace __xray {
|
||||
|
||||
// The machine codes for some instructions used in runtime patching.
|
||||
enum PatchOpcodes : uint32_t {
|
||||
PO_JUMPI_14 = 0x5800c00a, // jump #0x014 (PC + 0x014)
|
||||
PO_CALLR_R6 = 0x50a6c000, // indirect call: callr r6
|
||||
PO_TFR_IMM = 0x78000000, // transfer immed
|
||||
// ICLASS 0x7 - S2-type A-type
|
||||
PO_IMMEXT = 0x00000000, // constant extender
|
||||
PO_JUMPI_1C = 0x5800c00e, // jump #0x01c (PC + 0x01c)
|
||||
PO_CALLR_R6 = 0x50a6c000, // indirect call: callr r6
|
||||
PO_TFR_IMM = 0x78000000, // transfer immed
|
||||
// ICLASS 0x7 - S2-type A-type
|
||||
PO_IMMEXT = 0x00000000, // constant extender
|
||||
PO_ALLOCFRAME_0 = 0xa09dc000, // allocframe(#0)
|
||||
PO_DEALLOCFRAME = 0x901ec01e, // deallocframe
|
||||
};
|
||||
|
||||
enum PacketWordParseBits : uint32_t {
|
||||
@ -92,43 +94,61 @@ inline static bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
//
|
||||
// .L_xray_sled_N:
|
||||
// <xray_sled_base>:
|
||||
// { jump .Ltmp0 }
|
||||
// { nop
|
||||
// nop
|
||||
// nop
|
||||
// nop }
|
||||
// { jump .Ltmp0 }
|
||||
// { nop } x 6
|
||||
// .Ltmp0:
|
||||
|
||||
//
|
||||
// With the following runtime patch:
|
||||
//
|
||||
// xray_sled_n (32-bit):
|
||||
//
|
||||
// <xray_sled_n>:
|
||||
// { immext(#...) // upper 26-bits of func id
|
||||
// r7 = ##... // lower 6-bits of func id
|
||||
// immext(#...) // upper 26-bits of trampoline
|
||||
// r6 = ##... } // lower 6 bits of trampoline
|
||||
// { callr r6 }
|
||||
// { allocframe(#0) }
|
||||
// { immext(#...) // upper 26-bits of func id
|
||||
// r7 = ##... // lower 6-bits of func id
|
||||
// immext(#...) // upper 26-bits of trampoline
|
||||
// r6 = ##... } // lower 6-bits of trampoline
|
||||
// { callr r6 }
|
||||
// { deallocframe }
|
||||
//
|
||||
// allocframe(#0) saves the caller's r31:30 (LR:FP) before the callr
|
||||
// clobbers r31, and deallocframe restores them afterward. This ensures
|
||||
// the instrumented function's allocframe later saves the correct return
|
||||
// address.
|
||||
//
|
||||
// The first 4-byte instruction must be replaced last, in one atomic
|
||||
// operation, so that user code reaching the sled concurrently
|
||||
// either jumps over the whole sled, or executes the whole sled when it
|
||||
// is ready.
|
||||
//
|
||||
// When |Enable|==false, we set back the first instruction in the sled to be
|
||||
// { jump .Ltmp0 }
|
||||
// { jump .Ltmp0 }
|
||||
|
||||
uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.address());
|
||||
if (Enable) {
|
||||
uint32_t *CurAddress = FirstAddress + 1;
|
||||
// Word 1: immext for r7 = FuncId
|
||||
*CurAddress = encodeConstantExtender(FuncId);
|
||||
CurAddress++;
|
||||
// Word 2: r7 = ##FuncId (low 6 bits)
|
||||
*CurAddress = encodeExtendedTransferImmediate(FuncId, RN_R7);
|
||||
CurAddress++;
|
||||
*CurAddress = encodeConstantExtender(reinterpret_cast<uint32_t>(TracingHook));
|
||||
CurAddress++;
|
||||
// Word 3: immext for r6 = TracingHook
|
||||
*CurAddress =
|
||||
encodeExtendedTransferImmediate(reinterpret_cast<uint32_t>(TracingHook), RN_R6, true);
|
||||
encodeConstantExtender(reinterpret_cast<uint32_t>(TracingHook));
|
||||
CurAddress++;
|
||||
|
||||
// Word 4: r6 = ##TracingHook (low 6 bits), packet end
|
||||
*CurAddress = encodeExtendedTransferImmediate(
|
||||
reinterpret_cast<uint32_t>(TracingHook), RN_R6, true);
|
||||
CurAddress++;
|
||||
// Word 5: callr r6
|
||||
*CurAddress = uint32_t(PO_CALLR_R6);
|
||||
CurAddress++;
|
||||
// Word 6: deallocframe
|
||||
*CurAddress = uint32_t(PO_DEALLOCFRAME);
|
||||
|
||||
WriteInstFlushCache(FirstAddress, uint32_t(encodeConstantExtender(FuncId)));
|
||||
// Word 0 (written last, atomically): allocframe(#0) replaces jump
|
||||
WriteInstFlushCache(FirstAddress, uint32_t(PO_ALLOCFRAME_0));
|
||||
} else {
|
||||
WriteInstFlushCache(FirstAddress, uint32_t(PatchOpcodes::PO_JUMPI_14));
|
||||
WriteInstFlushCache(FirstAddress, uint32_t(PO_JUMPI_1C));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -56,7 +56,7 @@ static const int16_t cSledLength = 64;
|
||||
#elif defined(__powerpc64__)
|
||||
static const int16_t cSledLength = 8;
|
||||
#elif defined(__hexagon__)
|
||||
static const int16_t cSledLength = 20;
|
||||
static const int16_t cSledLength = 28;
|
||||
#elif defined(__riscv) && (__riscv_xlen == 64)
|
||||
static const int16_t cSledLength = 68;
|
||||
#elif defined(__riscv) && (__riscv_xlen == 32)
|
||||
|
||||
@ -15,50 +15,86 @@
|
||||
#include "../builtins/assembly.h"
|
||||
#include "../sanitizer_common/sanitizer_asm.h"
|
||||
|
||||
// The patched sled sets:
|
||||
// r7 = function ID
|
||||
// r6 = trampoline address (used by callr, then dead)
|
||||
// r31 = return address back to sled (set by callr)
|
||||
//
|
||||
// The sled wraps the callr with allocframe(#0)/deallocframe to preserve
|
||||
// the caller's original r31:30 across the trampoline call.
|
||||
|
||||
.macro SAVE_REGISTERS
|
||||
memw(sp+#0)=r0
|
||||
memw(sp+#4)=r1
|
||||
memw(sp+#8)=r2
|
||||
memw(sp+#12)=r3
|
||||
memw(sp+#16)=r4
|
||||
// Allocate 32 bytes on the stack:
|
||||
// sp+#0: r0 (parameter / return value)
|
||||
// sp+#4: r1 (parameter / return value)
|
||||
// sp+#8: r2 (parameter)
|
||||
// sp+#12: r3 (parameter)
|
||||
// sp+#16: r4 (parameter)
|
||||
// sp+#20: r5 (parameter)
|
||||
// sp+#24: r31 (return address back to sled's deallocframe)
|
||||
// sp+#28: (padding for 8-byte alignment)
|
||||
{
|
||||
sp = add(sp, #-32)
|
||||
}
|
||||
memw(sp+#0) = r0
|
||||
memw(sp+#4) = r1
|
||||
memw(sp+#8) = r2
|
||||
memw(sp+#12) = r3
|
||||
memw(sp+#16) = r4
|
||||
memw(sp+#20) = r5
|
||||
memw(sp+#24) = r31
|
||||
.endm
|
||||
|
||||
.macro RESTORE_REGISTERS
|
||||
r0=memw(sp+#0)
|
||||
r1=memw(sp+#4)
|
||||
r2=memw(sp+#8)
|
||||
r3=memw(sp+#12)
|
||||
r4=memw(sp+#16)
|
||||
r0 = memw(sp+#0)
|
||||
r1 = memw(sp+#4)
|
||||
r2 = memw(sp+#8)
|
||||
r3 = memw(sp+#12)
|
||||
r4 = memw(sp+#16)
|
||||
r5 = memw(sp+#20)
|
||||
r31 = memw(sp+#24)
|
||||
{
|
||||
sp = add(sp, #32)
|
||||
}
|
||||
.endm
|
||||
|
||||
.macro CALL_PATCHED_FUNC entry_type
|
||||
// if (xray::XRayPatchedFunctionE != NULL)
|
||||
// xray::XRayPatchedFunctionE(FuncType);
|
||||
|
||||
r8 = #ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
|
||||
|
||||
// The patched sled puts the function type
|
||||
// into r6. Move it into r0 to pass it to
|
||||
// the patched function.
|
||||
{ r0 = r6
|
||||
r1 = \entry_type
|
||||
p0 = !cmp.eq(r8, #0)
|
||||
if (p0) callr r8 }
|
||||
// Load the address of the global handler function pointer,
|
||||
// then dereference it to get the actual handler.
|
||||
r8 = ##ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
|
||||
{
|
||||
r8 = memw(r8+#0)
|
||||
}
|
||||
// Skip if handler is not registered (null).
|
||||
{
|
||||
p0 = cmp.eq(r8, #0)
|
||||
if (p0.new) jump:nt 0f
|
||||
}
|
||||
// Set up arguments for the handler:
|
||||
// r0 = FuncId (was placed in r7 by the patched sled)
|
||||
// r1 = entry type (ENTRY=0, EXIT=1, TAIL=2)
|
||||
{
|
||||
r0 = r7
|
||||
r1 = \entry_type
|
||||
}
|
||||
{
|
||||
callr r8
|
||||
}
|
||||
0:
|
||||
.endm
|
||||
|
||||
.text
|
||||
.globl ASM_SYMBOL(__xray_FunctionEntry)
|
||||
ASM_HIDDEN(__xray_FunctionEntry)
|
||||
ASM_TYPE_FUNCTION(__xray_FunctionEntry)
|
||||
# LLVM-MCA-BEGIN __xray_FunctionEntry
|
||||
ASM_SYMBOL(__xray_FunctionEntry):
|
||||
CFI_STARTPROC
|
||||
SAVE_REGISTERS
|
||||
|
||||
CALL_PATCHED_FUNC #0 // XRayEntryType::ENTRY
|
||||
.Ltmp0:
|
||||
|
||||
RESTORE_REGISTERS
|
||||
// return
|
||||
# LLVM-MCA-END
|
||||
jumpr r31
|
||||
ASM_SIZE(__xray_FunctionEntry)
|
||||
CFI_ENDPROC
|
||||
|
||||
@ -66,17 +102,14 @@ ASM_SYMBOL(__xray_FunctionEntry):
|
||||
.globl ASM_SYMBOL(__xray_FunctionExit)
|
||||
ASM_HIDDEN(__xray_FunctionExit)
|
||||
ASM_TYPE_FUNCTION(__xray_FunctionExit)
|
||||
# LLVM-MCA-BEGIN __xray_FunctionExit
|
||||
ASM_SYMBOL(__xray_FunctionExit):
|
||||
CFI_STARTPROC
|
||||
SAVE_REGISTERS
|
||||
|
||||
CALL_PATCHED_FUNC #1 // XRayEntryType::EXIT
|
||||
.Ltmp1:
|
||||
|
||||
RESTORE_REGISTERS
|
||||
// return
|
||||
jumpr r31
|
||||
# LLVM-MCA-END
|
||||
ASM_SIZE(__xray_FunctionExit)
|
||||
CFI_ENDPROC
|
||||
|
||||
@ -84,16 +117,15 @@ ASM_SYMBOL(__xray_FunctionExit):
|
||||
.globl ASM_SYMBOL(__xray_FunctionTailExit)
|
||||
ASM_HIDDEN(__xray_FunctionTailExit)
|
||||
ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
|
||||
# LLVM-MCA-BEGIN __xray_FunctionTailExit
|
||||
ASM_SYMBOL(__xray_FunctionTailExit):
|
||||
CFI_STARTPROC
|
||||
SAVE_REGISTERS
|
||||
|
||||
CALL_PATCHED_FUNC #2 // XRayEntryType::TAIL
|
||||
.Ltmp2:
|
||||
|
||||
RESTORE_REGISTERS
|
||||
// return
|
||||
jumpr r31
|
||||
# LLVM-MCA-END
|
||||
ASM_SIZE(__xray_FunctionTailExit)
|
||||
CFI_ENDPROC
|
||||
|
||||
NO_EXEC_STACK_DIRECTIVE
|
||||
|
||||
@ -802,28 +802,35 @@ void HexagonAsmPrinter::emitAttributes() {
|
||||
}
|
||||
|
||||
void HexagonAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
|
||||
static const int8_t NoopsInSledCount = 4;
|
||||
static const int8_t NoopsInSledCount = 6;
|
||||
// We want to emit the following pattern:
|
||||
//
|
||||
// .L_xray_sled_N:
|
||||
// <xray_sled_base>:
|
||||
// { jump .Ltmp0 }
|
||||
// { nop
|
||||
// nop
|
||||
// nop
|
||||
// nop }
|
||||
// { jump .Ltmp0 }
|
||||
// { nop }
|
||||
// { nop }
|
||||
// { nop }
|
||||
// { nop }
|
||||
// { nop }
|
||||
// { nop }
|
||||
// .Ltmp0:
|
||||
//
|
||||
// We need the 4 nop words because at runtime, we'd be patching over the
|
||||
// full 5 words with the following pattern:
|
||||
// We need the 6 nop words because at runtime, we'd be patching over the
|
||||
// full 7 words with the following pattern:
|
||||
//
|
||||
// <xray_sled_n>:
|
||||
// { immext(#...) // upper 26-bits of trampoline
|
||||
// r6 = ##... // lower 6-bits of trampoline
|
||||
// immext(#...) // upper 26-bits of func id
|
||||
// r7 = ##... } // lower 6 bits of func id
|
||||
// { callr r6 }
|
||||
// { allocframe(#0) }
|
||||
// { immext(#...) // upper 26-bits of func id
|
||||
// r7 = ##... // lower 6-bits of func id
|
||||
// immext(#...) // upper 26-bits of trampoline
|
||||
// r6 = ##... } // lower 6-bits of trampoline
|
||||
// { callr r6 }
|
||||
// { deallocframe }
|
||||
//
|
||||
// allocframe saves r31:30 (LR:FP) before the call, and deallocframe
|
||||
// restores them after the trampoline returns, ensuring the caller's
|
||||
// return address in r31 is preserved across the sled.
|
||||
//
|
||||
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
|
||||
OutStreamer->emitLabel(CurSled);
|
||||
|
||||
@ -4,14 +4,14 @@ define void @Foo(i32 signext %a, i32 signext %b) #0 {
|
||||
; CHECK-LABEL: @Foo
|
||||
; CHECK-LABEL: .Lxray_sled_0:
|
||||
; CHECK: jump .Ltmp0
|
||||
; CHECK-COUNT-4: nop
|
||||
; CHECK-COUNT-6: nop
|
||||
entry:
|
||||
%cmp = icmp sgt i32 %a, %b
|
||||
br i1 %cmp, label %return, label %if.end
|
||||
|
||||
; CHECK-LABEL: .Lxray_sled_1:
|
||||
; CHECK: jump .Ltmp1
|
||||
; CHECK-COUNT-4: nop
|
||||
; CHECK-COUNT-6: nop
|
||||
; CHECK-LABEL: .Ltmp1:
|
||||
; CHECK: if (p0) jumpr:nt r31
|
||||
if.end:
|
||||
|
||||
@ -8,6 +8,8 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: .Ltmp
|
||||
; CHECK-SAME: [[#l]]:
|
||||
ret i32 0
|
||||
@ -17,6 +19,8 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: nop
|
||||
; CHECK: .Ltmp
|
||||
; CHECK-SAME: [[#l]]:
|
||||
; CHECK: jumpr r31
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user