Momchil Velikov 50a97aacac [AArch64] Async unwind - function prologues
Re-commit of 32e8b550e5439c7e4aafa73894faffd5f25d0d05

This patch rearranges emission of CFI instructions, so the resulting
DWARF and `.eh_frame` information is precise at every instruction.

The current state is that the unwind info is emitted only after the
function prologue. This is fine for synchronous (e.g. C++) exceptions,
but the information is generally incorrect when the program counter is
at an instruction in the prologue or the epilogue, for example:

```
stp	x29, x30, [sp, #-16]!           // 16-byte Folded Spill
mov	x29, sp
.cfi_def_cfa w29, 16
...
```

after the `stp` is executed the (initial) rule for the CFA still says
the CFA is in the `sp`, even though it's already offset by 16 bytes

A correct unwind info could look like:
```
stp	x29, x30, [sp, #-16]!           // 16-byte Folded Spill
.cfi_def_cfa_offset 16
mov	x29, sp
.cfi_def_cfa w29, 16
...
```

Having this information precise up to an instruction is useful for
sampling profilers that would like to get a stack backtrace. The end
goal (towards this patch is just a step) is to have fully working
`-fasynchronous-unwind-tables`.

Reviewed By: danielkiss, MaskRay

Differential Revision: https://reviews.llvm.org/D111411
2022-03-24 16:16:44 +00:00

53 lines
2.6 KiB
LLVM

; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
@.str = private unnamed_addr constant [11 x i8] c"val = %ld\0A\00", align 1
; Function Attrs: noinline optnone
define dso_local void @set_large(i64 %val) #0 {
entry:
%val.addr = alloca i64, align 8
%large = alloca [268435456 x i64], align 8
%i = alloca i32, align 4
store i64 %val, i64* %val.addr, align 8
%0 = load i64, i64* %val.addr, align 8
%arrayidx = getelementptr inbounds [268435456 x i64], [268435456 x i64]* %large, i64 0, i64 %0
store i64 1, i64* %arrayidx, align 8
%1 = load i64, i64* %val.addr, align 8
%arrayidx1 = getelementptr inbounds [268435456 x i64], [268435456 x i64]* %large, i64 0, i64 %1
%2 = load i64, i64* %arrayidx1, align 8
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i64 %2)
ret void
}
declare dso_local i32 @printf(i8*, ...)
attributes #0 = { noinline optnone "frame-pointer"="all" }
; CHECK: stp x[[SPILL_REG1:[0-9]+]], x[[SPILL_REG2:[0-9]+]], [sp, #-[[SPILL_OFFSET1:[0-9]+]]]
; CHECK-NEXT: .cfi_def_cfa_offset [[SPILL_OFFSET1]]
; CHECK-NEXT: str x[[SPILL_REG3:[0-9]+]], [sp, #[[SPILL_OFFSET2:[0-9]+]]]
; CHECK-NEXT: mov x[[FRAME:[0-9]+]], sp
; CHECK-NEXT: .cfi_def_cfa w[[FRAME]], [[SPILL_OFFSET1]]
; CHECK-COUNT-128: sub sp, sp, #[[STACK1:[0-9]+]], lsl #12
; CHECK-NEXT: sub sp, sp, #[[STACK2:[0-9]+]], lsl #12
; CHECK-NEXT: sub sp, sp, #[[STACK3:[0-9]+]]
; CHECK: sub x[[INDEX:[0-9]+]], x[[FRAME]], #8
; CHECK-NEXT: str x0, [x[[INDEX]]]
; CHECK-NEXT: ldr x[[VAL1:[0-9]+]], [x[[INDEX]]]
; CHECK-NEXT: add x[[VAL3:[0-9]+]], sp, #8
; CHECK-NEXT: mov x[[VAL2:[0-9]+]], #8
; CHECK-NEXT: madd x[[VAL1]], x[[VAL1]], x[[VAL2]], x[[VAL3]]
; CHECK-NEXT: mov x[[TMP1:[0-9]+]], #1
; CHECK-NEXT: str x[[TMP1]], [x[[VAL1]]]
; CHECK-NEXT: ldr x[[INDEX]], [x[[INDEX]]]
; CHECK-NEXT: add x[[VAL3:[0-9]+]], sp, #8
; CHECK-NEXT: mov x[[VAL4:[0-9]+]], #8
; CHECK-NEXT: madd x[[INDEX]], x[[INDEX]], x[[VAL4]], x[[VAL3]]
; CHECK-NEXT: ldr x1, [x[[INDEX]]
; CHECK: bl printf
; CHECK-COUNT-128: add sp, sp, #[[STACK1]], lsl #12
; CHECK-NEXT: add sp, sp, #[[STACK2]], lsl #12
; CHECK-NEXT: add sp, sp, #[[STACK3]]
; CHECK-NEXT: ldr x[[SPILL_REG3]], [sp, #[[SPILL_OFFSET2]]]
; CHECK-NEXT: ldp x[[SPILL_REG1]], x[[SPILL_REG2]], [sp], #[[SPILL_OFFSET1]]