This patch adds the target hooks required by Instruction Referencing for the AArch64 target, as described in https://llvm.org/docs/InstrRefDebugInfo.html#target-hooks. These hooks allow the instruction-referencing LiveDebugValues pass to track spill and restore instructions.

With this patch we can use `llvm/utils/llvm-locstats/llvm-locstats.py` to look at the coverage statistics of a clang.dSYM built in RelWithDebInfo mode.

Coverage with dbg_value:

```
=================================================
            Debug Location Statistics
=================================================
cov%          samples    percentage(~)
-------------------------------------------------
0%            5828021    38%
(0%,10%)       127739     0%
[10%,20%)      143344     0%
[20%,30%)      172100     1%
[30%,40%)      193173     1%
[40%,50%)      127366     0%
[50%,60%)      308350     2%
[60%,70%)      257055     1%
[70%,80%)      212410     1%
[80%,90%)      295316     1%
[90%,100%)     349280     2%
100%          7313157    47%
=================================================
-the number of debug variables processed: 15327311
-PC ranges covered: 67%
-------------------------------------------------
-total availability: 62%
=================================================
```

Coverage with InstrRef, without the target hooks fix:

```
=================================================
            Debug Location Statistics
=================================================
cov%          samples    percentage(~)
-------------------------------------------------
0%            6052807    39%
(0%,10%)       127710     0%
[10%,20%)      129999     0%
[20%,30%)      155011     1%
[30%,40%)      171206     1%
[40%,50%)      102861     0%
[50%,60%)      264734     1%
[60%,70%)      212386     1%
[70%,80%)      176872     1%
[80%,90%)      242120     1%
[90%,100%)     254465     1%
100%          7437215    48%
=================================================
-the number of debug variables processed: 15327386
-PC ranges covered: 67%
-------------------------------------------------
-total availability: 60%
=================================================
```

Coverage with InstrRef, with the target hooks fix:

```
=================================================
            Debug Location Statistics
=================================================
cov%          samples    percentage(~)
-------------------------------------------------
0%            5972267    39%
(0%,10%)       118873     0%
[10%,20%)      127138     0%
[20%,30%)      153181     1%
[30%,40%)      170102     1%
[40%,50%)      102180     0%
[50%,60%)      263672     1%
[60%,70%)      212865     1%
[70%,80%)      176633     1%
[80%,90%)      242403     1%
[90%,100%)     264441     1%
100%          7494527    48%
=================================================
-the number of debug variables processed: 15298282
-PC ranges covered: 71%
-------------------------------------------------
-total availability: 61%
=================================================
```

I believe this is a good indication that Instruction Referencing should be turned on for AArch64?
368 lines
14 KiB
LLVM
368 lines
14 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s

; Both RUN lines use the default CHECK prefix, so each function in this file
; is expected to produce the same assembly with and without -global-isel.

; Dynamically-sized allocation, needs a loop which can handle any size at
; runtime. The final iteration of the loop will temporarily put SP below the
; target address, but this doesn't break any of the ABI constraints on the
; stack, and also doesn't probe below the target SP value.
;
; %size is the number of bytes to allocate; the address of the allocation is
; stored through %out.
define void @dynamic(i64 %size, ptr %out) #0 {
; CHECK-LABEL: dynamic:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
; CHECK-NEXT: b.le .LBB0_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v = alloca i8, i64 %size, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; This function has a fixed-size stack slot and a dynamic one. The fixed-size
; slot isn't large enough that we would normally probe it, but we need to do so
; here, otherwise the CSR save and the first probe of the dynamic allocation
; could be too far apart when the size of the dynamic allocation is close to
; the guard size.
;
; The address of the 64-byte fixed slot is stored through %out1 and the
; address of the %size-byte dynamic allocation through %out2.
define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
; CHECK-LABEL: dynamic_fixed:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: str xzr, [sp, #-64]!
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-DAG: sub x10, x29, #64
; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
; CHECK-DAG: str x10, [x1]
; CHECK-DAG: sub x8, x8, x9
; CHECK-NEXT: .LBB1_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
; CHECK-NEXT: b.le .LBB1_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB1_1
; CHECK-NEXT: .LBB1_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: str x8, [x2]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v1 = alloca i8, i64 64, align 1
  store ptr %v1, ptr %out1, align 8
  %v2 = alloca i8, i64 %size, align 1
  store ptr %v2, ptr %out2, align 8
  ret void
}

; Dynamic allocation, with an alignment requirement greater than the alignment
; of SP. Done by ANDing the target SP with a constant to align it down, then
; doing the loop as normal. Note that we also re-align the stack in the prolog,
; which isn't actually needed because the only aligned allocations are dynamic;
; this is done even without stack probing.
;
; %size is the number of bytes to allocate (64-byte aligned); the address of
; the allocation is stored through %out.
define void @dynamic_align_64(i64 %size, ptr %out) #0 {
; CHECK-LABEL: dynamic_align_64:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: sub x9, sp, #32
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-DAG: str xzr, [sp]
; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
; CHECK-DAG: mov x19, sp
; CHECK-DAG: sub x8, x8, x9
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
; CHECK-NEXT: b.le .LBB2_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB2_1
; CHECK-NEXT: .LBB2_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v = alloca i8, i64 %size, align 64
  store ptr %v, ptr %out, align 8
  ret void
}

; Dynamic allocation, with an alignment greater than the stack guard size. The
; only difference to the dynamic allocation is the constant used for aligning
; the target SP, the loop will probe the whole allocation without needing to
; know about the alignment padding.
;
; The expected output contains two probing loops: .LBB3_1 probes the
; re-aligned stack set up in the prologue, and .LBB3_4 probes the dynamic
; allocation itself.
;
; %size is the number of bytes to allocate (8192-byte aligned); the address of
; the allocation is stored through %out.
define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
; CHECK-LABEL: dynamic_align_8192:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4064
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
; CHECK-NEXT: .LBB3_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.le .LBB3_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-DAG: ldr xzr, [sp]
; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
; CHECK-DAG: mov x19, sp
; CHECK-DAG: sub x8, x8, x9
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
; CHECK-NEXT: b.le .LBB3_6
; CHECK-NEXT: // %bb.5: // in Loop: Header=BB3_4 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB3_4
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v = alloca i8, i64 %size, align 8192
  store ptr %v, ptr %out, align 8
  ret void
}

; For 64k guard pages, the only difference is the constant subtracted from SP
; in the loop.
;
; The guard size is selected with the "stack-probe-size"="65536" function
; attribute. %size is the number of bytes to allocate; the address of the
; allocation is stored through %out.
define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536" {
; CHECK-LABEL: dynamic_64k_guard:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: .LBB4_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: cmp sp, x8
; CHECK-NEXT: b.le .LBB4_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB4_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB4_1
; CHECK-NEXT: .LBB4_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v = alloca i8, i64 %size, align 1
  store ptr %v, ptr %out, align 8
  ret void
}

; If a function has variable-sized stack objects, then any function calls which
; need to pass arguments on the stack must allocate the stack space for them
; dynamically, to ensure they are at the bottom of the frame. We need to probe
; that space when it is larger than the unprobed space allowed by the ABI (1024
; bytes), so this needs a very large number of arguments.
;
; Here the [138 x i64] argument makes the expected code reserve 1104 bytes
; (138 * 8) around the call and probe them with a single store.
define void @no_reserved_call_frame(i64 %n) #0 {
; CHECK-LABEL: no_reserved_call_frame:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: lsl x9, x0, #2
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: add x9, x9, #15
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x0, x8, x9
; CHECK-NEXT: .LBB5_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x0
; CHECK-NEXT: b.le .LBB5_3
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: // in Loop: Header=BB5_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB5_1
; CHECK-NEXT: .LBB5_3: // %entry
; CHECK-NEXT: mov sp, x0
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1104
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: bl callee_stack_args
; CHECK-NEXT: add sp, sp, #1104
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i32, i64 %n
  call void @callee_stack_args(ptr %v, [138 x i64] undef)
  ret void
}

; Same as above but without a variable-sized allocation, so the reserved call
; frame can be folded into the fixed-size allocation in the prologue.
;
; The expected code allocates 1504 bytes at once: 1104 bytes of outgoing
; arguments plus the 400-byte (100 x i32) local, whose address is sp + 1104.
define void @reserved_call_frame(i64 %n) #0 {
; CHECK-LABEL: reserved_call_frame:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: str x28, [sp, #16] // 8-byte Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w28, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: sub sp, sp, #1504
; CHECK-NEXT: add x0, sp, #1104
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: bl callee_stack_args
; CHECK-NEXT: add sp, sp, #1504
; CHECK-NEXT: .cfi_def_cfa wsp, 32
; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
  %v = alloca i32, i64 100
  call void @callee_stack_args(ptr %v, [138 x i64] undef)
  ret void
}

; External callee taking a pointer plus a [138 x i64] array; used by the
; call-frame tests in this file to force a large outgoing-argument area.
declare void @callee_stack_args(ptr, [138 x i64])

; Dynamic allocation of SVE vectors
;
; %size counts <vscale x 4 x float> elements, so the expected code computes
; the byte size at run time (rdvl + madd) before entering the usual probing
; loop. The address of the allocation is stored through %out.
define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" {
; CHECK-LABEL: dynamic_sve:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: mov x10, #15 // =0xf
; CHECK-DAG: mov x8, sp
; CHECK-DAG: madd x9, x0, x9, x10
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x8
; CHECK-NEXT: b.le .LBB7_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB7_1
; CHECK-NEXT: .LBB7_3:
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: str x8, [x1]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 32
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
  %v = alloca <vscale x 4 x float>, i64 %size, align 16
  store ptr %v, ptr %out, align 8
  ret void
}

; Attribute group used by every function definition in this file: async unwind
; tables, inline stack probing, and no frame pointer requested. Functions that
; do not set "stack-probe-size" are expected to probe in 4096-byte steps (see
; their CHECK lines).
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }