llvm-project/llvm/test/CodeGen/AArch64/stack-probing.ll
Shubham Sandeep Rastogi 44b94a4f75
[AArch64][DebugInfo]Add Target hooks for InstrRef on AArch64 (#165953)
This patch adds the target hooks required by Instruction Referencing for
the AArch64 target, as mentioned in
https://llvm.org/docs/InstrRefDebugInfo.html#target-hooks

These hooks allow the Instruction Referenced LiveDebugValues pass to track
spill and restore instructions.

With this patch, we can use
`llvm/utils/llvm-locstats/llvm-locstats.py` to compare the coverage
statistics on a clang.dSYM built in RelWithDebInfo mode:

coverage with dbg_value:
```
=================================================
            Debug Location Statistics       
 =================================================
     cov%           samples         percentage(~)  
 -------------------------------------------------
   0%              5828021               38%
   (0%,10%)         127739                0%
   [10%,20%)        143344                0%
   [20%,30%)        172100                1%
   [30%,40%)        193173                1%
   [40%,50%)        127366                0%
   [50%,60%)        308350                2%
   [60%,70%)        257055                1%
   [70%,80%)        212410                1%
   [80%,90%)        295316                1%
   [90%,100%)       349280                2%
   100%            7313157               47%
 =================================================
 -the number of debug variables processed: 15327311
 -PC ranges covered: 67%
 -------------------------------------------------
 -total availability: 62%
 =================================================
 ```
 
coverage with InstrRef without target hooks fix:
```
 =================================================
            Debug Location Statistics       
 =================================================
     cov%           samples         percentage(~)  
 -------------------------------------------------
   0%              6052807               39%
   (0%,10%)         127710                0%
   [10%,20%)        129999                0%
   [20%,30%)        155011                1%
   [30%,40%)        171206                1%
   [40%,50%)        102861                0%
   [50%,60%)        264734                1%
   [60%,70%)        212386                1%
   [70%,80%)        176872                1%
   [80%,90%)        242120                1%
   [90%,100%)       254465                1%
   100%            7437215               48%
 =================================================
 -the number of debug variables processed: 15327386
 -PC ranges covered: 67%
 -------------------------------------------------
 -total availability: 60%
 =================================================
 ```
 
coverage with InstrRef with target hooks fix:
```
 =================================================
            Debug Location Statistics       
 =================================================
     cov%           samples         percentage(~)  
 -------------------------------------------------
   0%              5972267               39%
   (0%,10%)         118873                0%
   [10%,20%)        127138                0%
   [20%,30%)        153181                1%
   [30%,40%)        170102                1%
   [40%,50%)        102180                0%
   [50%,60%)        263672                1%
   [60%,70%)        212865                1%
   [70%,80%)        176633                1%
   [80%,90%)        242403                1%
   [90%,100%)       264441                1%
   100%            7494527               48%
 =================================================
 -the number of debug variables processed: 15298282
 -PC ranges covered: 71%
 -------------------------------------------------
 -total availability: 61%
 =================================================
 ```
 
 I believe this is a good indication that Instruction Referencing should be turned on for AArch64.
2025-11-14 10:36:47 -08:00

540 lines
18 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false | FileCheck %s
; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -enable-post-misched=false -global-isel | FileCheck %s
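; Tests for prologue sequences for stack probing, when using a 4KiB stack guard.
; The stack probing parameters in function attributes take precedence over
; ones in the module flags. Every function below uses attribute group #0
; except @static_9232, which carries no probing attributes and therefore
; exercises the module-flag settings.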
; Small stack frame, no probing required.
; Allocates a 64-byte stack object and stores its address through %out.
; The expected code only moves SP by 64 in the prologue and epilogue; no
; store to [sp] (i.e. no probe) appears.
define void @static_64(ptr %out) #0 {
; CHECK-LABEL: static_64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
entry:
; 64-byte stack object; its address escapes through %out.
%v = alloca i8, i64 64, align 1
store ptr %v, ptr %out, align 8
ret void
}
; At 256 bytes we start to always create a frame pointer. No frame smaller than
; this needs a probe, so we can use the saving of at least one CSR as a probe
; at the top of our frame.
; The expected frame is 272 bytes: the 256-byte object plus a 16-byte slot in
; which x29 is spilled at [sp, #256]. No separate probing store is expected.
define void @static_256(ptr %out) #0 {
; CHECK-LABEL: static_256:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #272
; CHECK-NEXT: .cfi_def_cfa_offset 272
; CHECK-NEXT: str x29, [sp, #256] // 8-byte Spill
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #272
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 256-byte stack object; its address escapes through %out.
%v = alloca i8, i64 256, align 1
store ptr %v, ptr %out, align 8
ret void
}
; At 1024 bytes, this is the largest frame which doesn't need probing.
; Expected code: x29 is spilled with a pre-indexed store (SP -= 16), then SP
; is dropped by 1024 in one step. No `str xzr, [sp]` probe follows the
; adjustment.
define void @static_1024(ptr %out) #0 {
; CHECK-LABEL: static_1024:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 1024-byte stack object; its address escapes through %out.
%v = alloca i8, i64 1024, align 1
store ptr %v, ptr %out, align 8
ret void
}
; At 1024+16 bytes, this is the smallest frame which needs probing.
; Expected code: after SP is dropped by 1040, a single `str xzr, [sp]`
; touches the new top of stack; that store is the probe.
define void @static_1040(ptr %out) #0 {
; CHECK-LABEL: static_1040:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 1040-byte stack object; its address escapes through %out.
%v = alloca i8, i64 1040, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 4k bytes is the largest frame we can probe in one go.
; Expected code: one SP decrement of 4096 (encoded as #1, lsl #12) followed
; by one `str xzr, [sp]` probe.
define void @static_4096(ptr %out) #0 {
; CHECK-LABEL: static_4096:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 4112
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 4096-byte stack object; its address escapes through %out.
%v = alloca i8, i64 4096, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 4k+16 bytes, still needs just one probe.
; Expected code: SP is dropped by 4096, then a post-indexed
; `str xzr, [sp], #-16` both probes the current top of stack and allocates
; the remaining 16 bytes (the CFA offset goes from 4112 to 4128). The
; epilogue undoes the two steps separately (4096, then 16).
define void @static_4112(ptr %out) #0 {
; CHECK-LABEL: static_4112:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 4112
; CHECK-NEXT: str xzr, [sp], #-16
; CHECK-NEXT: .cfi_def_cfa_offset 4128
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 4112-byte stack object; its address escapes through %out.
%v = alloca i8, i64 4112, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 4k+1024 bytes, the largest frame which needs just one probe.
; Expected code: SP is dropped by 4096 and probed with `str xzr, [sp]`; the
; remaining 1024 bytes are then allocated with a plain `sub` and no further
; probe.
define void @static_5120(ptr %out) #0 {
; CHECK-LABEL: static_5120:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 4112
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 5136
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 5120-byte stack object; its address escapes through %out.
%v = alloca i8, i64 5120, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 4k+1024+16, the smallest frame which needs two probes.
; Expected code: SP is dropped by 4096 and probed, then dropped by the
; remaining 1040 bytes and probed again (two `str xzr, [sp]` in total).
define void @static_5136(ptr %out) #0 {
; CHECK-LABEL: static_5136:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 4112
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 5152
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 5136-byte stack object; its address escapes through %out.
%v = alloca i8, i64 5136, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 2*4k+1024, the largest frame needing two probes
; Expected code: two 4096-byte SP decrements, each followed by a
; `str xzr, [sp]` probe, then a final unprobed 1024-byte decrement. The
; epilogue releases the two 4096-byte steps with a single `add` of 8192.
define void @static_9216(ptr %out) #0 {
; CHECK-LABEL: static_9216:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 4112
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 8208
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 9232
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #2, lsl #12 // =8192
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 9216-byte stack object; its address escapes through %out.
%v = alloca i8, i64 9216, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 5*4k-16, the largest frame probed without a loop
; Expected code: four straight-line 4096-byte SP decrements, each followed
; by a `str xzr, [sp]` probe, then a 4080-byte decrement that is also
; probed. No loop label or branch appears in the prologue.
define void @static_20464(ptr %out) #0 {
; CHECK-LABEL: static_20464:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 4112
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 8208
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 12304
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: .cfi_def_cfa_offset 16400
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: sub sp, sp, #4080
; CHECK-NEXT: .cfi_def_cfa_offset 20480
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #4, lsl #12 // =16384
; CHECK-NEXT: .cfi_def_cfa_offset 4096
; CHECK-NEXT: add sp, sp, #4080
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 20464-byte stack object; its address escapes through %out.
%v = alloca i8, i64 20464, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 5*4k, the smallest frame probed with a loop
; Expected code: x9 is set to SP - 20480 as the loop's stop value. Each
; iteration drops SP by 4096 and probes with `str xzr, [sp]`, repeating
; until SP equals x9. While the loop runs the CFA is described relative to
; w9; it is switched back to wsp once the loop has finished.
define void @static_20480(ptr %out) #0 {
; CHECK-LABEL: static_20480:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480
; CHECK-NEXT: .cfi_def_cfa w9, 20496
; CHECK-NEXT: .LBB10_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB10_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 20480-byte stack object; its address escapes through %out.
%v = alloca i8, i64 20480, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 5*4k + 1024, large enough to use a loop, but not a multiple of 4KiB
; so has a remainder, but no extra probe.
; Expected code: the probing loop covers the first 20480 bytes (stop value
; in x9); the 1024-byte remainder is then allocated with a plain `sub` and
; is not followed by a probe.
define void @static_21504(ptr %out) #0 {
; CHECK-LABEL: static_21504:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480
; CHECK-NEXT: .cfi_def_cfa w9, 20496
; CHECK-NEXT: .LBB11_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB11_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 21520
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480
; CHECK-NEXT: .cfi_def_cfa_offset 1040
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 21504-byte stack object; its address escapes through %out.
%v = alloca i8, i64 21504, align 1
store ptr %v, ptr %out, align 8
ret void
}
; 5*4k+1040, large enough to use a loop, has a remainder and
; an extra probe.
; Expected code: the probing loop covers the first 20480 bytes (stop value
; in x9); the 1040-byte remainder is then allocated and followed by one
; more `str xzr, [sp]` probe.
define void @static_21520(ptr %out) #0 {
; CHECK-LABEL: static_21520:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #5, lsl #12 // =20480
; CHECK-NEXT: .cfi_def_cfa w9, 20496
; CHECK-NEXT: .LBB12_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.ne .LBB12_1
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: .cfi_def_cfa_register wsp
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 21536
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #5, lsl #12 // =20480
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 21520-byte stack object; its address escapes through %out.
%v = alloca i8, i64 21520, align 1
store ptr %v, ptr %out, align 8
ret void
}
; A small allocation, but with a very large alignment requirement. We do this
; by moving SP far enough that a sufficiently-aligned block will exist
; somewhere in the stack frame, so must probe the whole of that larger SP move.
; Expected code: a frame record (x29/x30) is set up first. The target SP is
; computed in x9 as SP - 4096 - 4080, masked down to an 8192-byte boundary.
; The loop then drops SP by 4096 per iteration; it leaves via `b.le` once SP
; has reached or passed x9, otherwise it probes with `str xzr, [sp]` and
; repeats. After the loop SP is set to x9 and touched once more with
; `ldr xzr, [sp]` (a load, not a store). The epilogue restores SP from x29.
define void @static_16_align_8192(ptr %out) #0 {
; CHECK-LABEL: static_16_align_8192:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #1, lsl #12 // =4096
; CHECK-NEXT: sub x9, x9, #4080
; CHECK-NEXT: and x9, x9, #0xffffffffffffe000
; CHECK-NEXT: .LBB13_1: // %entry
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
; CHECK-NEXT: cmp sp, x9
; CHECK-NEXT: b.le .LBB13_3
; CHECK-NEXT: // %bb.2: // %entry
; CHECK-NEXT: // in Loop: Header=BB13_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB13_1
; CHECK-NEXT: .LBB13_3: // %entry
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 16-byte stack object aligned to 8192 bytes; its address escapes through %out.
%v = alloca i8, i64 16, align 8192
store ptr %v, ptr %out, align 8
ret void
}
; A small allocation with a very large alignment requirement, but
; nevertheless small enough as to not need a loop.
; Expected code: after the frame record is set up, x9 = SP - 2032 is masked
; down to a 2048-byte boundary and written straight into SP, followed by a
; single `str xzr, [sp]` probe. The epilogue restores SP from x29.
define void @static_16_align_2048(ptr %out) #0 {
; CHECK-LABEL: static_16_align_2048:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #2032
; CHECK-NEXT: and sp, x9, #0xfffffffffffff800
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 16-byte stack object aligned to 2048 bytes; its address escapes through %out.
%v = alloca i8, i64 16, align 2048
store ptr %v, ptr %out, align 8
ret void
}
; A large(-ish) allocation with a very large alignment requirement, but
; nevertheless small enough as to not need a loop.
; Expected code: after the frame record is set up, x9 = SP - 2032 is masked
; down to a 2048-byte boundary and written straight into SP, followed by a
; single `str xzr, [sp]` probe. The epilogue restores SP from x29.
define void @static_2032_align_2048(ptr %out) #0 {
; CHECK-LABEL: static_2032_align_2048:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #2032
; CHECK-NEXT: and sp, x9, #0xfffffffffffff800
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 2032-byte stack object aligned to 2048 bytes; its address escapes through %out.
%v = alloca i8, i64 2032, align 2048
store ptr %v, ptr %out, align 8
ret void
}
; Test stack probing is enabled by module flags
; Unlike the other functions in this file, this one does not use attribute
; group #0; it only carries uwtable(async), so it has no function-level
; "probe-stack" or "stack-probe-size" attributes.
; Expected code: SP is dropped by 8192 and then 800 with no probe in between,
; then a post-indexed `str xzr, [sp], #-240` probes once and allocates the
; remaining 240 bytes. Only one probe appears for the whole frame.
; NOTE(review): presumably the single probe reflects the module-flag
; "stack-probe-size" of 9000 rather than the 4096 used via #0 elsewhere --
; confirm against the AArch64 frame lowering.
define void @static_9232(ptr %out) uwtable(async) {
; CHECK-LABEL: static_9232:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub sp, sp, #2, lsl #12 // =8192
; CHECK-NEXT: .cfi_def_cfa_offset 8208
; CHECK-NEXT: sub sp, sp, #800
; CHECK-NEXT: .cfi_def_cfa_offset 9008
; CHECK-NEXT: str xzr, [sp], #-240
; CHECK-NEXT: .cfi_def_cfa_offset 9248
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: add sp, sp, #2, lsl #12 // =8192
; CHECK-NEXT: .cfi_def_cfa_offset 1056
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 9232-byte stack object; its address escapes through %out.
%v = alloca i8, i64 9232, align 1
store ptr %v, ptr %out, align 8
ret void
}
; Test for a tight upper bound on the amount of stack adjustment
; due to stack realignment. No probes should appear.
; Expected code: after the frame record is set up, x9 = SP - 1008 is masked
; down to a 32-byte boundary and written into SP. No store or load of xzr at
; [sp] follows. The epilogue restores SP from x29.
define void @static_1008(ptr %out) #0 {
; CHECK-LABEL: static_1008:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x9, sp, #1008
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: str x8, [x0]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: .cfi_def_cfa wsp, 16
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; 1008-byte stack object aligned to 32 bytes (note the i32 element count);
; its address escapes through %out.
%v = alloca i8, i32 1008, align 32
store ptr %v, ptr %out, align 8
ret void
}
; Attribute group shared by every function above except @static_9232:
; async unwind tables, inline-asm stack probing with a 4096-byte probe size,
; and no frame pointer requested.
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" }
; Module-level probing settings, used when a function has no probing
; attributes of its own. The leading i32 is the module-flag merge behavior
; (4 and 8 are listed as Override and Min in the LLVM LangRef).
!llvm.module.flags = !{!0, !1}
; Module-wide "probe-stack" kind: inline-asm.
!0 = !{i32 4, !"probe-stack", !"inline-asm"}
; Module-wide "stack-probe-size": 9000 bytes.
!1 = !{i32 8, !"stack-probe-size", i32 9000}