llvm-project/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll
Benjamin Maxwell 12f392cff1
[AArch64][SME] Support aarch64-split-sve-objects with VLAs/realignment (#163816)
This was left out of the original patch (#142392) to simplify the
initial implementation. However, after refactoring the SVE
prologue/epilogue code in #162253, it's not much of an extension to
support this case.

The main change here is when restoring the SP from the FP for the SVE
restores, we may need an additional frame offset to move from the start
of the ZPR callee-saves to the start of the PPR callee-saves.

This patch also fixes a previously latent bug where we'd add the
`RealignmentPadding` when allocating the PPR locals, then again for the
ZPR locals. This was unnecessary as the stack only needs to be realigned
after all SVE allocations.
2025-11-04 13:39:20 +00:00

1108 lines
60 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 | FileCheck %s
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -aarch64-stack-hazard-in-non-streaming -aarch64-split-sve-objects -aarch64-streaming-hazard-size=1024 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
; CHECK-FRAMELAYOUT-LABEL: Function: zpr_and_ppr_local
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
; <GPRs>
; %ppr_local sp+2048+30*vscale (= #15, mul vl for str/ldr PPR)
; 14 x vscale bytes of padding sp+2048+16*vscale
; <hazard padding> sp+1024+16*vscale
; %zpr_local sp+1024
; <hazard padding>
; -> sp
define void @zpr_and_ppr_local(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: zpr_and_ppr_local:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #2048
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: add x8, sp, #2048
; CHECK-NEXT: str p0, [x8, #15, mul vl]
; CHECK-NEXT: add x8, sp, #1024
; CHECK-NEXT: str z0, [x8]
; CHECK-NEXT: add sp, sp, #2048
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
; One scalable predicate local and one scalable vector local. Both stores are
; volatile, so they cannot be optimised away.
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: zpr_and_ppr_local_fp
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Variable, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
; <GPRs>
; -> fp
; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR)
; 14 x vscale bytes of padding fp-16*vscale
; <hazard padding> fp-1024-16*vscale
; %zpr_local fp-1024-32*vscale (= #-2, mul vl for str/ldr ZPR)
; <hazard padding>
; -> sp
define void @zpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK-LABEL: zpr_and_ppr_local_fp:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #2048
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: str p0, [x29, #-1, mul vl]
; CHECK-NEXT: str z0, [x8, #-2, mul vl]
; CHECK-NEXT: add sp, sp, #2048
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Same locals as @zpr_and_ppr_local, but built with "frame-pointer"="all";
; the expected assembly above addresses both slots relative to x29.
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: fpr_and_ppr_local
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-16 x vscale], Type: Variable, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-16 x vscale], Type: Variable, Align: 16, Size: 1024
; <GPRs>
; %ppr_local sp+2064+14*vscale (= #7, mul vl for str/ldr PPR)
; 14 x vscale bytes of padding sp+2064
; <hazard padding> sp+1040
; %fpr_local sp+1032
; 8 bytes of padding sp+1024
; <hazard padding>
; -> sp
define void @fpr_and_ppr_local(<vscale x 16 x i1> %pred, double %double) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: fpr_and_ppr_local:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #2064
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: add x8, sp, #2064
; CHECK-NEXT: str p0, [x8, #7, mul vl]
; CHECK-NEXT: str d0, [sp, #1032]
; CHECK-NEXT: add sp, sp, #2064
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
; One scalable predicate local and one fixed-size double local. Both stores
; are volatile, so they cannot be optimised away.
%ppr_local = alloca <vscale x 16 x i1>
%fpr_local = alloca double
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile double %double, ptr %fpr_local
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: fpr_and_ppr_local_fp
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-16 x vscale], Type: Variable, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-16 x vscale], Type: Variable, Align: 16, Size: 1024
; <GPRs>
; -> fp
; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR)
; 14 x vscale bytes of padding
; <hazard padding>
; %fpr_local sp+1032
; 8 bytes of padding sp+1024
; <hazard padding>
; -> sp
define void @fpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, double %double) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK-LABEL: fpr_and_ppr_local_fp:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #2064
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: str p0, [x29, #-1, mul vl]
; CHECK-NEXT: str d0, [sp, #1032]
; CHECK-NEXT: add sp, sp, #2064
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Same locals as @fpr_and_ppr_local, but built with "frame-pointer"="all";
; the expected assembly above reaches %ppr_local via x29 and %fpr_local via sp.
%ppr_local = alloca <vscale x 16 x i1>
%fpr_local = alloca double
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile double %double, ptr %fpr_local
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: gpr_and_ppr_local
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2072-32 x vscale], Type: Variable, Align: 8, Size: 8
; <CS GPRs>
; %ppr_local sp+2064+30*vscale (= #15, mul vl for str/ldr PPR)
; 14 x vscale bytes of padding
; <hazard padding> sp+1040+16*vscale
; <fpr callee save: z8> sp+1040
; <hazard padding> sp+16
; %gpr_local sp+8
; 8 bytes of padding
; -> sp
define void @gpr_and_ppr_local(<vscale x 16 x i1> %pred, i64 %int) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: gpr_and_ppr_local:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2080 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
; CHECK-NEXT: add x8, sp, #2064
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: str p0, [x8, #15, mul vl]
; CHECK-NEXT: str x0, [sp, #8]
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
tail call void asm sideeffect "", "~{d8}"() #1 ; Spill an FPR so hazard padding is needed
; One scalable predicate local and one fixed-size i64 local, both written with
; volatile stores so they cannot be optimised away.
%ppr_local = alloca <vscale x 16 x i1>
%gpr_local = alloca i64
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile i64 %int, ptr %gpr_local
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: gpr_and_ppr_local_fp
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-32 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2064-32 x vscale], Type: Variable, Align: 16, Size: 1024
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2072-32 x vscale], Type: Variable, Align: 8, Size: 8
; <CS GPRs>
; -> fp
; %ppr_local fp-2*vscale (= #-1, mul vl for str/ldr PPR)
; 14 x vscale bytes of padding
; <hazard padding>
; <fpr callee save: z8>
; <hazard padding>
; %gpr_local sp+8
; 8 bytes of padding
; -> sp
define void @gpr_and_ppr_local_fp(<vscale x 16 x i1> %pred, i64 %int) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK-LABEL: gpr_and_ppr_local_fp:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1040
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: str p0, [x29, #-1, mul vl]
; CHECK-NEXT: str x0, [sp, #8]
; CHECK-NEXT: add sp, sp, #1040
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
tail call void asm sideeffect "", "~{d8}"() #1 ; Spill an FPR so hazard padding is needed
; Same locals as @gpr_and_ppr_local, but built with "frame-pointer"="all";
; the expected assembly above reaches %ppr_local via x29 and %gpr_local via sp.
%ppr_local = alloca <vscale x 16 x i1>
%gpr_local = alloca i64
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile i64 %int, ptr %gpr_local
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: all_stack_areas
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-34 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-304 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1040-320 x vscale], Type: Variable, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1048-320 x vscale], Type: Variable, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2080-320 x vscale], Type: Variable, Align: 16, Size: 1024
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2088-320 x vscale], Type: Variable, Align: 8, Size: 8
; <CS GPRs>
; <CS PPRs>
; %ppr_local sp+2080+286*vscale (addvl #17, addpl #7)
; 14 * vscale bytes of padding sp+2080+272*vscale
; <hazard padding> sp+1056+272*vscale
; <CS ZPRs> sp+1056+16*vscale
; %zpr_local sp+1056
; %fpr_local sp+1048
; 8 bytes of padding sp+1040
; <hazard padding> sp+16
; %gpr_local sp+8
; 8 bytes of padding sp
; -> sp
define void @all_stack_areas(<vscale x 16 x i1> %pred, double %fp) {
; CHECK-LABEL: all_stack_areas:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-17
; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1056
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0b, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0xa0, 0x01, 0x1e, 0x22 // sp + 2096 + 160 * VG
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 32 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 40 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 48 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d11 @ cfa - 56 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d12 @ cfa - 64 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d13 @ cfa - 72 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d14 @ cfa - 80 * VG - 1040
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xa8, 0x7f, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d15 @ cfa - 88 * VG - 1040
; CHECK-NEXT: add x0, sp, #2080
; CHECK-NEXT: add x8, sp, #2080
; CHECK-NEXT: add x1, sp, #1056
; CHECK-NEXT: addvl x0, x0, #17
; CHECK-NEXT: add x2, sp, #1048
; CHECK-NEXT: add x3, sp, #8
; CHECK-NEXT: addpl x0, x0, #7
; CHECK-NEXT: str d0, [sp, #1048]
; CHECK-NEXT: str p0, [x8, #143, mul vl]
; CHECK-NEXT: bl foo
; CHECK-NEXT: add sp, sp, #1056
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #17
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
; One local of each kind (scalable predicate, scalable vector, double, i64);
; the addresses of all four are passed to @foo below.
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
%fpr_local = alloca double
; // Needed to sort %fpr_local into the FPR region
store double %fp, ptr %fpr_local
; // Needed to sort %ppr_local into the PPR region
store <vscale x 16 x i1> %pred, ptr %ppr_local
%gpr_local = alloca i64
call void @foo(ptr %ppr_local, ptr %zpr_local, ptr %fpr_local, ptr %gpr_local)
ret void
}
; External function taking four pointers; @all_stack_areas and
; @all_stack_areas_fp call it with the addresses of their four locals.
declare void @foo(ptr, ptr, ptr, ptr)
; CHECK-FRAMELAYOUT-LABEL: Function: all_stack_areas_fp
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 16, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-34 x vscale], Type: Variable, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-304 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1056-320 x vscale], Type: Variable, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1064-320 x vscale], Type: Variable, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2096-320 x vscale], Type: Variable, Align: 16, Size: 1024
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2104-320 x vscale], Type: Variable, Align: 8, Size: 8
; <CS GPRs>
; -> fp
; <CS PPRs> fp-32*vscale
; %ppr_local fp-34*vscale (addpl #-17)
; 14 * vscale bytes of padding fp-48*vscale
; <hazard padding> fp-1024-48*vscale
; <CS ZPRs> fp-1024-304*vscale
; %zpr_local fp-1024-320*vscale (addvl #-20)
; %fpr_local sp+1048
; 8 bytes of padding sp+1040
; <hazard padding> sp+16
; %gpr_local sp+8
; 8 bytes of padding sp
; -> sp
define void @all_stack_areas_fp(<vscale x 16 x i1> %pred, double %fp) "frame-pointer"="all" {
; CHECK-LABEL: all_stack_areas_fp:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x28, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-17
; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1056
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w28, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 32 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 40 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 48 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d11 @ cfa - 56 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d12 @ cfa - 64 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d13 @ cfa - 72 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d14 @ cfa - 80 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xa8, 0x7f, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d15 @ cfa - 88 * VG - 1056
; CHECK-NEXT: sub x1, x29, #1024
; CHECK-NEXT: addpl x0, x29, #-17
; CHECK-NEXT: add x2, sp, #1048
; CHECK-NEXT: addvl x1, x1, #-20
; CHECK-NEXT: add x3, sp, #8
; CHECK-NEXT: str d0, [sp, #1048]
; CHECK-NEXT: str p0, [x29, #-17, mul vl]
; CHECK-NEXT: bl foo
; CHECK-NEXT: add sp, sp, #1056
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #17
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Same locals and call as @all_stack_areas, but built with
; "frame-pointer"="all"; the expected assembly above derives the addresses of
; %ppr_local and %zpr_local from x29, and those of %fpr_local and %gpr_local
; from sp.
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
%fpr_local = alloca double
; // Needed to sort %fpr_local into the FPR region
store double %fp, ptr %fpr_local
; // Needed to sort %ppr_local into the PPR region
store <vscale x 16 x i1> %pred, ptr %ppr_local
%gpr_local = alloca i64
call void @foo(ptr %ppr_local, ptr %zpr_local, ptr %fpr_local, ptr %gpr_local)
ret void
}
; CHECK-FRAMELAYOUT-LABEL: Function: svecc_call
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48], Type: Spill, Align: 16, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-56], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-2 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-4 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-6 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-8 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-10 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-12 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-14 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-16 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-18 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-20 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-22 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-64-24 x vscale], Type: Spill, Align: 2, Size: vscale x 2
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-48 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-64 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-80 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-96 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-112 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-128 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-144 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-160 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-176 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-192 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-208 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-224 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-240 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-256 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-272 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-1088-288 x vscale], Type: Spill, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-2112-288 x vscale], Type: Variable, Align: 16, Size: 1024
; Streaming-compatible function that clobbers x0/x27/x28/x3 through an empty
; inline asm and then calls @memset.
; Expect: Frame pointer (x29) set up and the value of `cntd x9` spilled to
; [sp, #16] (described as `vg` by the CFI). PPR callee-saves (p4-p15) allocated
; with `addvl #-2` and ZPR callee-saves (z8-z23) with `addvl #-16`, separated by
; 1024 bytes of hazard padding, with a further 1024 bytes allocated below the
; ZPR saves. The call to memset is bracketed by `smstop sm`/`smstart sm`, each
; guarded by bit 0 of the `__arm_sme_state` result kept in x19.
define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: svecc_call:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: .cfi_def_cfa w29, 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w26, -16
; CHECK-NEXT: .cfi_offset w27, -24
; CHECK-NEXT: .cfi_offset w28, -32
; CHECK-NEXT: .cfi_offset vg, -48
; CHECK-NEXT: .cfi_offset w30, -56
; CHECK-NEXT: .cfi_offset w29, -64
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-16
; CHECK-NEXT: str z23, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: tbz w19, #0, .LBB8_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB8_2: // %entry
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: mov w1, #45 // =0x2d
; CHECK-NEXT: mov w2, #37 // =0x25
; CHECK-NEXT: bl memset
; CHECK-NEXT: tbz w19, #0, .LBB8_4
; CHECK-NEXT: // %bb.3: // %entry
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_4: // %entry
; CHECK-NEXT: mov w0, #22647 // =0x5877
; CHECK-NEXT: movk w0, #59491, lsl #16
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: ldr z23, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #16
; CHECK-NEXT: .cfi_restore z8
; CHECK-NEXT: .cfi_restore z9
; CHECK-NEXT: .cfi_restore z10
; CHECK-NEXT: .cfi_restore z11
; CHECK-NEXT: .cfi_restore z12
; CHECK-NEXT: .cfi_restore z13
; CHECK-NEXT: .cfi_restore z14
; CHECK-NEXT: .cfi_restore z15
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: .cfi_def_cfa wsp, 64
; CHECK-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w26
; CHECK-NEXT: .cfi_restore w27
; CHECK-NEXT: .cfi_restore w28
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore w29
; CHECK-NEXT: ret
entry:
; Empty inline asm; its clobber list includes x27/x28, which the expected
; prologue/epilogue above spills and reloads.
tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
; The call that the expected output wraps in the conditional smstop/smstart.
%call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
; -396142473 is 0xE8635877 as an i32, i.e. the mov #0x5877 / movk #59491 pair.
ret i32 -396142473
}
; External callee used by @svecc_call; declared with i32 value and length
; parameters.
declare ptr @memset(ptr, i32, i32)
; PPR and ZPR locals plus a GPR local that requests 32-byte alignment, in a
; streaming-compatible function.
; Expect: Frame pointer (x29). The new SP is computed in x9 (2064 fixed bytes
; plus `addvl #-2`) and realigned exactly once, by a single
; `and sp, x9, #0xffffffffffffffe0`, after all SVE allocations. The PPR local is
; addressed from FP, the ZPR local from `FP - 1024`, and the GPR local from the
; realigned SP. The epilogue restores the SP from FP.
define void @zpr_and_ppr_local_realignment(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: zpr_and_ppr_local_realignment:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: sub x9, sp, #2064
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl x9, x9, #-2
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: str p0, [x29, #-1, mul vl]
; CHECK-NEXT: str z0, [x8, #-2, mul vl]
; CHECK-NEXT: str x0, [sp]
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
; The 32-byte alignment request is what triggers the `and sp, ...` realignment.
%gpr_local = alloca i64, align 32
; Volatile stores so that each local is actually written.
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
store volatile i64 %gpr, ptr %gpr_local
ret void
}
; PPR and ZPR locals plus an 800-byte GPR local array (100 x i64), with
; "probe-stack"="inline-asm" (probe size 4096) and no frame pointer.
; Expect: The fixed-size (2848 bytes) and scalable (`addvl #-2`) parts of the
; frame are allocated back to back, followed by a single `str xzr, [sp]`
; (presumably the stack probe). Without a frame pointer all locals are
; addressed from SP: the PPR local from `SP + 2848`, the ZPR local at
; `SP + 1824`, and the GPR array at SP.
define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr)
; CHECK-LABEL: zpr_and_ppr_local_stack_probing:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #2848
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xb0, 0x16, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2864 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: add x8, sp, #2848
; CHECK-NEXT: str p0, [x8, #15, mul vl]
; CHECK-NEXT: add x8, sp, #1824
; CHECK-NEXT: str z0, [x8]
; CHECK-NEXT: str x0, [sp]
; CHECK-NEXT: add sp, sp, #2848
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
"probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" "aarch64_pstate_sm_compatible"
{
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
; 100 x i64 = 800 bytes of fixed-size GPR locals.
%gpr_local = alloca i64, i64 100, align 8
; Volatile stores so that each local is actually written.
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
store volatile i64 %gpr, ptr %gpr_local
ret void
}
; Only PPR callee-saves + a VLA
; Expect: No hazard padding. Frame pointer (x29), p4-p6 callee saves allocated
; with `addvl #-1`, PPR saves restored using frame pointer `addvl sp, x29, #-1`.
; The VLA is carved out by rounding %n up to a multiple of 16 and subtracting
; it from SP, so the epilogue cannot find the saves relative to SP.
define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) {
; CHECK-LABEL: only_ppr_csr_vla:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: // fake_use: $x8
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: addvl sp, x29, #-1
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Dynamically sized alloca (the VLA).
%alloc = alloca i8, i64 %n, align 1
; Use of %alloc; appears as `fake_use: $x8` in the expected output.
call void (...) @llvm.fake.use(ptr %alloc)
; Empty inline asm clobbering p4-p6, so that they are saved and restored.
tail call void asm sideeffect "", "~{p4},~{p5},~{p6}"()
ret void
}
; Only ZPR callee-saves + a VLA
; Expect: Hazard padding, Frame pointer (x29), z8-z10 callee saves allocated
; with `addvl #-3`. ZPR saves restored from `FP - 1024 + addvl #-3`.
; The first 1024 bytes of padding are part of the initial `sub sp, sp, #1056`
; (x29 is set to `sp + 1024`), which is why the GPR saves use offsets >= 1024.
define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) {
; CHECK-LABEL: only_zpr_csr_vla:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #1056
; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
; CHECK-NEXT: add x29, sp, #1024
; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #1040] // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 24 * VG - 1056
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: // fake_use: $x8
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: addvl sp, x8, #-3
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: sub sp, x29, #1024
; CHECK-NEXT: ldr x19, [sp, #1040] // 8-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #1056
; CHECK-NEXT: ret
; Dynamically sized alloca (the VLA).
%alloc = alloca i8, i64 %n, align 1
; Use of %alloc; appears as `fake_use: $x8` in the expected output.
call void (...) @llvm.fake.use(ptr %alloc)
; Empty inline asm clobbering z8-z10, so that they are saved and restored.
tail call void asm sideeffect "", "~{z8},~{z9},~{z10}"()
ret void
}
; PPR+ZPR callee-saves + a VLA
; Expect: Hazard padding, Frame pointer (x29), PPR (p4-p6) and ZPR (z8-z10)
; callee-saves allocated separately, with hazard padding of 1024 between the
; areas. ZPR callee saves restored by `FP - 1024 + addvl #-4`, PPR callee saves
; restored by `FP + addvl #-1`.
; The ZPR restore uses #-4 although the ZPR saves were allocated with
; `addvl #-3`: they also sit below the one-vector PPR save area (`addvl #-1`).
define aarch64_sve_vector_pcs void @zpr_ppr_csr_vla(i64 %n) {
; CHECK-LABEL: zpr_ppr_csr_vla:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1056
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: // fake_use: $x8
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: addvl sp, x8, #-4
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: addvl sp, x29, #-1
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Dynamically sized alloca (the VLA).
%alloc = alloca i8, i64 %n, align 1
; Use of %alloc; appears as `fake_use: $x8` in the expected output.
call void (...) @llvm.fake.use(ptr %alloc)
; Empty inline asm clobbering p4-p6 and z8-z10, forcing both kinds of save.
tail call void asm sideeffect "", "~{p4},~{p5},~{p6},~{z8},~{z9},~{z10}"()
ret void
}
; Only PPR callee-saves (and ZPR/PPR locals) + a VLA
; Expect: Hazard padding, Frame pointer (x29), PPR (p4-p6) callee-saves, with
; hazard padding after the PPR callee saves (1024) and after the FPR local area
; (1024) -- coalesced to 2048. Only PPRs restored by moving the SP to
; `FP + addvl #-1`.
define void @sve_locals_only_ppr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) {
; CHECK-LABEL: sve_locals_only_ppr_csr_vla:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #2048
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: // fake_use: $x8
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: str p0, [x29, #-9, mul vl]
; CHECK-NEXT: str z0, [x8, #-3, mul vl]
; CHECK-NEXT: addvl sp, x29, #-1
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Dynamically sized alloca (the VLA).
%alloc = alloca i8, i64 %n, align 1
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
; Empty inline asm clobbering p4-p6, so that they are saved and restored.
tail call void asm sideeffect "", "~{p4},~{p5},~{p6}"()
; Use of %alloc; appears as `fake_use: $x8` in the expected output.
call void (...) @llvm.fake.use(ptr %alloc)
; Volatile stores so that both SVE locals are actually written; the PPR local
; is addressed from FP and the ZPR local from `FP - 1024`.
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
ret void
}
; Only ZPR callee-saves (and ZPR/PPR locals) + a VLA
; Expect: Hazard padding, Frame pointer (x29), ZPR (z8-z10) callee-saves, with
; hazard padding before the ZPR callee saves (1024) and after the ZPR local area
; (1024). Only ZPRs restored by moving the SP to `FP - 1024 + addvl #-4`.
; The PPR local is stored at `[x29, #-1, mul vl]` and the ZPR local at
; `FP - 1024` with `#-5, mul vl`, i.e. just below the z8-z10 saves.
define void @sve_locals_only_zpr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) {
; CHECK-LABEL: sve_locals_only_zpr_csr_vla:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1056
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: // fake_use: $x8
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: str p0, [x29, #-1, mul vl]
; CHECK-NEXT: str z0, [x8, #-5, mul vl]
; CHECK-NEXT: addvl sp, x8, #-4
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Dynamically sized alloca (the VLA).
%alloc = alloca i8, i64 %n, align 1
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
; Empty inline asm clobbering z8-z10, so that they are saved and restored.
tail call void asm sideeffect "", "~{z8},~{z9},~{z10}"()
; Use of %alloc; appears as `fake_use: $x8` in the expected output.
call void (...) @llvm.fake.use(ptr %alloc)
; Volatile stores so that both SVE locals are actually written.
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
ret void
}
; PPR+ZPR callee-saves (and ZPR/PPR locals) + a VLA
; Expect: Hazard padding, Frame pointer (x29), PPR (p4-p6) and ZPR (z8-z10)
; callee-saves, with hazard padding before the ZPR callee saves (1024) and after
; the ZPR local area (1024). ZPRs restored by moving the SP to
; `FP - 1024 + addvl #-5`, PPRs restored by moving SP to `FP + addvl #-1`.
; The #-5 is the sum of the two allocations made above the ZPR saves:
; `addvl #-1` (PPR saves) and `addvl #-4`.
define void @sve_locals_zpr_ppr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector) {
; CHECK-LABEL: sve_locals_zpr_ppr_csr_vla:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #1024
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 24 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 32 * VG - 1056
; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 40 * VG - 1056
; CHECK-NEXT: add x9, x0, #15
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: // fake_use: $x8
; CHECK-NEXT: sub x8, x29, #1024
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: str p0, [x29, #-9, mul vl]
; CHECK-NEXT: str z0, [x8, #-6, mul vl]
; CHECK-NEXT: addvl sp, x8, #-5
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: addvl sp, x29, #-1
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
; Dynamically sized alloca (the VLA).
%alloc = alloca i8, i64 %n, align 1
%ppr_local = alloca <vscale x 16 x i1>
%zpr_local = alloca <vscale x 16 x i8>
; Empty inline asm clobbering p4-p6 and z8-z10, forcing both kinds of save.
tail call void asm sideeffect "", "~{p4},~{p5},~{p6},~{z8},~{z9},~{z10}"()
; Use of %alloc; appears as `fake_use: $x8` in the expected output.
call void (...) @llvm.fake.use(ptr %alloc)
; Volatile stores so that both SVE locals are actually written.
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
ret void
}