We need to be able to address emergency spill slots without requiring register scavenging. This requires the emergency spill slot to be near the SP or the FP to keep the offset small enough. If there is a large reserved call frame, we can't keep the emergency spill slot near SP, and we might not have a frame pointer. This patch forces the use of a frame pointer when the max reserved call frame is large so we can keep the emergency spill slot near it. This idea is borrowed from AArch64. Multiple MIR tests had to be updated to set the max call frame size, as the reserved registers are frozen before mirFileLoaded is called. I copied mirFileLoaded from AArch64, but it appears the register freezing moved after the AArch64 code was written. Fixes #180199.
593 lines
21 KiB
LLVM
593 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
|
|
; RUN: | FileCheck %s -check-prefix=RV64I
|
|
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
|
|
; RUN: | FileCheck %s -check-prefix=RV32I
|
|
|
|
; Tests copied from AArch64.
|
|
|
|
; Dynamically-sized allocation, needs a loop which can handle any size at
|
|
; runtime. The final iteration of the loop will temporarily put SP below the
|
|
; target address, but this doesn't break any of the ABI constraints on the
|
|
; stack, and also doesn't probe below the target SP value.
|
|
define void @dynamic(i64 %size, ptr %out) #0 {
|
|
; RV64I-LABEL: dynamic:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: addi sp, sp, -16
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 16
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: addi s0, sp, 16
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: addi a0, a0, 15
|
|
; RV64I-NEXT: andi a0, a0, -16
|
|
; RV64I-NEXT: sub a0, sp, a0
|
|
; RV64I-NEXT: lui a2, 1
|
|
; RV64I-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: blt a0, sp, .LBB0_1
|
|
; RV64I-NEXT: # %bb.2:
|
|
; RV64I-NEXT: mv sp, a0
|
|
; RV64I-NEXT: sd a0, 0(a1)
|
|
; RV64I-NEXT: addi sp, s0, -16
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 16
|
|
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: addi sp, sp, 16
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: dynamic:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: addi sp, sp, -16
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 16
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: addi s0, sp, 16
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: addi a0, a0, 15
|
|
; RV32I-NEXT: andi a0, a0, -16
|
|
; RV32I-NEXT: sub a0, sp, a0
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: blt a0, sp, .LBB0_1
|
|
; RV32I-NEXT: # %bb.2:
|
|
; RV32I-NEXT: mv sp, a0
|
|
; RV32I-NEXT: sw a0, 0(a2)
|
|
; RV32I-NEXT: addi sp, s0, -16
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 16
|
|
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: addi sp, sp, 16
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
; Runtime-sized byte allocation. The expected code above rounds the size up
; to a multiple of 16, then lowers sp by 4096 (lui 1) per loop iteration and
; stores zero to 0(sp) after each step.
%v = alloca i8, i64 %size, align 1
|
|
; Store the address of the allocation through %out.
store ptr %v, ptr %out, align 8
|
|
ret void
|
|
}
|
|
|
|
; This function has a fixed-size stack slot and a dynamic one. The fixed size
|
|
; slot isn't large enough that we would normally probe it, but we need to do so
|
|
; here otherwise the gap between the CSR save and the first probe of the
|
|
; dynamic allocation could be too large when the size of the dynamic
|
|
; allocation is close to the guard size.
|
|
define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 {
|
|
; RV64I-LABEL: dynamic_fixed:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: addi sp, sp, -80
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 80
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: addi s0, sp, 80
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: addi a3, s0, -80
|
|
; RV64I-NEXT: addi a0, a0, 15
|
|
; RV64I-NEXT: sd a3, 0(a1)
|
|
; RV64I-NEXT: andi a0, a0, -16
|
|
; RV64I-NEXT: sub a0, sp, a0
|
|
; RV64I-NEXT: lui a1, 1
|
|
; RV64I-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
|
; RV64I-NEXT: sub sp, sp, a1
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: blt a0, sp, .LBB1_1
|
|
; RV64I-NEXT: # %bb.2:
|
|
; RV64I-NEXT: mv sp, a0
|
|
; RV64I-NEXT: sd a0, 0(a2)
|
|
; RV64I-NEXT: addi sp, s0, -80
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 80
|
|
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: addi sp, sp, 80
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: dynamic_fixed:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: addi sp, sp, -80
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 80
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: addi s0, sp, 80
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: addi a1, s0, -72
|
|
; RV32I-NEXT: addi a0, a0, 15
|
|
; RV32I-NEXT: sw a1, 0(a2)
|
|
; RV32I-NEXT: andi a0, a0, -16
|
|
; RV32I-NEXT: sub a0, sp, a0
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: blt a0, sp, .LBB1_1
|
|
; RV32I-NEXT: # %bb.2:
|
|
; RV32I-NEXT: mv sp, a0
|
|
; RV32I-NEXT: sw a0, 0(a3)
|
|
; RV32I-NEXT: addi sp, s0, -80
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 80
|
|
; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: addi sp, sp, 80
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
; Fixed 64-byte slot; the expected code above forms its address as an offset
; from s0 and stores that address through %out1.
%v1 = alloca i8, i64 64, align 1
|
|
store ptr %v1, ptr %out1, align 8
|
|
; Runtime-sized allocation; the expected code handles it with the loop that
; lowers sp by 4096 (lui 1) per iteration and stores zero to 0(sp).
%v2 = alloca i8, i64 %size, align 1
|
|
store ptr %v2, ptr %out2, align 8
|
|
ret void
|
|
}
|
|
|
|
; Dynamic allocation, with an alignment requirement greater than the alignment
|
|
; of SP. Done by ANDing the target SP with a constant to align it down, then
|
|
; doing the loop as normal. Note that we also re-align the stack in the prolog,
|
|
; which isn't actually needed because the only aligned allocations are dynamic;
|
|
; this is done even without stack probing.
|
|
define void @dynamic_align_64(i64 %size, ptr %out) #0 {
|
|
; RV64I-LABEL: dynamic_align_64:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: addi sp, sp, -64
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 64
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: .cfi_offset s1, -24
|
|
; RV64I-NEXT: addi s0, sp, 64
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: andi sp, sp, -64
|
|
; RV64I-NEXT: mv s1, sp
|
|
; RV64I-NEXT: addi a0, a0, 15
|
|
; RV64I-NEXT: andi a0, a0, -16
|
|
; RV64I-NEXT: sub a0, sp, a0
|
|
; RV64I-NEXT: andi a0, a0, -64
|
|
; RV64I-NEXT: lui a2, 1
|
|
; RV64I-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: blt a0, sp, .LBB2_1
|
|
; RV64I-NEXT: # %bb.2:
|
|
; RV64I-NEXT: mv sp, a0
|
|
; RV64I-NEXT: sd a0, 0(a1)
|
|
; RV64I-NEXT: addi sp, s0, -64
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 64
|
|
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: .cfi_restore s1
|
|
; RV64I-NEXT: addi sp, sp, 64
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: dynamic_align_64:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: addi sp, sp, -64
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 64
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: .cfi_offset s1, -12
|
|
; RV32I-NEXT: addi s0, sp, 64
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: andi sp, sp, -64
|
|
; RV32I-NEXT: mv s1, sp
|
|
; RV32I-NEXT: addi a0, a0, 15
|
|
; RV32I-NEXT: andi a0, a0, -16
|
|
; RV32I-NEXT: sub a0, sp, a0
|
|
; RV32I-NEXT: andi a0, a0, -64
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: blt a0, sp, .LBB2_1
|
|
; RV32I-NEXT: # %bb.2:
|
|
; RV32I-NEXT: mv sp, a0
|
|
; RV32I-NEXT: sw a0, 0(a2)
|
|
; RV32I-NEXT: addi sp, s0, -64
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 64
|
|
; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: .cfi_restore s1
|
|
; RV32I-NEXT: addi sp, sp, 64
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
; Runtime-sized allocation aligned to 64 bytes. The expected code above rounds
; the target address down with "andi a0, a0, -64" before entering the loop
; that lowers sp by 4096 (lui 1) per iteration.
%v = alloca i8, i64 %size, align 64
|
|
; Store the address of the allocation through %out.
store ptr %v, ptr %out, align 8
|
|
ret void
|
|
}
|
|
|
|
; Dynamic allocation, with an alignment greater than the stack guard size. The
|
|
; only difference to the dynamic allocation is the constant used for aligning
|
|
; the target SP, the loop will probe the whole allocation without needing to
|
|
; know about the alignment padding.
|
|
define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
|
|
; RV64I-LABEL: dynamic_align_8192:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: addi sp, sp, -2032
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 2032
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: .cfi_offset s1, -24
|
|
; RV64I-NEXT: addi s0, sp, 2032
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: lui a2, 1
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: addi sp, sp, -2048
|
|
; RV64I-NEXT: addi sp, sp, -16
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: srli a2, sp, 13
|
|
; RV64I-NEXT: slli sp, a2, 13
|
|
; RV64I-NEXT: mv s1, sp
|
|
; RV64I-NEXT: addi a0, a0, 15
|
|
; RV64I-NEXT: lui a2, 1048574
|
|
; RV64I-NEXT: andi a0, a0, -16
|
|
; RV64I-NEXT: sub a0, sp, a0
|
|
; RV64I-NEXT: and a0, a0, a2
|
|
; RV64I-NEXT: lui a2, 1
|
|
; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: blt a0, sp, .LBB3_1
|
|
; RV64I-NEXT: # %bb.2:
|
|
; RV64I-NEXT: mv sp, a0
|
|
; RV64I-NEXT: sd a0, 0(a1)
|
|
; RV64I-NEXT: addi sp, s0, -2032
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 2032
|
|
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: .cfi_restore s1
|
|
; RV64I-NEXT: addi sp, sp, 2032
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: dynamic_align_8192:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: addi sp, sp, -2032
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 2032
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s1, 2020(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: .cfi_offset s1, -12
|
|
; RV32I-NEXT: addi s0, sp, 2032
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: addi sp, sp, -2048
|
|
; RV32I-NEXT: addi sp, sp, -16
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: srli a1, sp, 13
|
|
; RV32I-NEXT: slli sp, a1, 13
|
|
; RV32I-NEXT: mv s1, sp
|
|
; RV32I-NEXT: addi a0, a0, 15
|
|
; RV32I-NEXT: lui a1, 1048574
|
|
; RV32I-NEXT: andi a0, a0, -16
|
|
; RV32I-NEXT: sub a0, sp, a0
|
|
; RV32I-NEXT: and a0, a0, a1
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: blt a0, sp, .LBB3_1
|
|
; RV32I-NEXT: # %bb.2:
|
|
; RV32I-NEXT: mv sp, a0
|
|
; RV32I-NEXT: sw a0, 0(a2)
|
|
; RV32I-NEXT: addi sp, s0, -2032
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 2032
|
|
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s1, 2020(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: .cfi_restore s1
|
|
; RV32I-NEXT: addi sp, sp, 2032
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
; Runtime-sized allocation aligned to 8192 bytes. In the expected code above,
; the prologue re-aligns sp with a shift pair by 13 (srli/slli), and the
; target address is masked with the constant built by "lui ..., 1048574"
; before the loop that lowers sp by 4096 (lui 1) per iteration.
%v = alloca i8, i64 %size, align 8192
|
|
; Store the address of the allocation through %out.
store ptr %v, ptr %out, align 8
|
|
ret void
|
|
}
|
|
|
|
; If a function has variable-sized stack objects, then any function calls which
|
|
; need to pass arguments on the stack must allocate the stack space for them
|
|
; dynamically, to ensure they are at the bottom of the frame.
|
|
define void @no_reserved_call_frame(i64 %n) #0 {
|
|
; RV64I-LABEL: no_reserved_call_frame:
|
|
; RV64I: # %bb.0: # %entry
|
|
; RV64I-NEXT: addi sp, sp, -16
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 16
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: addi s0, sp, 16
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: slli a0, a0, 2
|
|
; RV64I-NEXT: addi a0, a0, 15
|
|
; RV64I-NEXT: andi a0, a0, -16
|
|
; RV64I-NEXT: sub a0, sp, a0
|
|
; RV64I-NEXT: lui a1, 1
|
|
; RV64I-NEXT: .LBB4_1: # %entry
|
|
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV64I-NEXT: sub sp, sp, a1
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: blt a0, sp, .LBB4_1
|
|
; RV64I-NEXT: # %bb.2: # %entry
|
|
; RV64I-NEXT: mv sp, a0
|
|
; RV64I-NEXT: lui a1, 1
|
|
; RV64I-NEXT: sub sp, sp, a1
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: call callee_stack_args
|
|
; RV64I-NEXT: lui a0, 1
|
|
; RV64I-NEXT: add sp, sp, a0
|
|
; RV64I-NEXT: addi sp, s0, -16
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 16
|
|
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: addi sp, sp, 16
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: no_reserved_call_frame:
|
|
; RV32I: # %bb.0: # %entry
|
|
; RV32I-NEXT: addi sp, sp, -16
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 16
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: addi s0, sp, 16
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: slli a0, a0, 2
|
|
; RV32I-NEXT: addi a0, a0, 15
|
|
; RV32I-NEXT: andi a0, a0, -16
|
|
; RV32I-NEXT: sub a0, sp, a0
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: .LBB4_1: # %entry
|
|
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: blt a0, sp, .LBB4_1
|
|
; RV32I-NEXT: # %bb.2: # %entry
|
|
; RV32I-NEXT: mv sp, a0
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: addi sp, sp, -32
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: call callee_stack_args
|
|
; RV32I-NEXT: lui a0, 1
|
|
; RV32I-NEXT: addi a0, a0, 32
|
|
; RV32I-NEXT: add sp, sp, a0
|
|
; RV32I-NEXT: addi sp, s0, -16
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 16
|
|
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: addi sp, sp, 16
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
entry:
|
|
; Runtime-sized allocation of %n x i32; the expected code above scales %n by 4
; with "slli a0, a0, 2" before rounding up to a multiple of 16.
%v = alloca i32, i64 %n
|
|
; The [518 x i64] argument is poison; only its size matters here. The expected
; code lowers sp just before the call (4096 bytes on RV64, 4096 + 32 on RV32),
; stores zero to 0(sp) after each step, and raises sp again right after.
call void @callee_stack_args(ptr %v, [518 x i64] poison)
|
|
ret void
|
|
}
|
|
|
|
; Same as above but without a variable-sized allocation, so the reserved call
|
|
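; frame can be folded into the fixed-size allocation in the prologue.
; NOTE(review): the expected code below still lowers sp shortly before the
; call and raises it right after, so this description may be stale -- confirm.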
|
|
define void @reserved_call_frame(i64 %n) #0 {
|
|
; RV64I-LABEL: reserved_call_frame:
|
|
; RV64I: # %bb.0: # %entry
|
|
; RV64I-NEXT: addi sp, sp, -2032
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 2032
|
|
; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: addi s0, sp, 2032
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: addi sp, sp, -64
|
|
; RV64I-NEXT: lui a0, 1
|
|
; RV64I-NEXT: sub sp, sp, a0
|
|
; RV64I-NEXT: addi a0, s0, -2048
|
|
; RV64I-NEXT: addi a0, a0, -48
|
|
; RV64I-NEXT: call callee_stack_args
|
|
; RV64I-NEXT: lui a0, 1
|
|
; RV64I-NEXT: add sp, sp, a0
|
|
; RV64I-NEXT: addi sp, s0, -2032
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 2032
|
|
; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: addi sp, sp, 2032
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: reserved_call_frame:
|
|
; RV32I: # %bb.0: # %entry
|
|
; RV32I-NEXT: addi sp, sp, -2032
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 2032
|
|
; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: addi s0, sp, 2032
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: addi sp, sp, -64
|
|
; RV32I-NEXT: lui a0, 1
|
|
; RV32I-NEXT: sub sp, sp, a0
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: addi sp, sp, -32
|
|
; RV32I-NEXT: addi a0, s0, -2048
|
|
; RV32I-NEXT: addi a0, a0, -36
|
|
; RV32I-NEXT: call callee_stack_args
|
|
; RV32I-NEXT: lui a0, 1
|
|
; RV32I-NEXT: addi a0, a0, 32
|
|
; RV32I-NEXT: add sp, sp, a0
|
|
; RV32I-NEXT: addi sp, s0, -2032
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 2032
|
|
; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: addi sp, sp, 2032
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
entry:
|
|
; Fixed-size allocation of 518 x i32; this function has no runtime-sized
; alloca, and %n is unused. The expected code above addresses the buffer
; relative to s0 (addi a0, s0, -2048 followed by a second addi).
%v = alloca i32, i64 518
|
|
; Same callee and poison [518 x i64] argument as in @no_reserved_call_frame.
call void @callee_stack_args(ptr %v, [518 x i64] poison)
|
|
ret void
|
|
}
|
|
|
|
; External callee used by @no_reserved_call_frame and @reserved_call_frame.
; It takes a pointer and a [518 x i64] array argument; both callers pass
; poison for the array.
declare void @callee_stack_args(ptr, [518 x i64])
|
|
|
|
; Dynamic allocation of vectors
|
|
define void @dynamic_vector(i64 %size, ptr %out) #0 {
|
|
; RV64I-LABEL: dynamic_vector:
|
|
; RV64I: # %bb.0:
|
|
; RV64I-NEXT: addi sp, sp, -16
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 16
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
|
|
; RV64I-NEXT: .cfi_offset ra, -8
|
|
; RV64I-NEXT: .cfi_offset s0, -16
|
|
; RV64I-NEXT: addi s0, sp, 16
|
|
; RV64I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV64I-NEXT: csrr a2, vlenb
|
|
; RV64I-NEXT: mul a0, a2, a0
|
|
; RV64I-NEXT: slli a0, a0, 1
|
|
; RV64I-NEXT: sub a0, sp, a0
|
|
; RV64I-NEXT: lui a2, 1
|
|
; RV64I-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
|
|
; RV64I-NEXT: sub sp, sp, a2
|
|
; RV64I-NEXT: sd zero, 0(sp)
|
|
; RV64I-NEXT: blt a0, sp, .LBB6_1
|
|
; RV64I-NEXT: # %bb.2:
|
|
; RV64I-NEXT: mv sp, a0
|
|
; RV64I-NEXT: sd a0, 0(a1)
|
|
; RV64I-NEXT: addi sp, s0, -16
|
|
; RV64I-NEXT: .cfi_def_cfa sp, 16
|
|
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
|
|
; RV64I-NEXT: .cfi_restore ra
|
|
; RV64I-NEXT: .cfi_restore s0
|
|
; RV64I-NEXT: addi sp, sp, 16
|
|
; RV64I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV64I-NEXT: ret
|
|
;
|
|
; RV32I-LABEL: dynamic_vector:
|
|
; RV32I: # %bb.0:
|
|
; RV32I-NEXT: addi sp, sp, -16
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 16
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
|
|
; RV32I-NEXT: .cfi_offset ra, -4
|
|
; RV32I-NEXT: .cfi_offset s0, -8
|
|
; RV32I-NEXT: addi s0, sp, 16
|
|
; RV32I-NEXT: .cfi_def_cfa s0, 0
|
|
; RV32I-NEXT: csrr a1, vlenb
|
|
; RV32I-NEXT: mul a0, a1, a0
|
|
; RV32I-NEXT: slli a0, a0, 1
|
|
; RV32I-NEXT: sub a0, sp, a0
|
|
; RV32I-NEXT: lui a1, 1
|
|
; RV32I-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
|
|
; RV32I-NEXT: sub sp, sp, a1
|
|
; RV32I-NEXT: sw zero, 0(sp)
|
|
; RV32I-NEXT: blt a0, sp, .LBB6_1
|
|
; RV32I-NEXT: # %bb.2:
|
|
; RV32I-NEXT: mv sp, a0
|
|
; RV32I-NEXT: sw a0, 0(a2)
|
|
; RV32I-NEXT: addi sp, s0, -16
|
|
; RV32I-NEXT: .cfi_def_cfa sp, 16
|
|
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
|
|
; RV32I-NEXT: .cfi_restore ra
|
|
; RV32I-NEXT: .cfi_restore s0
|
|
; RV32I-NEXT: addi sp, sp, 16
|
|
; RV32I-NEXT: .cfi_def_cfa_offset 0
|
|
; RV32I-NEXT: ret
|
|
; Runtime-sized allocation of %size scalable vectors. The expected code above
; derives the byte count from vlenb (csrr, then mul by %size and slli by 1)
; before the loop that lowers sp by 4096 (lui 1) per iteration.
%v = alloca <vscale x 4 x float>, i64 %size, align 16
|
|
; Store the address of the allocation through %out.
store ptr %v, ptr %out, align 8
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced by the function definitions above:
; asynchronous unwind tables, "probe-stack"="inline-asm", and
; "frame-pointer"="none". Note that the expected code in several tests still
; sets up s0 even though no frame pointer is requested here.
attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" }
|