
This is based on other targets like PPC/AArch64 and some experiments. This PR only enables bidirectional scheduling and register-pressure tracking. Disclaimer: I haven't tested it on many cores, so maybe we should turn some of these options into features. I believe downstream users must have tried this before, so feedback is welcome.
637 lines
25 KiB
LLVM
637 lines
25 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
; RUN: llc -mtriple riscv32-unknown-linux-gnu -mattr=experimental-zicfilp \
; RUN:   -code-model=large -o - %s \
; RUN:   | FileCheck %s -check-prefix=CHECK-LARGE-ZICFILP
; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s

; Configurations exercised by the commands above
;   * riscv32 Linux and riscv32 bare-metal ELF with default options, both
;     verified against the default check prefix;
;   * riscv32 Linux with experimental-zicfilp and the large code model,
;     verified against its own dedicated check prefix.

; Perform tail call optimization for global address.
; The default runs expect the call to collapse into a single `tail` pseudo.
; The large-code-model Zicfilp run instead expects the target address to be
; loaded from .LCPI0_0 into t2 and reached with `jr t2`.
declare i32 @callee_tail(i32 %i)
define i32 @caller_tail(i32 %i) nounwind {
; CHECK-LABEL: caller_tail:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    tail callee_tail
;
; CHECK-LARGE-ZICFILP-LABEL: caller_tail:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi0:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI0_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi0)(a1)
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
entry:
  %r = tail call i32 @callee_tail(i32 %i)
  ret i32 %r
}

; Perform tail call optimization for external symbol.
; The memcpy intrinsic in an optsize caller is expected to become a tail call
; to `memcpy`. Before the jump the expected code places the address of @dest
; in a0, the incoming %src pointer in a1 and the length 7 in a2.
@dest = global [2 x i8] zeroinitializer
declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)
define void @caller_extern(ptr %src) optsize {
; CHECK-LABEL: caller_extern:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, %hi(dest)
; CHECK-NEXT:    addi a1, a1, %lo(dest)
; CHECK-NEXT:    li a2, 7
; CHECK-NEXT:    mv a3, a0
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    mv a1, a3
; CHECK-NEXT:    tail memcpy
;
; CHECK-LARGE-ZICFILP-LABEL: caller_extern:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi1:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI1_0)
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi2:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI1_1)
; CHECK-LARGE-ZICFILP-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi1)(a1)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi2)(a2)
; CHECK-LARGE-ZICFILP-NEXT:    li a2, 7
; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a3
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false)
  ret void
}

; Perform tail call optimization for external symbol when the caller carries
; profile data (!prof !14, a function_entry_count of 0) instead of the optsize
; attribute. The expected instruction sequence mirrors caller_extern, with
; @dest_pgso as the destination.
@dest_pgso = global [2 x i8] zeroinitializer
define void @caller_extern_pgso(ptr %src) !prof !14 {
; CHECK-LABEL: caller_extern_pgso:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a1, %hi(dest_pgso)
; CHECK-NEXT:    addi a1, a1, %lo(dest_pgso)
; CHECK-NEXT:    li a2, 7
; CHECK-NEXT:    mv a3, a0
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    mv a1, a3
; CHECK-NEXT:    tail memcpy
;
; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi3:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI2_0)
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi4:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI2_1)
; CHECK-LARGE-ZICFILP-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi3)(a1)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi4)(a2)
; CHECK-LARGE-ZICFILP-NEXT:    li a2, 7
; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a3
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
entry:
  tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false)
  ret void
}

; Perform indirect tail call optimization (for function pointer call).
; The callee is chosen by a select on %a; the expected code branches on a0 and,
; on either path, materialises the chosen callee's address in t1 and jumps to
; it with `jr t1`.
declare void @callee_indirect1()
declare void @callee_indirect2()
define void @caller_indirect_tail(i32 %a) nounwind {
; CHECK-LABEL: caller_indirect_tail:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a0, .LBB3_2
; CHECK-NEXT:  # %bb.1: # %entry
; CHECK-NEXT:    lui t1, %hi(callee_indirect2)
; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect2)
; CHECK-NEXT:    jr t1
; CHECK-NEXT:  .LBB3_2:
; CHECK-NEXT:    lui t1, %hi(callee_indirect1)
; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect1)
; CHECK-NEXT:    jr t1
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_tail:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    beqz a0, .LBB3_2
; CHECK-LARGE-ZICFILP-NEXT:  # %bb.1: # %entry
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi6:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI3_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t1, %pcrel_lo(.Lpcrel_hi6)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t1
; CHECK-LARGE-ZICFILP-NEXT:  .LBB3_2:
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi5:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI3_1)
; CHECK-LARGE-ZICFILP-NEXT:    lw t1, %pcrel_lo(.Lpcrel_hi5)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t1
entry:
  %tobool = icmp eq i32 %a, 0
  %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
  tail call void %callee()
  ret void
}

; Make sure we don't use t0 as the source for jr as that is a hint to pop the
; return address stack on some microarchitectures.
; The caller receives the function pointer in a0 plus seven i32 values; the
; expected code moves the pointer to t1, shifts the remaining arguments down
; by one register (a1-a7 into a0-a6) and jumps with `jr t1`.
define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
; CHECK-LABEL: caller_indirect_no_t0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mv t1, a0
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    mv a1, a2
; CHECK-NEXT:    mv a2, a3
; CHECK-NEXT:    mv a3, a4
; CHECK-NEXT:    mv a4, a5
; CHECK-NEXT:    mv a5, a6
; CHECK-NEXT:    mv a6, a7
; CHECK-NEXT:    jr t1
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_no_t0:
; CHECK-LARGE-ZICFILP:       # %bb.0:
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    mv t1, a0
; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a2
; CHECK-LARGE-ZICFILP-NEXT:    mv a2, a3
; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a4
; CHECK-LARGE-ZICFILP-NEXT:    mv a4, a5
; CHECK-LARGE-ZICFILP-NEXT:    mv a5, a6
; CHECK-LARGE-ZICFILP-NEXT:    mv a6, a7
; CHECK-LARGE-ZICFILP-NEXT:    jr t1
  %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
  ret i32 %9
}

; Do not tail call optimize functions with varargs passed by stack.
; The call passes nine i32 values; the expected code stores one of them to
; 0(sp), so a regular call (with ra spilled and reloaded) followed by `ret` is
; expected rather than a tail call.
declare i32 @callee_varargs(i32, ...)
define void @caller_varargs(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: caller_varargs:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a0, 0(sp)
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    mv a3, a0
; CHECK-NEXT:    mv a4, a0
; CHECK-NEXT:    mv a5, a1
; CHECK-NEXT:    mv a6, a1
; CHECK-NEXT:    mv a7, a0
; CHECK-NEXT:    call callee_varargs
; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_varargs:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi7:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI5_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi7)(a2)
; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 0(sp)
; CHECK-LARGE-ZICFILP-NEXT:    mv a2, a1
; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
; CHECK-LARGE-ZICFILP-NEXT:    mv a4, a0
; CHECK-LARGE-ZICFILP-NEXT:    mv a5, a1
; CHECK-LARGE-ZICFILP-NEXT:    mv a6, a1
; CHECK-LARGE-ZICFILP-NEXT:    mv a7, a0
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
  ret void
}

; Do not tail call optimize if stack is used to pass parameters.
; Caller and callee both take fourteen i32 parameters. The expected code
; reloads six values from the incoming stack area (32(sp)..52(sp)), stores
; them to the outgoing area (0(sp)..20(sp)) and performs a regular call.
declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
; CHECK-LABEL: caller_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT:    lw t0, 32(sp)
; CHECK-NEXT:    lw t1, 36(sp)
; CHECK-NEXT:    lw t2, 40(sp)
; CHECK-NEXT:    lw t3, 44(sp)
; CHECK-NEXT:    lw t4, 48(sp)
; CHECK-NEXT:    lw t5, 52(sp)
; CHECK-NEXT:    sw t4, 16(sp)
; CHECK-NEXT:    sw t5, 20(sp)
; CHECK-NEXT:    sw t0, 0(sp)
; CHECK-NEXT:    sw t1, 4(sp)
; CHECK-NEXT:    sw t2, 8(sp)
; CHECK-NEXT:    sw t3, 12(sp)
; CHECK-NEXT:    call callee_args
; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_args:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    lw t0, 32(sp)
; CHECK-LARGE-ZICFILP-NEXT:    lw t1, 36(sp)
; CHECK-LARGE-ZICFILP-NEXT:    lw t3, 40(sp)
; CHECK-LARGE-ZICFILP-NEXT:    lw t4, 44(sp)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, 48(sp)
; CHECK-LARGE-ZICFILP-NEXT:    lw t5, 52(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw t2, 16(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw t5, 20(sp)
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi8:
; CHECK-LARGE-ZICFILP-NEXT:    auipc t2, %pcrel_hi(.LCPI6_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
; CHECK-LARGE-ZICFILP-NEXT:    sw t0, 0(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw t1, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw t3, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw t4, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
  ret i32 %r
}

; Do not tail call optimize if parameters need to be passed indirectly.
; The fp128 constant is expected to be written to the caller's stack frame
; (four word stores at 0(sp)..12(sp)) with a0 pointing at it, followed by a
; regular call and `ret`.
declare i32 @callee_indirect_args(fp128 %a)
define void @caller_indirect_args() nounwind {
; CHECK-LABEL: caller_indirect_args:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT:    lui a1, 262128
; CHECK-NEXT:    mv a0, sp
; CHECK-NEXT:    sw zero, 0(sp)
; CHECK-NEXT:    sw zero, 4(sp)
; CHECK-NEXT:    sw zero, 8(sp)
; CHECK-NEXT:    sw a1, 12(sp)
; CHECK-NEXT:    call callee_indirect_args
; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    lui a1, 262128
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi9:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI7_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi9)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    mv a0, sp
; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 0(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw a1, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
  ret void
}

; Perform tail call optimization for external weak symbol.
; The default runs expect a single `tail callee_weak`; the large-code-model
; Zicfilp run expects the address to be loaded from .LCPI8_0 into t2 and
; reached with `jr t2`.
declare extern_weak void @callee_weak()
define void @caller_weak() nounwind {
; CHECK-LABEL: caller_weak:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    tail callee_weak
;
; CHECK-LARGE-ZICFILP-LABEL: caller_weak:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi10:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI8_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi10)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
entry:
  tail call void @callee_weak()
  ret void
}

; Exception-handling functions need a special set of instructions to indicate a
; return to the hardware. Tail-calling another function would probably break
; this.
; For this "interrupt"="machine" handler the expected code spills ra, t0-t6
; and a0-a7, performs a regular call, reloads every spilled register and
; returns with `mret`.
declare void @callee_irq()
define void @caller_irq() nounwind "interrupt"="machine" {
; CHECK-LABEL: caller_irq:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -64
; CHECK-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
; CHECK-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
; CHECK-NEXT:    call callee_irq
; CHECK-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
; CHECK-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 64
; CHECK-NEXT:    mret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_irq:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -64
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi11:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI9_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi11)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 64
; CHECK-LARGE-ZICFILP-NEXT:    mret
entry:
  tail call void @callee_irq()
  ret void
}

; Byval parameters hand the function a pointer directly into the stack area
; we want to reuse during a tail call. Do not tail call optimize functions with
; byval parameters.
; The expected code copies the alloca'd value from 8(sp) to 4(sp), passes the
; address of the copy in a0 and performs a regular call followed by `ret`.
declare i32 @callee_byval(ptr byval(ptr) %a)
define i32 @caller_byval() nounwind {
; CHECK-LABEL: caller_byval:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT:    lw a0, 8(sp)
; CHECK-NEXT:    sw a0, 4(sp)
; CHECK-NEXT:    addi a0, sp, 4
; CHECK-NEXT:    call callee_byval
; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:    lw a0, 8(sp)
; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi12:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI10_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    addi a0, sp, 4
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  %a = alloca ptr
  %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
  ret i32 %r
}

; Do not tail call optimize if callee uses structret semantics.
; The callee takes an sret pointer to the global @a; the expected code passes
; its address in a0 and uses a regular call followed by `ret`.
%struct.A = type { i32 }
@a = global %struct.A zeroinitializer

declare void @callee_struct(ptr sret(%struct.A) %a)
define void @caller_nostruct() nounwind {
; CHECK-LABEL: caller_nostruct:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT:    lui a0, %hi(a)
; CHECK-NEXT:    addi a0, a0, %lo(a)
; CHECK-NEXT:    call callee_struct
; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_nostruct:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi13:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI11_0)
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi14:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI11_1)
; CHECK-LARGE-ZICFILP-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi13)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi14)(a1)
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  tail call void @callee_struct(ptr sret(%struct.A) @a)
  ret void
}

; Do not tail call optimize if caller uses structret semantics.
; Here the caller itself has an sret parameter while the callee takes no
; arguments; a regular call followed by `ret` is expected.
declare void @callee_nostruct()
define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
; CHECK-LABEL: caller_struct:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT:    call callee_nostruct
; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: caller_struct:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi15:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI12_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi15)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  tail call void @callee_nostruct()
  ret void
}

; Do not tail call optimize if disabled.
; The caller carries "disable-tail-calls"="true", so even though the IR call
; to @callee_tail is marked `tail`, a regular call followed by `ret` is
; expected.
define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
; CHECK-LABEL: disable_tail_calls:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT:    call callee_tail
; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
;
; CHECK-LARGE-ZICFILP-LABEL: disable_tail_calls:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi16:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI13_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi16)(a1)
; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
; CHECK-LARGE-ZICFILP-NEXT:    ret
entry:
  %rv = tail call i32 @callee_tail(i32 %i)
  ret i32 %rv
}

; Duplicate returns to enable tail call optimizations.
; In the IR all four call sites branch to a shared %return block that returns
; a phi of the call results. The expected code instead ends each of the four
; paths with its own tail call (or `jr t2` under the large code model).
declare i32 @test()
declare i32 @test1()
declare i32 @test2()
declare i32 @test3()
define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: duplicate_returns:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    beqz a0, .LBB14_4
; CHECK-NEXT:  # %bb.1: # %if.else
; CHECK-NEXT:    beqz a1, .LBB14_5
; CHECK-NEXT:  # %bb.2: # %if.else4
; CHECK-NEXT:    bge a1, a0, .LBB14_6
; CHECK-NEXT:  # %bb.3: # %if.then6
; CHECK-NEXT:    tail test2
; CHECK-NEXT:  .LBB14_4: # %if.then
; CHECK-NEXT:    tail test
; CHECK-NEXT:  .LBB14_5: # %if.then2
; CHECK-NEXT:    tail test1
; CHECK-NEXT:  .LBB14_6: # %if.else8
; CHECK-NEXT:    tail test3
;
; CHECK-LARGE-ZICFILP-LABEL: duplicate_returns:
; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
; CHECK-LARGE-ZICFILP-NEXT:    beqz a0, .LBB14_4
; CHECK-LARGE-ZICFILP-NEXT:  # %bb.1: # %if.else
; CHECK-LARGE-ZICFILP-NEXT:    beqz a1, .LBB14_5
; CHECK-LARGE-ZICFILP-NEXT:  # %bb.2: # %if.else4
; CHECK-LARGE-ZICFILP-NEXT:    bge a1, a0, .LBB14_6
; CHECK-LARGE-ZICFILP-NEXT:  # %bb.3: # %if.then6
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi19:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_1)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi19)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_4: # %if.then
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi17:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_3)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi17)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_5: # %if.then2
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi18:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_2)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi18)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_6: # %if.else8
; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi20:
; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_0)
; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi20)(a0)
; CHECK-LARGE-ZICFILP-NEXT:    jr t2
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  %call = tail call i32 @test()
  br label %return

if.else:                                          ; preds = %entry
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %if.then2, label %if.else4

if.then2:                                         ; preds = %if.else
  %call3 = tail call i32 @test1()
  br label %return

if.else4:                                         ; preds = %if.else
  %cmp5 = icmp sgt i32 %a, %b
  br i1 %cmp5, label %if.then6, label %if.else8

if.then6:                                         ; preds = %if.else4
  %call7 = tail call i32 @test2()
  br label %return

if.else8:                                         ; preds = %if.else4
  %call9 = tail call i32 @test3()
  br label %return

return:                                           ; preds = %if.else8, %if.then6, %if.then2, %if.then
  %retval = phi i32 [ %call, %if.then ], [ %call3, %if.then2 ], [ %call7, %if.then6 ], [ %call9, %if.else8 ]
  ret i32 %retval
}

; Module-level profile summary (!0-!13) plus the function entry count of 0
; (!14) that caller_extern_pgso references through its !prof attachment.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}