Noah Goldstein ee5585ed09 Recommit "Improve and enable folding of conditional branches with tail calls." (2nd Try)
Improve and enable folding of conditional branches with tail calls.

1. Make it so that conditional tail calls can be emitted even when
   there are multiple predecessors.

2. Don't guard the transformation behind -Os. The rationale for
   guarding it was that static prediction can be affected by whether the
   branch is forward or backward. This is no longer true for almost any
   X86 CPU (anything newer than `SnB`), so it is no longer a meaningful
   concern.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D140931
2023-02-06 14:09:17 -06:00

285 lines
8.5 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s
; rdar://7475489
; Bit 14 (16384) of each argument is tested and the two results are XORed to
; choose between two variadic tail calls. The expected assembly XORs one byte
; of each stack argument, tests it against $64, folds the conditional branch
; into a tail call to _bar, and falls through to a tail jump to _foo.
define i32 @test1(i32 %a, i32 %b) nounwind ssp {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorb {{[0-9]+}}(%esp), %al
; CHECK-NEXT: testb $64, %al
; CHECK-NEXT: jne _bar ## TAILCALL
; CHECK-NEXT: ## %bb.1: ## %bb
; CHECK-NEXT: jmp _foo ## TAILCALL
entry:
  %a.bit = and i32 %a, 16384
  %a.set = icmp ne i32 %a.bit, 0
  %b.bit = and i32 %b, 16384
  %b.set = icmp ne i32 %b.bit, 0
  %differ = xor i1 %a.set, %b.set
  br i1 %differ, label %bb1, label %bb

bb:                                               ; preds = %entry
  ; Taken when bit 14 agrees between %a and %b.
  %foo.ret = tail call i32 (...) @foo() nounwind
  ret i32 %foo.ret

bb1:                                              ; preds = %entry
  ; Taken when bit 14 differs between %a and %b.
  %bar.ret = tail call i32 (...) @bar() nounwind
  ret i32 %bar.ret
}

; Variadic callees targeted by the two tail calls in test1.
declare i32 @foo(...)

declare i32 @bar(...)
; <rdar://problem/7598384>:
;
; A conditional jump over an unconditional jump, i.e.
;
;   jCC  L1
;   jmp  L2
; L1:
;   ...
; L2:
;   ...
;
; should be rewritten with the condition inverted so the jmp disappears:
;
;   jnCC L2
; L1:
;   ...
; L2:
;   ...
;
; In this function the ordered-equal compare is expected to lower to a
; jne/jnp pair: jne enters %bb1, jnp skips straight to %bb2, and no
; unconditional jmp appears in the expected assembly.
define float @test4(float %x, float %y) nounwind readnone optsize ssp {
; CHECK-LABEL: test4:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: cvtss2sd {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: cvtss2sd {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: mulsd %xmm1, %xmm0
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: ucomisd %xmm1, %xmm0
; CHECK-NEXT: jne LBB1_1
; CHECK-NEXT: jnp LBB1_2
; CHECK-NEXT: LBB1_1: ## %bb1
; CHECK-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK-NEXT: LBB1_2: ## %bb2
; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0
; CHECK-NEXT: movss %xmm0, (%esp)
; CHECK-NEXT: flds (%esp)
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
entry:
  %x.wide = fpext float %x to double
  %y.wide = fpext float %y to double
  %prod = fmul double %x.wide, %y.wide
  %prod.is.zero = fcmp oeq double %prod, 0.000000e+00
  br i1 %prod.is.zero, label %bb2, label %bb1

bb1:                                              ; preds = %entry
  ; Only reached when the product does not compare ordered-equal to 0.0.
  %adjusted = fadd double %prod, -1.000000e+00
  br label %bb2

bb2:                                              ; preds = %entry, %bb1
  %result.wide = phi double [ %adjusted, %bb1 ], [ %prod, %entry ]
  %result = fptrunc double %result.wide to float
  ret float %result
}
; SSE4.1 PTEST intrinsics called by the vector tests in this file; each call
; yields an i32 that the calling test turns into a branch condition.
; NOTE(review): declared here with <4 x float> operands - confirm this still
; matches (or is auto-upgraded to) the intrinsic's current signature.
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind

declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind
; ptestz result compared with "!= 0". The branch is expected to consume the
; flags produced by ptest directly: a single ptest followed by jne to %bb2,
; with nothing in between materializing the i32 result.
define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: ptest %xmm0, %xmm0
; CHECK-NEXT: jne LBB2_2
; CHECK-NEXT: ## %bb.1: ## %bb1
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
; CHECK-NEXT: LBB2_2: ## %bb2
; CHECK-NEXT: divps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
entry:
  %testz = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
  %take.add = icmp ne i32 %testz, 0
  br i1 %take.add, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  %sum = fadd <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

bb2:                                              ; preds = %entry
  %quot = fdiv <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

return:                                           ; preds = %bb2, %bb1
  %result = phi <4 x float> [ %sum, %bb1 ], [ %quot, %bb2 ]
  ret <4 x float> %result
}
; Same shape as a "!= 0" test on ptestz, except the i32 result is narrowed
; to i1 with trunc before branching. The expected assembly is still a bare
; ptest followed by jne to %bb2.
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test7:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: ptest %xmm0, %xmm0
; CHECK-NEXT: jne LBB3_2
; CHECK-NEXT: ## %bb.1: ## %bb1
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
; CHECK-NEXT: LBB3_2: ## %bb2
; CHECK-NEXT: divps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
entry:
  %testz = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
  %low.bit = trunc i32 %testz to i1
  br i1 %low.bit, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  %sum = fadd <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

bb2:                                              ; preds = %entry
  %quot = fdiv <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

return:                                           ; preds = %bb2, %bb1
  %result = phi <4 x float> [ %sum, %bb1 ], [ %quot, %bb2 ]
  ret <4 x float> %result
}
; ptestc result compared with "!= 0". The expected assembly branches on the
; ptest flags directly, using jae to reach %bb2.
define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test8:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: ptest %xmm0, %xmm0
; CHECK-NEXT: jae LBB4_2
; CHECK-NEXT: ## %bb.1: ## %bb1
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
; CHECK-NEXT: LBB4_2: ## %bb2
; CHECK-NEXT: divps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
entry:
  %testc = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind
  %take.add = icmp ne i32 %testc, 0
  br i1 %take.add, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  %sum = fadd <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

bb2:                                              ; preds = %entry
  %quot = fdiv <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

return:                                           ; preds = %bb2, %bb1
  %result = phi <4 x float> [ %sum, %bb1 ], [ %quot, %bb2 ]
  ret <4 x float> %result
}
; ptestc result narrowed to i1 with trunc before the branch. As with the
; explicit "!= 0" compare, the expected assembly is ptest followed by jae
; to %bb2.
define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test10:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: ptest %xmm0, %xmm0
; CHECK-NEXT: jae LBB5_2
; CHECK-NEXT: ## %bb.1: ## %bb1
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
; CHECK-NEXT: LBB5_2: ## %bb2
; CHECK-NEXT: divps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
entry:
  %testc = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind
  %low.bit = trunc i32 %testc to i1
  br i1 %low.bit, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  %sum = fadd <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

bb2:                                              ; preds = %entry
  %quot = fdiv <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

return:                                           ; preds = %bb2, %bb1
  %result = phi <4 x float> [ %sum, %bb1 ], [ %quot, %bb2 ]
  ret <4 x float> %result
}
; ptestz result compared with "== 1" rather than "!= 0". The expected
; assembly is the same as for the "!= 0" form: ptest followed by jne to %bb2.
define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test11:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: ptest %xmm0, %xmm0
; CHECK-NEXT: jne LBB6_2
; CHECK-NEXT: ## %bb.1: ## %bb1
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
; CHECK-NEXT: LBB6_2: ## %bb2
; CHECK-NEXT: divps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
entry:
  %testz = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
  %is.one = icmp eq i32 %testz, 1
  br i1 %is.one, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  %sum = fadd <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

bb2:                                              ; preds = %entry
  %quot = fdiv <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

return:                                           ; preds = %bb2, %bb1
  %result = phi <4 x float> [ %sum, %bb1 ], [ %quot, %bb2 ]
  ret <4 x float> %result
}
; ptestz result compared with "!= 1", the inverted sense of the "== 1" form.
; The expected assembly flips the branch accordingly: ptest followed by je
; to %bb2.
define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: test12:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: ptest %xmm0, %xmm0
; CHECK-NEXT: je LBB7_2
; CHECK-NEXT: ## %bb.1: ## %bb1
; CHECK-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
; CHECK-NEXT: LBB7_2: ## %bb2
; CHECK-NEXT: divps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retl
entry:
  %testz = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
  %not.one = icmp ne i32 %testz, 1
  br i1 %not.one, label %bb1, label %bb2

bb1:                                              ; preds = %entry
  %sum = fadd <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

bb2:                                              ; preds = %entry
  %quot = fdiv <4 x float> %b, <float 1.000000e+02, float 2.000000e+02, float 3.000000e+02, float 4.000000e+02>
  br label %return

return:                                           ; preds = %bb2, %bb1
  %result = phi <4 x float> [ %sum, %bb1 ], [ %quot, %bb2 ]
  ret <4 x float> %result
}