To guarantee convergence of the algorithm, each optimization step should decrease the number of instructions when the IR is modified. This property does not hold in this test case. The problem is that SCEV Expander may do an "unexpected" reassociation, which results in the creation of new min/max chains and the introduction of extra instructions. As a result, we optimize back and forth indefinitely. The solution is to prevent SCEV Expander from performing uncontrolled reassociations by means of "Unknown" expressions. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D112060
105 lines
4.3 KiB
LLVM
105 lines
4.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -nary-reassociate -S | FileCheck %s
; RUN: opt < %s -passes='nary-reassociate' -S | FileCheck %s

; Both invocations above run the nary-reassociate pass over this file and
; verify the textual output against the same FileCheck assertions; they differ
; only in how the pass is requested on the opt command line.

; Min/max intrinsic declarations. In this file @llvm.smax.i32 appears only in
; the expected output of @nary_infinite_loop_minmax, while @llvm.umin.i64 is
; called directly by @test2.
declare i32 @llvm.smax.i32(i32 %a, i32 %b)
declare i64 @llvm.umin.i64(i64, i64)

; This is a negative test. We should not optimize if an intermediate result
; has a use outside of the optimizable pattern. In other words, %smax2 has one
; use from %smax3 and a side use from %res2. The expected output is therefore
; identical to the input.
define i32 @smax_test1(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @smax_test1(
; CHECK-NEXT:    [[C1:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[C1]], i32 [[A]], i32 [[B]]
; CHECK-NEXT:    [[C2:%.*]] = icmp sgt i32 [[B]], [[C:%.*]]
; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[C2]], i32 [[B]], i32 [[C]]
; CHECK-NEXT:    [[C3:%.*]] = icmp sgt i32 [[SMAX2]], [[A]]
; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[C3]], i32 [[SMAX2]], i32 [[A]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[SMAX1]], [[SMAX3]]
; CHECK-NEXT:    [[RES2:%.*]] = add i32 [[RES]], [[SMAX2]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  ; %smax1 = smax(%a, %b), written as an icmp/select pair.
  %c1 = icmp sgt i32 %a, %b
  %smax1 = select i1 %c1, i32 %a, i32 %b
  ; %smax2 = smax(%b, %c): the intermediate result with two users.
  %c2 = icmp sgt i32 %b, %c
  %smax2 = select i1 %c2, i32 %b, i32 %c
  ; %smax3 = smax(%smax2, %a): first user of %smax2.
  %c3 = icmp sgt i32 %smax2, %a
  %smax3 = select i1 %c3, i32 %smax2, i32 %a
  %res = add i32 %smax1, %smax3
  ; %res2 is not returned; it exists to give %smax2 its second (side) use.
  %res2 = add i32 %res, %smax2
  ret i32 %res
}

; This is a negative test. It is similar to the previous one
; but a bit more complex. In particular, after the first iteration
; %e10 is replaced with %e10.nary = call i64 @llvm.umin.i64(i64 %e5, i64 %e).
; No more reassociation should be applied to %e10.nary since
; %e5 has a side use in %e6.
; NOTE(review): the expected output below uses %e7 rather than %e as the second
; operand of %e10.nary; %e and %e7 are textually identical instructions
; (sub i64 undef, 0). Confirm which name the sentence above should mention.
define void @test2(i64 %arg) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[E:%.*]] = sub i64 undef, 0
; CHECK-NEXT:    [[E1:%.*]] = sub i64 [[ARG:%.*]], 0
; CHECK-NEXT:    [[E2:%.*]] = call i64 @llvm.umin.i64(i64 [[E]], i64 [[E1]])
; CHECK-NEXT:    [[E3:%.*]] = call i64 @llvm.umin.i64(i64 [[E2]], i64 16384)
; CHECK-NEXT:    [[E4:%.*]] = sub i64 [[ARG]], 0
; CHECK-NEXT:    [[E5:%.*]] = call i64 @llvm.umin.i64(i64 [[E4]], i64 16384)
; CHECK-NEXT:    [[E6:%.*]] = icmp ugt i64 [[E5]], 0
; CHECK-NEXT:    [[E7:%.*]] = sub i64 undef, 0
; CHECK-NEXT:    [[E10_NARY:%.*]] = call i64 @llvm.umin.i64(i64 [[E5]], i64 [[E7]])
; CHECK-NEXT:    unreachable
;
bb:
  ; First chain: %e3 = umin(umin(undef - 0, %arg - 0), 16384).
  %e = sub i64 undef, 0
  %e1 = sub i64 %arg, 0
  %e2 = call i64 @llvm.umin.i64(i64 %e, i64 %e1)
  %e3 = call i64 @llvm.umin.i64(i64 %e2, i64 16384)
  ; %e5 = umin(%arg - 0, 16384). It also feeds the compare %e6, which is the
  ; side use that must block any further reassociation through %e5.
  %e4 = sub i64 %arg, 0
  %e5 = call i64 @llvm.umin.i64(i64 %e4, i64 16384)
  %e6 = icmp ugt i64 %e5, 0
  ; Second chain, built from the same operands as the first one. In the
  ; expected output %e8 and %e9 are gone and %e10 has become
  ; %e10.nary = umin(%e5, %e7).
  %e7 = sub i64 undef, 0
  %e8 = sub i64 %arg, 0
  %e9 = call i64 @llvm.umin.i64(i64 %e7, i64 %e8)
  %e10 = call i64 @llvm.umin.i64(i64 %e9, i64 16384)
  unreachable
}

; Make sure we don't fall into an infinite loop optimizing %sel5.
; The subtle thing is that %sel3 is a min/max as well, and
; there is an "unexpected" reassociation coming from SCEV Expander
; during the %sel5 rewrite. That results in a new chain of min/max
; which is matched on the next iteration.
define i32 @nary_infinite_loop_minmax(i32 %d0, i32 %d1, i32 %d2, i32 %d3) {
; CHECK-LABEL: @nary_infinite_loop_minmax(
; CHECK-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[D2:%.*]], [[D1:%.*]]
; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CMP0]], i32 [[D1]], i32 [[D2]]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[D3:%.*]], [[D0:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[D0]], i32 [[D3]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[SEL1]], [[SEL0]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 [[SEL1]], i32 [[SEL0]]
; CHECK-NEXT:    [[CMP3:%.*]] = icmp slt i32 [[D3]], [[D0]]
; CHECK-NEXT:    [[SEL3:%.*]] = select i1 [[CMP3]], i32 [[D0]], i32 [[D3]]
; CHECK-NEXT:    [[SEL5_NARY:%.*]] = call i32 @llvm.smax.i32(i32 [[SEL0]], i32 [[SEL3]])
; CHECK-NEXT:    ret i32 [[SEL5_NARY]]
;
  ; %sel0 = smax(%d1, %d2)
  %cmp0 = icmp slt i32 %d2, %d1
  %sel0 = select i1 %cmp0, i32 %d1, i32 %d2

  ; %sel1 = smax(%d0, %d3)
  %cmp1 = icmp slt i32 %d3, %d0
  %sel1 = select i1 %cmp1, i32 %d0, i32 %d3

  ; %sel2 = smin(%sel1, %sel0); not used by the return value.
  %cmp2 = icmp slt i32 %sel1, %sel0
  %sel2 = select i1 %cmp2, i32 %sel1, i32 %sel0

  ; %sel3 = smax(%d0, %d3), the same computation as %sel1.
  %cmp3 = icmp slt i32 %d3, %d0
  %sel3 = select i1 %cmp3, i32 %d0, i32 %d3

  ; %sel4 = smax(%d2, %sel3)
  %cmp4 = icmp slt i32 %sel3, %d2
  %sel4 = select i1 %cmp4, i32 %d2, i32 %sel3

  ; %sel5 = smax(%d1, %sel4). The expected output replaces the %cmp4..%sel5
  ; chain with a single %sel5.nary = smax(%sel0, %sel3) and stops there.
  %cmp5 = icmp slt i32 %sel4, %d1
  %sel5 = select i1 %cmp5, i32 %d1, i32 %sel4
  ret i32 %sel5
}