Artur Pilipenko 889dc1e3a5 Rework loop predication pass
We've found a serious issue with the current implementation of loop predication.
The current implementation relies on SCEV and this turned out to be problematic.
To fix the problem we had to rework the pass substantially. We have had the
reworked implementation in our downstream tree for a while. This is the initial
patch of the series of changes to upstream the new implementation.

For now the transformation is limited to the following case:
  * The loop has a single latch with either ult or slt icmp condition.
  * The step of the IV used in the latch condition is 1.
  * The IV of the latch condition is the same as the post increment IV of the guard condition.
  * The guard condition is ult.

See the review or the LoopPredication.cpp header for the details about the
problem and the new implementation.

Reviewed By: sanjoy, mkazantsev

Differential Revision: https://reviews.llvm.org/D37569

llvm-svn: 313981
2017-09-22 13:13:57 +00:00

217 lines
8.4 KiB
LLVM

; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s
; RUN: opt -S -passes='require<scalar-evolution>,loop(loop-predication)' < %s 2>&1 | FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
define i32 @signed_loop_0_to_n_nested_0_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) {
; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_inner_index_check
entry:
%tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
br label %outer.loop
outer.loop:
%outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%tmp6 = icmp sle i32 %l, 0
br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
inner.loop.preheader:
; CHECK: inner.loop.preheader:
; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length
; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %inner.loop
br label %inner.loop
inner.loop:
; CHECK: inner.loop:
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
%within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64
%array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
%array.j = load i32, i32* %array.j.ptr, align 4
%inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
%j.next = add nsw i32 %j, 1
%inner.continue = icmp slt i32 %j.next, %l
br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
outer.loop.inc:
%outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
%i.next = add nsw i32 %i, 1
%outer.continue = icmp slt i32 %i.next, %n
br i1 %outer.continue, label %outer.loop, label %exit
exit:
%result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
ret i32 %result
}
define i32 @signed_loop_0_to_n_nested_0_to_l_outer_index_check(i32* %array, i32 %length, i32 %n, i32 %l) {
; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_outer_index_check
entry:
%tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
; CHECK: outer.loop.preheader:
; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %outer.loop
br label %outer.loop
outer.loop:
%outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%tmp6 = icmp sle i32 %l, 0
br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
inner.loop.preheader:
br label %inner.loop
inner.loop:
; CHECK: inner.loop:
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
%within.bounds = icmp ult i32 %i, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
%array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
%array.i = load i32, i32* %array.i.ptr, align 4
%inner.loop.acc.next = add i32 %inner.loop.acc, %array.i
%j.next = add nsw i32 %j, 1
%inner.continue = icmp slt i32 %j.next, %l
br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
outer.loop.inc:
%outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
%i.next = add nsw i32 %i, 1
%outer.continue = icmp slt i32 %i.next, %n
br i1 %outer.continue, label %outer.loop, label %exit
exit:
%result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
ret i32 %result
}
define i32 @signed_loop_0_to_n_nested_i_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) {
; CHECK-LABEL: @signed_loop_0_to_n_nested_i_to_l_inner_index_check
entry:
%tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
; CHECK: outer.loop.preheader:
; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length
; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length
; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]]
; CHECK-NEXT: br label %outer.loop
br label %outer.loop
outer.loop:
; CHECK: outer.loop:
%outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%tmp6 = icmp sle i32 %l, 0
br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
inner.loop.preheader:
; CHECK: inner.loop.preheader:
; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length
; CHECK: br label %inner.loop
br label %inner.loop
inner.loop:
; CHECK: inner.loop:
; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]]
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ]
%within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64
%array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
%array.j = load i32, i32* %array.j.ptr, align 4
%inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
%j.next = add nsw i32 %j, 1
%inner.continue = icmp slt i32 %j.next, %l
br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
outer.loop.inc:
%outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
%i.next = add nsw i32 %i, 1
%outer.continue = icmp slt i32 %i.next, %n
br i1 %outer.continue, label %outer.loop, label %exit
exit:
%result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
ret i32 %result
}
define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) {
; CHECK-LABEL: @cant_expand_guard_check_start
entry:
%tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
br label %outer.loop
outer.loop:
%outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%tmp6 = icmp sle i32 %l, 0
%div = udiv i32 %i, %maybezero
br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
inner.loop.preheader:
; CHECK: inner.loop.preheader:
; CHECK: br label %inner.loop
br label %inner.loop
inner.loop:
; CHECK: inner.loop:
; CHECK: %within.bounds = icmp ult i32 %j, %length
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ]
%within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64
%array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
%array.j = load i32, i32* %array.j.ptr, align 4
%inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
%j.next = add nsw i32 %j, 1
%inner.continue = icmp slt i32 %j.next, %l
br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
outer.loop.inc:
%outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
%i.next = add nsw i32 %i, 1
%outer.continue = icmp slt i32 %i.next, %n
br i1 %outer.continue, label %outer.loop, label %exit
exit:
%result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
ret i32 %result
}