Enabled loop interchange support for floating point reductions if it is allowed to reorder floating point operations. Previously when we encouter a floating point PHI node in the outer loop exit block, we bailed out since we could not detect floating point reductions in the early days. Now we remove this limiation since we are able to detect floating point reductions. Reviewed By: #loopoptwg, Meinersbur Differential Revision: https://reviews.llvm.org/D117450
310 lines
14 KiB
LLVM
310 lines
14 KiB
LLVM
; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
|
|
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
|
|
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
|
|
|
|
|
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
target triple = "x86_64-unknown-linux-gnu"
|
|
|
|
; REMARKS: --- !Passed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: Interchanged
|
|
; REMARKS-NEXT: Function: test1
|
|
|
|
define i64 @test1([100 x [100 x i64]]* %Arr) {
|
|
; CHECK-LABEL: @test1(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[FOR2_PREHEADER:%.*]]
|
|
; CHECK: for1.header.preheader:
|
|
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
|
|
; CHECK: for1.header:
|
|
; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
|
|
; CHECK-NEXT: [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
|
|
; CHECK-NEXT: br label [[FOR2_SPLIT1:%.*]]
|
|
; CHECK: for2.preheader:
|
|
; CHECK-NEXT: br label [[FOR2:%.*]]
|
|
; CHECK: for2:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
|
|
; CHECK-NEXT: [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
|
|
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
|
|
; CHECK: for2.split1:
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
|
|
; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
|
|
; CHECK-NEXT: [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
|
|
; CHECK-NEXT: br label [[FOR1_INC]]
|
|
; CHECK: for2.split:
|
|
; CHECK-NEXT: [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
|
|
; CHECK-NEXT: br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
|
|
; CHECK: for1.inc:
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
|
|
; CHECK-NEXT: [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
|
|
; CHECK-NEXT: br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
|
|
; CHECK: for1.loopexit:
|
|
; CHECK-NEXT: [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
|
|
; CHECK-NEXT: ret i64 [[SUM_INC_LCSSA2]]
|
|
;
|
|
entry:
|
|
br label %for1.header
|
|
|
|
for1.header: ; preds = %for1.inc, %entry
|
|
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
|
|
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
|
|
br label %for2
|
|
|
|
for2: ; preds = %for2, %for1.header
|
|
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
|
|
%sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
|
|
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
|
%lv = load i64, i64* %arrayidx, align 4
|
|
%sum.inc = add i64 %sum.inner, %lv
|
|
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
|
|
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
|
|
br i1 %exit1, label %for1.inc, label %for2
|
|
|
|
for1.inc: ; preds = %for2
|
|
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
|
|
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
|
%exit2 = icmp eq i64 %indvars.iv.next24, 100
|
|
br i1 %exit2, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc
|
|
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
|
|
ret i64 %sum.inc.lcssa2
|
|
}
|
|
|
|
; In this test case, the inner reduction PHI %inner does not involve the outer
|
|
; reduction PHI %sum.outer, do not interchange.
|
|
; REMARKS: --- !Missed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: UnsupportedPHIOuter
|
|
; REMARKS-NEXT: Function: test2
|
|
|
|
define i64 @test2([100 x [100 x i64]]* %Arr) {
|
|
entry:
|
|
br label %for1.header
|
|
|
|
for1.header: ; preds = %for1.inc, %entry
|
|
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
|
|
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
|
|
br label %for2
|
|
|
|
for2: ; preds = %for2, %for1.header
|
|
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
|
|
%inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
|
|
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
|
%lv = load i64, i64* %arrayidx, align 4
|
|
%sum.inc = add i64 %inner, %lv
|
|
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
|
|
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
|
|
br i1 %exit1, label %for1.inc, label %for2
|
|
|
|
for1.inc: ; preds = %for2
|
|
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
|
|
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
|
%exit2 = icmp eq i64 %indvars.iv.next24, 100
|
|
br i1 %exit2, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc
|
|
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
|
|
ret i64 %sum.inc.lcssa2
|
|
}
|
|
|
|
; Check that we do not interchange if there is an additional instruction
|
|
; between the outer and inner reduction PHIs.
|
|
; REMARKS: --- !Missed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: UnsupportedPHIOuter
|
|
; REMARKS-NEXT: Function: test3
|
|
|
|
define i64 @test3([100 x [100 x i64]]* %Arr) {
|
|
entry:
|
|
br label %for1.header
|
|
|
|
for1.header: ; preds = %for1.inc, %entry
|
|
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
|
|
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
|
|
%so = add i64 %sum.outer, 10
|
|
br label %for2
|
|
|
|
for2: ; preds = %for2, %for1.header
|
|
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
|
|
%sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
|
|
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
|
%lv = load i64, i64* %arrayidx, align 4
|
|
%sum.inc = add i64 %sum.inner, %lv
|
|
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
|
|
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
|
|
br i1 %exit1, label %for1.inc, label %for2
|
|
|
|
for1.inc: ; preds = %for2
|
|
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
|
|
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
|
%exit2 = icmp eq i64 %indvars.iv.next24, 100
|
|
br i1 %exit2, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc
|
|
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
|
|
ret i64 %sum.inc.lcssa2
|
|
}
|
|
|
|
; Check that we do not interchange if reduction is stored in an invariant address inside inner loop
|
|
; REMARKS: --- !Missed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: UnsupportedPHIOuter
|
|
; REMARKS-NEXT: Function: test4
|
|
|
|
define i64 @test4([100 x [100 x i64]]* %Arr, i64* %dst) {
|
|
entry:
|
|
%gep.dst = getelementptr inbounds i64, i64* %dst, i64 42
|
|
br label %for1.header
|
|
|
|
for1.header: ; preds = %for1.inc, %entry
|
|
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
|
|
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
|
|
br label %for2
|
|
|
|
for2: ; preds = %for2, %for1.header
|
|
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
|
|
%sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
|
|
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
|
%lv = load i64, i64* %arrayidx, align 4
|
|
%sum.inc = add i64 %sum.inner, %lv
|
|
store i64 %sum.inc, i64* %gep.dst, align 4
|
|
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
|
|
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
|
|
br i1 %exit1, label %for1.inc, label %for2
|
|
|
|
for1.inc: ; preds = %for2
|
|
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
|
|
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
|
%exit2 = icmp eq i64 %indvars.iv.next24, 100
|
|
br i1 %exit2, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc
|
|
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
|
|
ret i64 %sum.inc.lcssa2
|
|
}
|
|
|
|
; Check that we do not interchange or crash if the PHI in the outer loop gets a
|
|
; constant from the inner loop.
|
|
; REMARKS: --- !Missed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: UnsupportedPHIOuter
|
|
; REMARKS-NEXT: Function: test_constant_inner_loop_res
|
|
|
|
define i64 @test_constant_inner_loop_res([100 x [100 x i64]]* %Arr) {
|
|
entry:
|
|
br label %for1.header
|
|
|
|
for1.header: ; preds = %for1.inc, %entry
|
|
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
|
|
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.amend, %for1.inc ]
|
|
br label %for2
|
|
|
|
for2: ; preds = %for2, %for1.header
|
|
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
|
|
%sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
|
|
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
|
|
%lv = load i64, i64* %arrayidx, align 4
|
|
%sum.inc = add i64 %sum.inner, %lv
|
|
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
|
|
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
|
|
br i1 %exit1, label %for1.inc, label %for2
|
|
|
|
for1.inc: ; preds = %for2
|
|
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
|
|
%const.lcssa = phi i64 [ 0, %for2 ]
|
|
%sum.inc.amend = add i64 %const.lcssa, %sum.inc.lcssa
|
|
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
|
|
%exit2 = icmp eq i64 %indvars.iv.next24, 100
|
|
br i1 %exit2, label %for1.loopexit, label %for1.header
|
|
|
|
for1.loopexit: ; preds = %for1.inc
|
|
%il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
|
|
ret i64 %il.res.lcssa2
|
|
}
|
|
|
|
; Floating point reductions are interchanged if all the fp instructions
|
|
; involved allow reassociation.
|
|
; REMARKS: --- !Passed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: Interchanged
|
|
; REMARKS-NEXT: Function: test5
|
|
|
|
define float @test5([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
|
|
entry:
|
|
br label %outer.header
|
|
|
|
outer.header: ; preds = %outer.inc, %entry
|
|
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
|
|
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
|
|
br label %for.body3
|
|
|
|
for.body3: ; preds = %for.body3, %outer.header
|
|
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
|
|
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
|
|
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
|
|
%vA = load float, float* %arrayidx5
|
|
%float.inner.inc = fadd fast float %float.inner, %vA
|
|
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
|
|
%vB = load float, float* %arrayidx6
|
|
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
|
|
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
|
|
%exitcond = icmp eq i64 %iv.inner.next, 100
|
|
br i1 %exitcond, label %outer.inc, label %for.body3
|
|
|
|
outer.inc: ; preds = %for.body3
|
|
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
|
|
%iv.outer.next = add nsw i64 %iv.outer, 1
|
|
%cmp = icmp eq i64 %iv.outer.next, 100
|
|
br i1 %cmp, label %outer.header, label %for.exit
|
|
|
|
for.exit: ; preds = %outer.inc
|
|
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
|
|
ret float %float.outer.lcssa
|
|
}
|
|
|
|
; Floating point reductions are not interchanged if not all the fp instructions
|
|
; involved allow reassociation.
|
|
; REMARKS: --- !Missed
|
|
; REMARKS-NEXT: Pass: loop-interchange
|
|
; REMARKS-NEXT: Name: UnsupportedPHIOuter
|
|
; REMARKS-NEXT: Function: test6
|
|
|
|
define float @test6([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
|
|
entry:
|
|
br label %outer.header
|
|
|
|
outer.header: ; preds = %outer.inc, %entry
|
|
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
|
|
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
|
|
br label %for.body3
|
|
|
|
for.body3: ; preds = %for.body3, %outer.header
|
|
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
|
|
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
|
|
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
|
|
%vA = load float, float* %arrayidx5
|
|
%float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
|
|
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
|
|
%vB = load float, float* %arrayidx6
|
|
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
|
|
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
|
|
%exitcond = icmp eq i64 %iv.inner.next, 100
|
|
br i1 %exitcond, label %outer.inc, label %for.body3
|
|
|
|
outer.inc: ; preds = %for.body3
|
|
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
|
|
%iv.outer.next = add nsw i64 %iv.outer, 1
|
|
%cmp = icmp eq i64 %iv.outer.next, 100
|
|
br i1 %cmp, label %outer.header, label %for.exit
|
|
|
|
for.exit: ; preds = %outer.inc
|
|
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
|
|
ret float %float.outer.lcssa
|
|
}
|