llvm-project/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
Congzhe Cao 1ef04326ec [LoopInterchange] Support loop interchange with floating point reductions
Enabled loop interchange support for floating point reductions
if it is allowed to reorder floating point operations.

Previously, when we encountered a floating point PHI node in the
outer loop exit block, we bailed out since we could not detect
floating point reductions in the early days. Now we remove this
limitation since we are able to detect floating point reductions.

Reviewed By: #loopoptwg, Meinersbur

Differential Revision: https://reviews.llvm.org/D117450
2022-02-06 17:04:47 -05:00

310 lines
14 KiB
LLVM

; Test driver for loop interchange on reductions that are carried across both
; the inner and the outer loop of a two-deep nest.
; The first command runs opt with -basic-aa and -loop-interchange, writes the
; optimization remarks to %t, merges stderr (missed-remark messages and -stats
; output) into stdout and checks the result with the default prefix.
; The second command checks the remarks file with the remarks prefix.
; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: test1
; test1: i64 sum reduction carried through both loops of a two-deep nest.
; The outer loop (for1.*) is indexed by %indvars.iv23 and the inner loop (for2)
; by %indvars.iv; both start at 0 and leave the loop once the incremented value
; equals 100. The FileCheck assertions below expect the interchanged form, in
; which for2 encloses for1.header.
define i64 @test1([100 x [100 x i64]]* %Arr) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR2_PREHEADER:%.*]]
; CHECK: for1.header.preheader:
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
; CHECK: for1.header:
; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR2_SPLIT1:%.*]]
; CHECK: for2.preheader:
; CHECK-NEXT: br label [[FOR2:%.*]]
; CHECK: for2:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT: [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
; CHECK: for2.split1:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
; CHECK-NEXT: [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
; CHECK-NEXT: br label [[FOR1_INC]]
; CHECK: for2.split:
; CHECK-NEXT: [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
; CHECK-NEXT: br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
; CHECK: for1.inc:
; CHECK-NEXT: [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
; CHECK-NEXT: [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
; CHECK-NEXT: br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
; CHECK: for1.loopexit:
; CHECK-NEXT: [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
; CHECK-NEXT: ret i64 [[SUM_INC_LCSSA2]]
;
entry:
br label %for1.header
; Outer loop header: the outer induction variable and the outer reduction PHI,
; which starts at 0 and is fed back from the inner loop's LCSSA value.
for1.header: ; preds = %for1.inc, %entry
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
br label %for2
; Inner loop (single block): the inner reduction PHI is seeded directly with
; %sum.outer, and each iteration adds the element addressed by
; [inner index][outer index] of %Arr.
for2: ; preds = %for2, %for1.header
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
%sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
%lv = load i64, i64* %arrayidx, align 4
%sum.inc = add i64 %sum.inner, %lv
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
br i1 %exit1, label %for1.inc, label %for2
; Outer latch: the LCSSA PHI carries the inner loop's final sum back to
; %sum.outer, then the outer induction variable is advanced and tested.
for1.inc: ; preds = %for2
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%exit2 = icmp eq i64 %indvars.iv.next24, 100
br i1 %exit2, label %for1.loopexit, label %for1.header
; Exit block: LCSSA PHI for the outer loop, returned as the function result.
for1.loopexit: ; preds = %for1.inc
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
ret i64 %sum.inc.lcssa2
}
; In this test case, the inner reduction PHI %inner does not involve the outer
; reduction PHI %sum.outer, so we must not interchange.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test2
; test2: same loop nest as test1, except that the inner accumulator %inner is
; seeded from the outer induction variable %indvars.iv23 instead of from
; %sum.outer, so %sum.outer has no users inside the function.
define i64 @test2([100 x [100 x i64]]* %Arr) {
entry:
br label %for1.header
; Outer loop header: %sum.outer still receives the inner loop's LCSSA value on
; the back edge, but nothing reads it.
for1.header: ; preds = %for1.inc, %entry
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
br label %for2
; Inner loop: the accumulator restarts from %indvars.iv23 on every outer
; iteration and adds one element of %Arr per inner iteration.
for2: ; preds = %for2, %for1.header
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
%inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
%lv = load i64, i64* %arrayidx, align 4
%sum.inc = add i64 %inner, %lv
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
br i1 %exit1, label %for1.inc, label %for2
; Outer latch: LCSSA PHI for the inner result, then advance and test the outer
; induction variable.
for1.inc: ; preds = %for2
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%exit2 = icmp eq i64 %indvars.iv.next24, 100
br i1 %exit2, label %for1.loopexit, label %for1.header
; Exit block: returns the value produced by the last inner loop execution.
for1.loopexit: ; preds = %for1.inc
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
ret i64 %sum.inc.lcssa2
}
; Check that we do not interchange if there is an additional instruction
; between the outer and inner reduction PHIs.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test3
; test3: same loop nest as test1, except that the inner reduction PHI is seeded
; from %so (= %sum.outer + 10) rather than from %sum.outer itself.
define i64 @test3([100 x [100 x i64]]* %Arr) {
entry:
br label %for1.header
; Outer loop header: %so is the extra instruction sitting between the outer
; reduction PHI and the inner reduction PHI.
for1.header: ; preds = %for1.inc, %entry
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
%so = add i64 %sum.outer, 10
br label %for2
; Inner loop: accumulates one element of %Arr per iteration, starting from %so.
for2: ; preds = %for2, %for1.header
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
%sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
%lv = load i64, i64* %arrayidx, align 4
%sum.inc = add i64 %sum.inner, %lv
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
br i1 %exit1, label %for1.inc, label %for2
; Outer latch: LCSSA PHI feeding %sum.outer, then advance and test the outer
; induction variable.
for1.inc: ; preds = %for2
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%exit2 = icmp eq i64 %indvars.iv.next24, 100
br i1 %exit2, label %for1.loopexit, label %for1.header
; Exit block: returns the final accumulated value.
for1.loopexit: ; preds = %for1.inc
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
ret i64 %sum.inc.lcssa2
}
; Check that we do not interchange if the reduction is stored to a loop-invariant address inside the inner loop.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test4
; test4: same reduction as test1, but every inner iteration also stores the
; running sum to %gep.dst, an address computed once in the entry block.
define i64 @test4([100 x [100 x i64]]* %Arr, i64* %dst) {
entry:
; Element 42 of %dst; computed outside both loops.
%gep.dst = getelementptr inbounds i64, i64* %dst, i64 42
br label %for1.header
; Outer loop header: outer induction variable and outer reduction PHI.
for1.header: ; preds = %for1.inc, %entry
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
br label %for2
; Inner loop: accumulate one element of %Arr, then write the partial sum to
; the fixed destination address.
for2: ; preds = %for2, %for1.header
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
%sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
%lv = load i64, i64* %arrayidx, align 4
%sum.inc = add i64 %sum.inner, %lv
store i64 %sum.inc, i64* %gep.dst, align 4
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
br i1 %exit1, label %for1.inc, label %for2
; Outer latch: LCSSA PHI feeding %sum.outer, then advance and test the outer
; induction variable.
for1.inc: ; preds = %for2
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%exit2 = icmp eq i64 %indvars.iv.next24, 100
br i1 %exit2, label %for1.loopexit, label %for1.header
; Exit block: returns the final accumulated value.
for1.loopexit: ; preds = %for1.inc
%sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
ret i64 %sum.inc.lcssa2
}
; Check that we do not interchange or crash if the PHI in the outer loop gets a
; constant from the inner loop.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test_constant_inner_loop_res
; test_constant_inner_loop_res: same reduction as test1, but the outer latch
; holds a second LCSSA PHI whose only incoming value is the constant 0, and the
; outer reduction PHI is fed by the sum of that constant and the inner result.
define i64 @test_constant_inner_loop_res([100 x [100 x i64]]* %Arr) {
entry:
br label %for1.header
; Outer loop header: %sum.outer is fed by %sum.inc.amend, not directly by the
; inner loop's LCSSA PHI.
for1.header: ; preds = %for1.inc, %entry
%indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
%sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.amend, %for1.inc ]
br label %for2
; Inner loop: accumulates one element of %Arr per iteration, starting from
; %sum.outer.
for2: ; preds = %for2, %for1.header
%indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
%sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
%arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
%lv = load i64, i64* %arrayidx, align 4
%sum.inc = add i64 %sum.inner, %lv
%indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
%exit1 = icmp eq i64 %indvars.iv.next.3, 100
br i1 %exit1, label %for1.inc, label %for2
; Outer latch: %const.lcssa is the PHI that receives a constant from the inner
; loop edge; it is combined with the real inner result before being fed back.
for1.inc: ; preds = %for2
%sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
%const.lcssa = phi i64 [ 0, %for2 ]
%sum.inc.amend = add i64 %const.lcssa, %sum.inc.lcssa
%indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
%exit2 = icmp eq i64 %indvars.iv.next24, 100
br i1 %exit2, label %for1.loopexit, label %for1.header
; Exit block: returns the amended sum.
for1.loopexit: ; preds = %for1.inc
%il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
ret i64 %il.res.lcssa2
}
; Floating point reductions are interchanged if all the fp instructions
; involved allow reassociation.
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: test5
; test5: float reduction carried through both loops. Each inner iteration adds
; one element of %Arr and one element of %Arr2 to the accumulator, and both
; fadd instructions carry the 'fast' flag. Both induction variables start at 1.
define float @test5([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
entry:
br label %outer.header
; Outer loop header: outer induction variable and the outer float reduction
; PHI, which starts at 1.0 and is fed by the inner loop's LCSSA value.
outer.header: ; preds = %outer.inc, %entry
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
br label %for.body3
; Inner loop: a chain of two fast fadds per iteration; both loads use the
; [inner index][outer index] element of their array.
for.body3: ; preds = %for.body3, %outer.header
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
%vA = load float, float* %arrayidx5
%float.inner.inc = fadd fast float %float.inner, %vA
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
%vB = load float, float* %arrayidx6
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
%exitcond = icmp eq i64 %iv.inner.next, 100
br i1 %exitcond, label %outer.inc, label %for.body3
; Outer latch.
; NOTE(review): the back edge to outer.header is taken when %cmp is true
; (%iv.outer.next == 100) and the exit is taken otherwise, which is the
; opposite polarity of the integer tests in this file - confirm this is
; intended before relying on the outer trip count.
outer.inc: ; preds = %for.body3
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
%iv.outer.next = add nsw i64 %iv.outer, 1
%cmp = icmp eq i64 %iv.outer.next, 100
br i1 %cmp, label %outer.header, label %for.exit
; Exit block: LCSSA PHI for the outer loop, returned as the function result.
for.exit: ; preds = %outer.inc
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
ret float %float.outer.lcssa
}
; Floating point reductions are not interchanged if not all the fp instructions
; involved allow reassociation.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test6
; test6: same float reduction as test5, except that the first fadd in the inner
; loop carries no fast-math flags while the second one is still 'fast'.
define float @test6([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
entry:
br label %outer.header
; Outer loop header: outer induction variable and the outer float reduction
; PHI, which starts at 1.0 and is fed by the inner loop's LCSSA value.
outer.header: ; preds = %outer.inc, %entry
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
br label %for.body3
; Inner loop: the reduction chain mixes a plain fadd with a fast fadd.
for.body3: ; preds = %for.body3, %outer.header
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
%vA = load float, float* %arrayidx5
%float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
%vB = load float, float* %arrayidx6
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
%exitcond = icmp eq i64 %iv.inner.next, 100
br i1 %exitcond, label %outer.inc, label %for.body3
; Outer latch.
; NOTE(review): the back edge to outer.header is taken when %cmp is true
; (%iv.outer.next == 100) and the exit is taken otherwise, which is the
; opposite polarity of the integer tests in this file - confirm this is
; intended before relying on the outer trip count.
outer.inc: ; preds = %for.body3
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
%iv.outer.next = add nsw i64 %iv.outer, 1
%cmp = icmp eq i64 %iv.outer.next, 100
br i1 %cmp, label %outer.header, label %for.exit
; Exit block: LCSSA PHI for the outer loop, returned as the function result.
for.exit: ; preds = %outer.inc
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
ret float %float.outer.lcssa
}