If we vectorize, e.g., a store, we leave behind a bunch of getelementptrs for the individual scalar stores that we removed. We can go ahead and delete them as well. This is purely for test output quality and readability. It should have no effect in any sane pipeline. Differential Revision: https://reviews.llvm.org/D122493
1467 lines
78 KiB
LLVM
1467 lines
78 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,CHECK
|
|
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefixes=ALL,STORE
|
|
|
|
; #include <stdint.h>
|
|
;
|
|
; int foo(float *A, int n) {
|
|
; float sum = 0;
|
|
; for (intptr_t i=0; i < n; ++i) {
|
|
; sum += 7*A[i*4 ] +
|
|
; 7*A[i*4+1] +
|
|
; 7*A[i*4+2] +
|
|
; 7*A[i*4+3];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; Test input: each loop iteration loads four floats of %A at indices
; i*4 .. i*4+3, scales each by 7.0 and sums the products with a chain of
; `fadd fast` before adding that sum to the running accumulator %sum.032.
; Expected output (identical for both RUN lines): the four scalar loads and
; fmuls become a single <4 x float> load and fmul, and the inner fadd chain
; becomes a call to @llvm.vector.reduce.fadd.v4f32; the final add into the
; accumulator stays scalar.
define i32 @add_red(float* %A, i32 %n) {
|
|
; ALL-LABEL: @add_red(
|
|
; ALL-NEXT: entry:
|
|
; ALL-NEXT: [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; ALL-NEXT: br i1 [[CMP31]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; ALL: for.body.lr.ph:
|
|
; ALL-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
|
|
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
|
; ALL: for.body:
|
|
; ALL-NEXT: [[I_033:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[SUM_032:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD17:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
|
|
; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
|
; ALL-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>*
|
|
; ALL-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
|
; ALL-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
|
|
; ALL-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]])
|
|
; ALL-NEXT: [[ADD17]] = fadd fast float [[SUM_032]], [[TMP4]]
|
|
; ALL-NEXT: [[INC]] = add nsw i64 [[I_033]], 1
|
|
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
|
|
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
|
; ALL: for.cond.for.end_crit_edge:
|
|
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD17]] to i32
|
|
; ALL-NEXT: br label [[FOR_END]]
|
|
; ALL: for.end:
|
|
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp31 = icmp sgt i32 %n, 0
|
|
br i1 %cmp31, label %for.body.lr.ph, label %for.end
|
|
|
|
for.body.lr.ph:
|
|
%0 = sext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i.033 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
%sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ]
|
|
; %mul is i*4, so its two low bits are zero and the `or` with 1, 2 and 3
; below yields the three following element indices.
%mul = shl nsw i64 %i.033, 2
|
|
%arrayidx = getelementptr inbounds float, float* %A, i64 %mul
|
|
%1 = load float, float* %arrayidx, align 4
|
|
%mul2 = fmul float %1, 7.000000e+00
|
|
%add28 = or i64 %mul, 1
|
|
%arrayidx4 = getelementptr inbounds float, float* %A, i64 %add28
|
|
%2 = load float, float* %arrayidx4, align 4
|
|
%mul5 = fmul float %2, 7.000000e+00
|
|
%add6 = fadd fast float %mul2, %mul5
|
|
%add829 = or i64 %mul, 2
|
|
%arrayidx9 = getelementptr inbounds float, float* %A, i64 %add829
|
|
%3 = load float, float* %arrayidx9, align 4
|
|
%mul10 = fmul float %3, 7.000000e+00
|
|
%add11 = fadd fast float %add6, %mul10
|
|
%add1330 = or i64 %mul, 3
|
|
%arrayidx14 = getelementptr inbounds float, float* %A, i64 %add1330
|
|
%4 = load float, float* %arrayidx14, align 4
|
|
%mul15 = fmul float %4, 7.000000e+00
|
|
%add16 = fadd fast float %add11, %mul15
|
|
; Fold this iteration's four-element sum into the loop-carried accumulator.
%add17 = fadd fast float %sum.032, %add16
|
|
%inc = add nsw i64 %i.033, 1
|
|
%exitcond = icmp eq i64 %inc, %0
|
|
br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
|
|
|
|
for.cond.for.end_crit_edge:
|
|
%phitmp = fptosi float %add17 to i32
|
|
br label %for.end
|
|
|
|
for.end:
|
|
%sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
|
|
ret i32 %sum.0.lcssa
|
|
}
|
|
|
|
; int foo(float * restrict A, float * restrict B, int n) {
|
|
; float sum = 0;
|
|
; for (intptr_t i=0; i < n; ++i) {
|
|
; sum *= B[0]*A[i*4 ] +
|
|
; B[1]*A[i*4+1] +
|
|
; B[2]*A[i*4+2] +
|
|
; B[3]*A[i*4+3];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; Test input: %B[0..3] are loaded once in the loop preheader. Each iteration
; multiplies them element-wise with four consecutive floats of %A starting at
; index i*4, sums the four products with `fadd fast`, and then multiplies the
; accumulator %sum.039 by that sum using a plain (non-fast) fmul.
; Expected output (identical for both RUN lines): the %B and %A loads each
; become one <4 x float> load, the products a vector fmul, and the sum a call
; to @llvm.vector.reduce.fadd.v4f32; the accumulator fmul stays scalar.
define i32 @mul_red(float* noalias %A, float* noalias %B, i32 %n) {
|
|
; ALL-LABEL: @mul_red(
|
|
; ALL-NEXT: entry:
|
|
; ALL-NEXT: [[CMP38:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; ALL-NEXT: br i1 [[CMP38]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; ALL: for.body.lr.ph:
|
|
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
|
|
; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
|
; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
|
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
|
; ALL: for.body:
|
|
; ALL-NEXT: [[I_040:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[SUM_039:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[MUL21:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_040]], 2
|
|
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
|
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
|
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
|
; ALL-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP1]], [[TMP4]]
|
|
; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
|
; ALL-NEXT: [[MUL21]] = fmul float [[SUM_039]], [[TMP6]]
|
|
; ALL-NEXT: [[INC]] = add nsw i64 [[I_040]], 1
|
|
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
|
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
|
; ALL: for.cond.for.end_crit_edge:
|
|
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[MUL21]] to i32
|
|
; ALL-NEXT: br label [[FOR_END]]
|
|
; ALL: for.end:
|
|
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp38 = icmp sgt i32 %n, 0
|
|
br i1 %cmp38, label %for.body.lr.ph, label %for.end
|
|
|
|
; Loop-invariant operands: the four %B elements are read once, outside the loop.
for.body.lr.ph:
|
|
%0 = load float, float* %B, align 4
|
|
%arrayidx4 = getelementptr inbounds float, float* %B, i64 1
|
|
%1 = load float, float* %arrayidx4, align 4
|
|
%arrayidx9 = getelementptr inbounds float, float* %B, i64 2
|
|
%2 = load float, float* %arrayidx9, align 4
|
|
%arrayidx15 = getelementptr inbounds float, float* %B, i64 3
|
|
%3 = load float, float* %arrayidx15, align 4
|
|
%4 = sext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i.040 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
%sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ]
|
|
%mul = shl nsw i64 %i.040, 2
|
|
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
|
|
%5 = load float, float* %arrayidx2, align 4
|
|
%mul3 = fmul float %0, %5
|
|
%add35 = or i64 %mul, 1
|
|
%arrayidx6 = getelementptr inbounds float, float* %A, i64 %add35
|
|
%6 = load float, float* %arrayidx6, align 4
|
|
%mul7 = fmul float %1, %6
|
|
%add8 = fadd fast float %mul3, %mul7
|
|
%add1136 = or i64 %mul, 2
|
|
%arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1136
|
|
%7 = load float, float* %arrayidx12, align 4
|
|
%mul13 = fmul float %2, %7
|
|
%add14 = fadd fast float %add8, %mul13
|
|
%add1737 = or i64 %mul, 3
|
|
%arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1737
|
|
%8 = load float, float* %arrayidx18, align 4
|
|
%mul19 = fmul float %3, %8
|
|
%add20 = fadd fast float %add14, %mul19
|
|
; The accumulator update is a multiply without fast-math flags; it is not
; part of the fadd chain above.
%mul21 = fmul float %sum.039, %add20
|
|
%inc = add nsw i64 %i.040, 1
|
|
%exitcond = icmp eq i64 %inc, %4
|
|
br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
|
|
|
|
for.cond.for.end_crit_edge:
|
|
%phitmp = fptosi float %mul21 to i32
|
|
br label %for.end
|
|
|
|
for.end:
|
|
%sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
|
|
ret i32 %sum.0.lcssa
|
|
}
|
|
|
|
; int foo(float * restrict A, float * restrict B, int n) {
|
|
; float sum = 0;
|
|
; for (intptr_t i=0; i < n; ++i) {
|
|
; sum += B[0]*A[i*6 ] +
|
|
; B[1]*A[i*6+1] +
|
|
; B[2]*A[i*6+2] +
|
|
; B[3]*A[i*6+3] +
|
|
; B[4]*A[i*6+4] +
|
|
; B[5]*A[i*6+5] +
|
|
; B[6]*A[i*6+6] +
|
|
; B[7]*A[i*6+7] +
|
|
; B[8]*A[i*6+8];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; Test input: nine products %B[k] * %A[%mul + k] for k = 0..8, where %mul is
; i*6, are summed with a chain of `fadd fast` and added to the accumulator
; %sum.082. All fmuls and fadds carry the `fast` flag.
; Expected output (identical for both RUN lines): the first eight lanes are
; vectorized as <8 x float> loads and fmul feeding
; @llvm.vector.reduce.fadd.v8f32, while the ninth product (index 8) stays
; scalar and is added to the reduction result afterwards.
define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
|
|
; ALL-LABEL: @long_red(
|
|
; ALL-NEXT: entry:
|
|
; ALL-NEXT: [[CMP81:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; ALL-NEXT: br i1 [[CMP81]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; ALL: for.body.lr.ph:
|
|
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <8 x float>*
|
|
; ALL-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4
|
|
; ALL-NEXT: [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[B]], i64 8
|
|
; ALL-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX45]], align 4
|
|
; ALL-NEXT: [[TMP3:%.*]] = sext i32 [[N]] to i64
|
|
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
|
; ALL: for.body:
|
|
; ALL-NEXT: [[I_083:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[SUM_082:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[ADD51:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[MUL:%.*]] = mul nsw i64 [[I_083]], 6
|
|
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
|
; ALL-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX2]] to <8 x float>*
|
|
; ALL-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4
|
|
; ALL-NEXT: [[TMP6:%.*]] = fmul fast <8 x float> [[TMP1]], [[TMP5]]
|
|
; ALL-NEXT: [[ADD47:%.*]] = add nsw i64 [[MUL]], 8
|
|
; ALL-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD47]]
|
|
; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX48]], align 4
|
|
; ALL-NEXT: [[MUL49:%.*]] = fmul fast float [[TMP2]], [[TMP7]]
|
|
; ALL-NEXT: [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP6]])
|
|
; ALL-NEXT: [[TMP9:%.*]] = fadd fast float [[TMP8]], [[MUL49]]
|
|
; ALL-NEXT: [[ADD51]] = fadd fast float [[SUM_082]], [[TMP9]]
|
|
; ALL-NEXT: [[INC]] = add nsw i64 [[I_083]], 1
|
|
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP3]]
|
|
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
|
; ALL: for.cond.for.end_crit_edge:
|
|
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[ADD51]] to i32
|
|
; ALL-NEXT: br label [[FOR_END]]
|
|
; ALL: for.end:
|
|
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp81 = icmp sgt i32 %n, 0
|
|
br i1 %cmp81, label %for.body.lr.ph, label %for.end
|
|
|
|
; Loop-invariant operands: the nine %B elements are read once, outside the loop.
for.body.lr.ph:
|
|
%0 = load float, float* %B, align 4
|
|
%arrayidx4 = getelementptr inbounds float, float* %B, i64 1
|
|
%1 = load float, float* %arrayidx4, align 4
|
|
%arrayidx9 = getelementptr inbounds float, float* %B, i64 2
|
|
%2 = load float, float* %arrayidx9, align 4
|
|
%arrayidx15 = getelementptr inbounds float, float* %B, i64 3
|
|
%3 = load float, float* %arrayidx15, align 4
|
|
%arrayidx21 = getelementptr inbounds float, float* %B, i64 4
|
|
%4 = load float, float* %arrayidx21, align 4
|
|
%arrayidx27 = getelementptr inbounds float, float* %B, i64 5
|
|
%5 = load float, float* %arrayidx27, align 4
|
|
%arrayidx33 = getelementptr inbounds float, float* %B, i64 6
|
|
%6 = load float, float* %arrayidx33, align 4
|
|
%arrayidx39 = getelementptr inbounds float, float* %B, i64 7
|
|
%7 = load float, float* %arrayidx39, align 4
|
|
%arrayidx45 = getelementptr inbounds float, float* %B, i64 8
|
|
%8 = load float, float* %arrayidx45, align 4
|
|
%9 = sext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i.083 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
%sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ]
|
|
; The base index advances by 6 per iteration while nine elements are read
; from it. Only the first offset is formed with `or`; offsets 2..8 use
; `add nsw`.
%mul = mul nsw i64 %i.083, 6
|
|
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
|
|
%10 = load float, float* %arrayidx2, align 4
|
|
%mul3 = fmul fast float %0, %10
|
|
%add80 = or i64 %mul, 1
|
|
%arrayidx6 = getelementptr inbounds float, float* %A, i64 %add80
|
|
%11 = load float, float* %arrayidx6, align 4
|
|
%mul7 = fmul fast float %1, %11
|
|
%add8 = fadd fast float %mul3, %mul7
|
|
%add11 = add nsw i64 %mul, 2
|
|
%arrayidx12 = getelementptr inbounds float, float* %A, i64 %add11
|
|
%12 = load float, float* %arrayidx12, align 4
|
|
%mul13 = fmul fast float %2, %12
|
|
%add14 = fadd fast float %add8, %mul13
|
|
%add17 = add nsw i64 %mul, 3
|
|
%arrayidx18 = getelementptr inbounds float, float* %A, i64 %add17
|
|
%13 = load float, float* %arrayidx18, align 4
|
|
%mul19 = fmul fast float %3, %13
|
|
%add20 = fadd fast float %add14, %mul19
|
|
%add23 = add nsw i64 %mul, 4
|
|
%arrayidx24 = getelementptr inbounds float, float* %A, i64 %add23
|
|
%14 = load float, float* %arrayidx24, align 4
|
|
%mul25 = fmul fast float %4, %14
|
|
%add26 = fadd fast float %add20, %mul25
|
|
%add29 = add nsw i64 %mul, 5
|
|
%arrayidx30 = getelementptr inbounds float, float* %A, i64 %add29
|
|
%15 = load float, float* %arrayidx30, align 4
|
|
%mul31 = fmul fast float %5, %15
|
|
%add32 = fadd fast float %add26, %mul31
|
|
%add35 = add nsw i64 %mul, 6
|
|
%arrayidx36 = getelementptr inbounds float, float* %A, i64 %add35
|
|
%16 = load float, float* %arrayidx36, align 4
|
|
%mul37 = fmul fast float %6, %16
|
|
%add38 = fadd fast float %add32, %mul37
|
|
%add41 = add nsw i64 %mul, 7
|
|
%arrayidx42 = getelementptr inbounds float, float* %A, i64 %add41
|
|
%17 = load float, float* %arrayidx42, align 4
|
|
%mul43 = fmul fast float %7, %17
|
|
%add44 = fadd fast float %add38, %mul43
|
|
; Ninth element: the check lines above expect this load and fmul to remain
; scalar.
%add47 = add nsw i64 %mul, 8
|
|
%arrayidx48 = getelementptr inbounds float, float* %A, i64 %add47
|
|
%18 = load float, float* %arrayidx48, align 4
|
|
%mul49 = fmul fast float %8, %18
|
|
%add50 = fadd fast float %add44, %mul49
|
|
%add51 = fadd fast float %sum.082, %add50
|
|
%inc = add nsw i64 %i.083, 1
|
|
%exitcond = icmp eq i64 %inc, %9
|
|
br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
|
|
|
|
for.cond.for.end_crit_edge:
|
|
%phitmp = fptosi float %add51 to i32
|
|
br label %for.end
|
|
|
|
for.end:
|
|
%sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
|
|
ret i32 %sum.0.lcssa
|
|
}
|
|
|
|
; int foo(float * restrict A, float * restrict B, int n) {
|
|
; float sum = 0;
|
|
; for (intptr_t i=0; i < n; ++i) {
|
|
; sum += B[0]*A[i*4 ];
|
|
; sum += B[1]*A[i*4+1];
|
|
; sum += B[2]*A[i*4+2];
|
|
; sum += B[3]*A[i*4+3];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; Test input: unlike the other reductions in this file, the loop-carried
; accumulator %sum.042 is the first operand of the `fadd fast` chain; each of
; the four products %B[k] * %A[i*4 + k] is then added to the running value in
; turn, and the last add (%add21) feeds the phi directly.
; Expected output (identical for both RUN lines): the four products are
; computed as a <4 x float> fmul and summed with
; @llvm.vector.reduce.fadd.v4f32, and the accumulator is folded in with one
; extra scalar `fadd fast` (named OP_EXTRA in the check lines).
define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
|
|
; ALL-LABEL: @chain_red(
|
|
; ALL-NEXT: entry:
|
|
; ALL-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; ALL-NEXT: br i1 [[CMP41]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; ALL: for.body.lr.ph:
|
|
; ALL-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
|
|
; ALL-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
|
|
; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
|
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
|
; ALL: for.body:
|
|
; ALL-NEXT: [[I_043:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[SUM_042:%.*]] = phi float [ 0.000000e+00, [[FOR_BODY_LR_PH]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
|
|
; ALL-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_043]], 2
|
|
; ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
|
; ALL-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
|
; ALL-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
|
; ALL-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP4]]
|
|
; ALL-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
|
; ALL-NEXT: [[OP_EXTRA]] = fadd fast float [[TMP6]], [[SUM_042]]
|
|
; ALL-NEXT: [[INC]] = add nsw i64 [[I_043]], 1
|
|
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
|
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
|
|
; ALL: for.cond.for.end_crit_edge:
|
|
; ALL-NEXT: [[PHITMP:%.*]] = fptosi float [[OP_EXTRA]] to i32
|
|
; ALL-NEXT: br label [[FOR_END]]
|
|
; ALL: for.end:
|
|
; ALL-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY:%.*]] ]
|
|
; ALL-NEXT: ret i32 [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp41 = icmp sgt i32 %n, 0
|
|
br i1 %cmp41, label %for.body.lr.ph, label %for.end
|
|
|
|
; Loop-invariant operands: the four %B elements are read once, outside the loop.
for.body.lr.ph:
|
|
%0 = load float, float* %B, align 4
|
|
%arrayidx4 = getelementptr inbounds float, float* %B, i64 1
|
|
%1 = load float, float* %arrayidx4, align 4
|
|
%arrayidx10 = getelementptr inbounds float, float* %B, i64 2
|
|
%2 = load float, float* %arrayidx10, align 4
|
|
%arrayidx16 = getelementptr inbounds float, float* %B, i64 3
|
|
%3 = load float, float* %arrayidx16, align 4
|
|
%4 = sext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i.043 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
%sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ]
|
|
%mul = shl nsw i64 %i.043, 2
|
|
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
|
|
%5 = load float, float* %arrayidx2, align 4
|
|
%mul3 = fmul fast float %0, %5
|
|
; The chain starts from the accumulator phi, so the phi value is an operand
; of the reduction chain itself.
%add = fadd fast float %sum.042, %mul3
|
|
%add638 = or i64 %mul, 1
|
|
%arrayidx7 = getelementptr inbounds float, float* %A, i64 %add638
|
|
%6 = load float, float* %arrayidx7, align 4
|
|
%mul8 = fmul fast float %1, %6
|
|
%add9 = fadd fast float %add, %mul8
|
|
%add1239 = or i64 %mul, 2
|
|
%arrayidx13 = getelementptr inbounds float, float* %A, i64 %add1239
|
|
%7 = load float, float* %arrayidx13, align 4
|
|
%mul14 = fmul fast float %2, %7
|
|
%add15 = fadd fast float %add9, %mul14
|
|
%add1840 = or i64 %mul, 3
|
|
%arrayidx19 = getelementptr inbounds float, float* %A, i64 %add1840
|
|
%8 = load float, float* %arrayidx19, align 4
|
|
%mul20 = fmul fast float %3, %8
|
|
%add21 = fadd fast float %add15, %mul20
|
|
%inc = add nsw i64 %i.043, 1
|
|
%exitcond = icmp eq i64 %inc, %4
|
|
br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
|
|
|
|
for.cond.for.end_crit_edge:
|
|
%phitmp = fptosi float %add21 to i32
|
|
br label %for.end
|
|
|
|
for.end:
|
|
%sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
|
|
ret i32 %sum.0.lcssa
|
|
}
|
|
|
|
; void foo(const float *arg_A, unsigned arg_B, float *array) {
|
|
; for (uint32_t i = 0; i < 6; ++i) {
|
|
; const float *ptr = arg_A + i;
|
|
; float w0 = array[i * 4 + 0];
|
|
; float w1 = array[i * 4 + 1];
|
|
; float w2 = array[i * 4 + 2];
|
|
; float w3 = array[i * 4 + 3];
|
|
;
|
|
; for (unsigned j = 0; j < arg_B; ++j) {
|
|
; const float x1 = *ptr - (-1.1f * w0) - (1.2f * w1);
|
|
; const float x2 = (2.1f * x1) + (-2.2f * w0) + (2.3f * w1);
|
|
; const float x3 = x2 - (-3.1f * w2) - (3.2f * w3);
|
|
; const float x4 = x3 + (-4.0f * w2) + w3;
|
|
; w1 = w0;
|
|
; w0 = x1;
|
|
; w3 = w2;
|
|
; w2 = x3;
|
|
; }
|
|
;
|
|
; array[i * 4 + 0] = w0;
|
|
; array[i * 4 + 1] = w1;
|
|
; array[i * 4 + 2] = w2;
|
|
; array[i * 4 + 3] = w3;
|
|
; }
|
|
; }
|
|
|
|
; Test input: a two-level loop nest. The outer loop (exits when
; %indvars.iv.next reaches 6) loads four consecutive floats of %array at
; indices i*4 .. i*4+3; the inner loop (trip count %arg_B, skipped entirely
; when %arg_B is 0) carries them through four phis and updates them with
; `fast` fmul/fadd chains; the outer loop then stores the four values back
; to the same addresses.
; Expected output (identical for both RUN lines): the check lines mirror the
; input instruction for instruction, i.e. the loads, phis, arithmetic and
; stores all stay scalar.
define void @foo(float* nocapture readonly %arg_A, i32 %arg_B, float* nocapture %array) {
|
|
; ALL-LABEL: @foo(
|
|
; ALL-NEXT: entry:
|
|
; ALL-NEXT: [[CMP1495:%.*]] = icmp eq i32 [[ARG_B:%.*]], 0
|
|
; ALL-NEXT: br label [[FOR_BODY:%.*]]
|
|
; ALL: for.cond.cleanup:
|
|
; ALL-NEXT: ret void
|
|
; ALL: for.body:
|
|
; ALL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND_CLEANUP15:%.*]] ]
|
|
; ALL-NEXT: [[TMP0:%.*]] = shl i64 [[INDVARS_IV]], 2
|
|
; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[ARRAY:%.*]], i64 [[TMP0]]
|
|
; ALL-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX]], align 4
|
|
; ALL-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
|
|
; ALL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP2]]
|
|
; ALL-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
|
|
; ALL-NEXT: [[TMP4:%.*]] = or i64 [[TMP0]], 2
|
|
; ALL-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP4]]
|
|
; ALL-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX8]], align 4
|
|
; ALL-NEXT: [[TMP6:%.*]] = or i64 [[TMP0]], 3
|
|
; ALL-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[ARRAY]], i64 [[TMP6]]
|
|
; ALL-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX12]], align 4
|
|
; ALL-NEXT: br i1 [[CMP1495]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16_LR_PH:%.*]]
|
|
; ALL: for.body16.lr.ph:
|
|
; ALL-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[ARG_A:%.*]], i64 [[INDVARS_IV]]
|
|
; ALL-NEXT: [[TMP8:%.*]] = load float, float* [[ADD_PTR]], align 4
|
|
; ALL-NEXT: br label [[FOR_BODY16:%.*]]
|
|
; ALL: for.cond.cleanup15:
|
|
; ALL-NEXT: [[W2_0_LCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ], [ [[SUB28:%.*]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[W3_0_LCSSA:%.*]] = phi float [ [[TMP7]], [[FOR_BODY]] ], [ [[W2_096:%.*]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[W1_0_LCSSA:%.*]] = phi float [ [[TMP3]], [[FOR_BODY]] ], [ [[W0_0100:%.*]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[W0_0_LCSSA:%.*]] = phi float [ [[TMP1]], [[FOR_BODY]] ], [ [[SUB19:%.*]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: store float [[W0_0_LCSSA]], float* [[ARRAYIDX]], align 4
|
|
; ALL-NEXT: store float [[W1_0_LCSSA]], float* [[ARRAYIDX4]], align 4
|
|
; ALL-NEXT: store float [[W2_0_LCSSA]], float* [[ARRAYIDX8]], align 4
|
|
; ALL-NEXT: store float [[W3_0_LCSSA]], float* [[ARRAYIDX12]], align 4
|
|
; ALL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; ALL-NEXT: [[EXITCOND109:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 6
|
|
; ALL-NEXT: br i1 [[EXITCOND109]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
|
|
; ALL: for.body16:
|
|
; ALL-NEXT: [[W0_0100]] = phi float [ [[TMP1]], [[FOR_BODY16_LR_PH]] ], [ [[SUB19]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[W1_099:%.*]] = phi float [ [[TMP3]], [[FOR_BODY16_LR_PH]] ], [ [[W0_0100]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[J_098:%.*]] = phi i32 [ 0, [[FOR_BODY16_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[W3_097:%.*]] = phi float [ [[TMP7]], [[FOR_BODY16_LR_PH]] ], [ [[W2_096]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[W2_096]] = phi float [ [[TMP5]], [[FOR_BODY16_LR_PH]] ], [ [[SUB28]], [[FOR_BODY16]] ]
|
|
; ALL-NEXT: [[MUL17:%.*]] = fmul fast float [[W0_0100]], 0x3FF19999A0000000
|
|
; ALL-NEXT: [[MUL18_NEG:%.*]] = fmul fast float [[W1_099]], 0xBFF3333340000000
|
|
; ALL-NEXT: [[SUB92:%.*]] = fadd fast float [[MUL17]], [[MUL18_NEG]]
|
|
; ALL-NEXT: [[SUB19]] = fadd fast float [[SUB92]], [[TMP8]]
|
|
; ALL-NEXT: [[MUL20:%.*]] = fmul fast float [[SUB19]], 0x4000CCCCC0000000
|
|
; ALL-NEXT: [[MUL21_NEG:%.*]] = fmul fast float [[W0_0100]], 0xC0019999A0000000
|
|
; ALL-NEXT: [[MUL23:%.*]] = fmul fast float [[W1_099]], 0x4002666660000000
|
|
; ALL-NEXT: [[MUL25:%.*]] = fmul fast float [[W2_096]], 0x4008CCCCC0000000
|
|
; ALL-NEXT: [[MUL27_NEG:%.*]] = fmul fast float [[W3_097]], 0xC0099999A0000000
|
|
; ALL-NEXT: [[ADD2293:%.*]] = fadd fast float [[MUL27_NEG]], [[MUL25]]
|
|
; ALL-NEXT: [[ADD24:%.*]] = fadd fast float [[ADD2293]], [[MUL23]]
|
|
; ALL-NEXT: [[SUB2694:%.*]] = fadd fast float [[ADD24]], [[MUL21_NEG]]
|
|
; ALL-NEXT: [[SUB28]] = fadd fast float [[SUB2694]], [[MUL20]]
|
|
; ALL-NEXT: [[INC]] = add nuw i32 [[J_098]], 1
|
|
; ALL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[ARG_B]]
|
|
; ALL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP15]], label [[FOR_BODY16]]
|
|
;
|
|
entry:
|
|
; Computed once up front; used in %for.body to bypass the inner loop when
; %arg_B is zero.
%cmp1495 = icmp eq i32 %arg_B, 0
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.cond.cleanup15
|
|
ret void
|
|
|
|
for.body: ; preds = %for.cond.cleanup15, %entry
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond.cleanup15 ]
|
|
%0 = shl i64 %indvars.iv, 2
|
|
%arrayidx = getelementptr inbounds float, float* %array, i64 %0
|
|
%1 = load float, float* %arrayidx, align 4
|
|
%2 = or i64 %0, 1
|
|
%arrayidx4 = getelementptr inbounds float, float* %array, i64 %2
|
|
%3 = load float, float* %arrayidx4, align 4
|
|
%4 = or i64 %0, 2
|
|
%arrayidx8 = getelementptr inbounds float, float* %array, i64 %4
|
|
%5 = load float, float* %arrayidx8, align 4
|
|
%6 = or i64 %0, 3
|
|
%arrayidx12 = getelementptr inbounds float, float* %array, i64 %6
|
|
%7 = load float, float* %arrayidx12, align 4
|
|
br i1 %cmp1495, label %for.cond.cleanup15, label %for.body16.lr.ph
|
|
|
|
for.body16.lr.ph: ; preds = %for.body
|
|
%add.ptr = getelementptr inbounds float, float* %arg_A, i64 %indvars.iv
|
|
%8 = load float, float* %add.ptr, align 4
|
|
br label %for.body16
|
|
|
|
; Merge point: each phi takes the freshly loaded value when the inner loop
; was skipped, or the inner loop's final value otherwise, before the four
; values are written back through the same pointers they were loaded from.
for.cond.cleanup15: ; preds = %for.body16, %for.body
|
|
%w2.0.lcssa = phi float [ %5, %for.body ], [ %sub28, %for.body16 ]
|
|
%w3.0.lcssa = phi float [ %7, %for.body ], [ %w2.096, %for.body16 ]
|
|
%w1.0.lcssa = phi float [ %3, %for.body ], [ %w0.0100, %for.body16 ]
|
|
%w0.0.lcssa = phi float [ %1, %for.body ], [ %sub19, %for.body16 ]
|
|
store float %w0.0.lcssa, float* %arrayidx, align 4
|
|
store float %w1.0.lcssa, float* %arrayidx4, align 4
|
|
store float %w2.0.lcssa, float* %arrayidx8, align 4
|
|
store float %w3.0.lcssa, float* %arrayidx12, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond109 = icmp eq i64 %indvars.iv.next, 6
|
|
br i1 %exitcond109, label %for.cond.cleanup, label %for.body
|
|
|
|
for.body16: ; preds = %for.body16, %for.body16.lr.ph
|
|
%w0.0100 = phi float [ %1, %for.body16.lr.ph ], [ %sub19, %for.body16 ]
|
|
%w1.099 = phi float [ %3, %for.body16.lr.ph ], [ %w0.0100, %for.body16 ]
|
|
%j.098 = phi i32 [ 0, %for.body16.lr.ph ], [ %inc, %for.body16 ]
|
|
%w3.097 = phi float [ %7, %for.body16.lr.ph ], [ %w2.096, %for.body16 ]
|
|
%w2.096 = phi float [ %5, %for.body16.lr.ph ], [ %sub28, %for.body16 ]
|
|
%mul17 = fmul fast float %w0.0100, 0x3FF19999A0000000
|
|
%mul18.neg = fmul fast float %w1.099, 0xBFF3333340000000
|
|
%sub92 = fadd fast float %mul17, %mul18.neg
|
|
%sub19 = fadd fast float %sub92, %8
|
|
%mul20 = fmul fast float %sub19, 0x4000CCCCC0000000
|
|
%mul21.neg = fmul fast float %w0.0100, 0xC0019999A0000000
|
|
%mul23 = fmul fast float %w1.099, 0x4002666660000000
|
|
%mul25 = fmul fast float %w2.096, 0x4008CCCCC0000000
|
|
%mul27.neg = fmul fast float %w3.097, 0xC0099999A0000000
|
|
%add2293 = fadd fast float %mul27.neg, %mul25
|
|
%add24 = fadd fast float %add2293, %mul23
|
|
%sub2694 = fadd fast float %add24, %mul21.neg
|
|
%sub28 = fadd fast float %sub2694, %mul20
|
|
%inc = add nuw i32 %j.098, 1
|
|
%exitcond = icmp eq i32 %inc, %arg_B
|
|
br i1 %exitcond, label %for.cond.cleanup15, label %for.body16
|
|
}
|
|
|
|
|
|
; void foo(double * restrict A, double * restrict B, double * restrict C,
|
|
; int n) {
|
|
; for (intptr_t i=0; i < n; ++i) {
|
|
; C[i] = B[0] *A[i*4 ] + B[1] *A[i*4+1];
|
|
; }
|
|
; }
|
|
|
|
; Test input: %B[0] and %B[1] are loaded once in the preheader; each
; iteration computes %B[0] * %A[i*4] + %B[1] * %A[i*4+1] with `fast` math
; and stores the result to %C[i].
; Expected output differs between the two RUN lines:
;  - first RUN line (plain -slp-vectorizer): the check lines mirror the input
;    instruction for instruction, i.e. nothing is vectorized;
;  - second RUN line (adds -slp-vectorize-hor -slp-vectorize-hor-store): the
;    loads and fmuls become <2 x double> operations, and the two lanes are
;    extracted and summed with a scalar fadd rather than a reduction
;    intrinsic. The store itself stays scalar in both cases.
define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) {
|
|
; CHECK-LABEL: @store_red_double(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.lr.ph:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[B:%.*]], align 8
|
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX4]], align 8
|
|
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load double, double* [[ARRAYIDX2]], align 8
|
|
; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[TMP0]], [[TMP3]]
|
|
; CHECK-NEXT: [[ADD16:%.*]] = or i64 [[MUL]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[ADD16]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX6]], align 8
|
|
; CHECK-NEXT: [[MUL7:%.*]] = fmul fast double [[TMP1]], [[TMP4]]
|
|
; CHECK-NEXT: [[ADD8:%.*]] = fadd fast double [[MUL3]], [[MUL7]]
|
|
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]]
|
|
; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8
|
|
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_018]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @store_red_double(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; STORE-NEXT: br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; STORE: for.body.lr.ph:
|
|
; STORE-NEXT: [[TMP0:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
|
|
; STORE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
|
|
; STORE-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64
|
|
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
|
; STORE: for.body:
|
|
; STORE-NEXT: [[I_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_018]], 2
|
|
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[MUL]]
|
|
; STORE-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*
|
|
; STORE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8
|
|
; STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP4]]
|
|
; STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
|
; STORE-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
|
|
; STORE-NEXT: [[ADD8:%.*]] = fadd fast double [[TMP6]], [[TMP7]]
|
|
; STORE-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 [[I_018]]
|
|
; STORE-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8
|
|
; STORE-NEXT: [[INC]] = add nsw i64 [[I_018]], 1
|
|
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP2]]
|
|
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
|
; STORE: for.end:
|
|
; STORE-NEXT: ret void
|
|
;
|
|
entry:
|
|
%cmp17 = icmp sgt i32 %n, 0
|
|
br i1 %cmp17, label %for.body.lr.ph, label %for.end
|
|
|
|
; Loop-invariant operands: the two %B elements are read once, outside the loop.
for.body.lr.ph:
|
|
%0 = load double, double* %B, align 8
|
|
%arrayidx4 = getelementptr inbounds double, double* %B, i64 1
|
|
%1 = load double, double* %arrayidx4, align 8
|
|
%2 = sext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
%mul = shl nsw i64 %i.018, 2
|
|
%arrayidx2 = getelementptr inbounds double, double* %A, i64 %mul
|
|
%3 = load double, double* %arrayidx2, align 8
|
|
%mul3 = fmul fast double %0, %3
|
|
%add16 = or i64 %mul, 1
|
|
%arrayidx6 = getelementptr inbounds double, double* %A, i64 %add16
|
|
%4 = load double, double* %arrayidx6, align 8
|
|
%mul7 = fmul fast double %1, %4
|
|
; Two-operand sum whose only use is the store below; there is no
; loop-carried accumulator in this function.
%add8 = fadd fast double %mul3, %mul7
|
|
%arrayidx9 = getelementptr inbounds double, double* %C, i64 %i.018
|
|
store double %add8, double* %arrayidx9, align 8
|
|
%inc = add nsw i64 %i.018, 1
|
|
%exitcond = icmp eq i64 %inc, %2
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; int foo(float * restrict A, float * restrict B, float * restrict C, int n) {
|
|
; float sum = 0;
|
|
; for (intptr_t i=0; i < n; ++i) {
|
|
; C[i] = B[0] *A[i*4 ] +
|
|
; B[1] *A[i*4+1] +
|
|
; B[2] *A[i*4+2] +
|
|
; B[3] *A[i*4+3];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; store_red: each loop iteration multiplies B[0..3] pairwise with A[4*i..4*i+3]
; (fast-math), sums the four products with a linear fadd chain, and stores the
; sum through a pointer that starts at C and advances by one float per iteration.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): two
; <4 x float> loads, one vector fmul, and a call to
; llvm.vector.reduce.fadd.v4f32 whose result feeds the store; the three
; preheader getelementptrs on B no longer appear.
define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) {
|
|
; CHECK-LABEL: @store_red(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; CHECK-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; CHECK: for.body.lr.ph:
|
|
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 1
|
|
; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
|
|
; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
|
|
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
|
|
; CHECK: for.body:
|
|
; CHECK-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[B]], align 4
|
|
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
|
|
; CHECK-NEXT: [[MUL3:%.*]] = fmul fast float [[TMP1]], [[TMP2]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[ARRAYIDX4]], align 4
|
|
; CHECK-NEXT: [[ADD34:%.*]] = or i64 [[MUL]], 1
|
|
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD34]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[ARRAYIDX6]], align 4
|
|
; CHECK-NEXT: [[MUL7:%.*]] = fmul fast float [[TMP3]], [[TMP4]]
|
|
; CHECK-NEXT: [[ADD8:%.*]] = fadd fast float [[MUL3]], [[MUL7]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[ARRAYIDX9]], align 4
|
|
; CHECK-NEXT: [[ADD1135:%.*]] = or i64 [[MUL]], 2
|
|
; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1135]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[ARRAYIDX12]], align 4
|
|
; CHECK-NEXT: [[MUL13:%.*]] = fmul fast float [[TMP5]], [[TMP6]]
|
|
; CHECK-NEXT: [[ADD14:%.*]] = fadd fast float [[ADD8]], [[MUL13]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[ARRAYIDX15]], align 4
|
|
; CHECK-NEXT: [[ADD1736:%.*]] = or i64 [[MUL]], 3
|
|
; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1736]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX18]], align 4
|
|
; CHECK-NEXT: [[MUL19:%.*]] = fmul fast float [[TMP7]], [[TMP8]]
|
|
; CHECK-NEXT: [[ADD20:%.*]] = fadd fast float [[ADD14]], [[MUL19]]
|
|
; CHECK-NEXT: store float [[ADD20]], float* [[C_ADDR_038]], align 4
|
|
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1
|
|
; CHECK-NEXT: [[INC]] = add nsw i64 [[I_039]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
; STORE-LABEL: @store_red(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[CMP37:%.*]] = icmp sgt i32 [[N:%.*]], 0
|
|
; STORE-NEXT: br i1 [[CMP37]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
|
|
; STORE: for.body.lr.ph:
|
|
; STORE-NEXT: [[TMP0:%.*]] = sext i32 [[N]] to i64
|
|
; STORE-NEXT: br label [[FOR_BODY:%.*]]
|
|
; STORE: for.body:
|
|
; STORE-NEXT: [[I_039:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
|
|
; STORE-NEXT: [[C_ADDR_038:%.*]] = phi float* [ [[C:%.*]], [[FOR_BODY_LR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
|
|
; STORE-NEXT: [[MUL:%.*]] = shl nsw i64 [[I_039]], 2
|
|
; STORE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[MUL]]
|
|
; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
|
|
; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
|
|
; STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[ARRAYIDX2]] to <4 x float>*
|
|
; STORE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
|
|
; STORE-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP4]]
|
|
; STORE-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
|
|
; STORE-NEXT: store float [[TMP6]], float* [[C_ADDR_038]], align 4
|
|
; STORE-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[C_ADDR_038]], i64 1
|
|
; STORE-NEXT: [[INC]] = add nsw i64 [[I_039]], 1
|
|
; STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
|
|
; STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]]
|
|
; STORE: for.end:
|
|
; STORE-NEXT: ret i32 0
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%cmp37 = icmp sgt i32 %n, 0
|
|
br i1 %cmp37, label %for.body.lr.ph, label %for.end
|
|

|
; Addresses of B[1], B[2] and B[3] are formed once here; the loads that use
; them are inside the loop.
for.body.lr.ph:
|
|
%arrayidx4 = getelementptr inbounds float, float* %B, i64 1
|
|
%arrayidx9 = getelementptr inbounds float, float* %B, i64 2
|
|
%arrayidx15 = getelementptr inbounds float, float* %B, i64 3
|
|
%0 = sext i32 %n to i64
|
|
br label %for.body
|
|

|
for.body:
|
|
%i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
|
|
%C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
|
|
%1 = load float, float* %B, align 4
|
|
; %mul = 4*i; the `or` with 1, 2, 3 below selects the next three elements of A.
%mul = shl nsw i64 %i.039, 2
|
|
%arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
|
|
%2 = load float, float* %arrayidx2, align 4
|
|
%mul3 = fmul fast float %1, %2
|
|
%3 = load float, float* %arrayidx4, align 4
|
|
%add34 = or i64 %mul, 1
|
|
%arrayidx6 = getelementptr inbounds float, float* %A, i64 %add34
|
|
%4 = load float, float* %arrayidx6, align 4
|
|
%mul7 = fmul fast float %3, %4
|
|
%add8 = fadd fast float %mul3, %mul7
|
|
%5 = load float, float* %arrayidx9, align 4
|
|
%add1135 = or i64 %mul, 2
|
|
%arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1135
|
|
%6 = load float, float* %arrayidx12, align 4
|
|
%mul13 = fmul fast float %5, %6
|
|
%add14 = fadd fast float %add8, %mul13
|
|
%7 = load float, float* %arrayidx15, align 4
|
|
%add1736 = or i64 %mul, 3
|
|
%arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1736
|
|
%8 = load float, float* %arrayidx18, align 4
|
|
%mul19 = fmul fast float %7, %8
|
|
%add20 = fadd fast float %add14, %mul19
|
|
; The sum of the four products is stored through the C pointer phi.
store float %add20, float* %C.addr.038, align 4
|
|
%incdec.ptr = getelementptr inbounds float, float* %C.addr.038, i64 1
|
|
%inc = add nsw i64 %i.039, 1
|
|
%exitcond = icmp eq i64 %inc, %0
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|

|
for.end:
|
|
ret i32 0
|
|
}
|
|
|
|
; Zero-initialized 32-element global arrays; the *_red_example* functions that
; follow load from them through constant getelementptr expressions.
@arr_i32 = global [32 x i32] zeroinitializer, align 16
|
|
@arr_float = global [32 x float] zeroinitializer, align 16
|
|
|
|
; float_red_example4: linear chain of three fast-math fadds over elements 0..3
; of @arr_float, with the final sum stored to %res.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): one
; <4 x float> load of @arr_float and a call to llvm.vector.reduce.fadd.v4f32
; with a -0.0 start value, whose result is stored to %res.
define void @float_red_example4(float* %res) {
|
|
; CHECK-LABEL: @float_red_example4(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
|
|
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]]
|
|
; CHECK-NEXT: store float [[ADD_2]], float* [[RES:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @float_red_example4(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([32 x float]* @arr_float to <4 x float>*), align 16
|
|
; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP0]])
|
|
; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
|
|
; STORE-NEXT: ret void
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
|
|
%1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
|
|
%add = fadd fast float %1, %0
|
|
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
|
|
%add.1 = fadd fast float %2, %add
|
|
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
|
|
%add.2 = fadd fast float %3, %add.1
|
|
store float %add.2, float* %res, align 16
|
|
ret void
|
|
}
|
|
|
|
; float_red_example8: linear chain of seven fast-math fadds over elements 0..7
; of @arr_float, with the final sum stored to %res.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): one
; <8 x float> load of @arr_float and a call to llvm.vector.reduce.fadd.v8f32
; with a -0.0 start value, whose result is stored to %res.
define void @float_red_example8(float* %res) {
|
|
; CHECK-LABEL: @float_red_example8(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
|
|
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4
|
|
; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8
|
|
; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4
|
|
; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]]
|
|
; CHECK-NEXT: store float [[ADD_6]], float* [[RES:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @float_red_example8(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[TMP0:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr_float to <8 x float>*), align 16
|
|
; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[TMP0]])
|
|
; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
|
|
; STORE-NEXT: ret void
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
|
|
%1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
|
|
%add = fadd fast float %1, %0
|
|
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
|
|
%add.1 = fadd fast float %2, %add
|
|
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
|
|
%add.2 = fadd fast float %3, %add.1
|
|
%4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16
|
|
%add.3 = fadd fast float %4, %add.2
|
|
%5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4
|
|
%add.4 = fadd fast float %5, %add.3
|
|
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8
|
|
%add.5 = fadd fast float %6, %add.4
|
|
%7 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4
|
|
%add.6 = fadd fast float %7, %add.5
|
|
store float %add.6, float* %res, align 16
|
|
ret void
|
|
}
|
|
|
|
; float_red_example16: linear chain of fifteen fast-math fadds over elements
; 0..15 of @arr_float, with the final sum stored to %res.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): one
; <16 x float> load of @arr_float and a call to llvm.vector.reduce.fadd.v16f32
; with a -0.0 start value, whose result is stored to %res.
define void @float_red_example16(float* %res) {
|
|
; CHECK-LABEL: @float_red_example16(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
|
|
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP2]], [[ADD]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP3]], [[ADD_1]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP4]], [[ADD_2]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4
|
|
; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float [[TMP5]], [[ADD_3]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8
|
|
; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float [[TMP6]], [[ADD_4]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4
|
|
; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float [[TMP7]], [[ADD_5]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), align 16
|
|
; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float [[TMP8]], [[ADD_6]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4
|
|
; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float [[TMP9]], [[ADD_7]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8
|
|
; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float [[TMP10]], [[ADD_8]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4
|
|
; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float [[TMP11]], [[ADD_9]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16
|
|
; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float [[TMP12]], [[ADD_10]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4
|
|
; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float [[TMP13]], [[ADD_11]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8
|
|
; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float [[TMP14]], [[ADD_12]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4
|
|
; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float [[TMP15]], [[ADD_13]]
|
|
; CHECK-NEXT: store float [[ADD_14]], float* [[RES:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @float_red_example16(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[TMP0:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr_float to <16 x float>*), align 16
|
|
; STORE-NEXT: [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> [[TMP0]])
|
|
; STORE-NEXT: store float [[TMP1]], float* [[RES:%.*]], align 16
|
|
; STORE-NEXT: ret void
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%0 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 0), align 16
|
|
%1 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 1), align 4
|
|
%add = fadd fast float %1, %0
|
|
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 2), align 8
|
|
%add.1 = fadd fast float %2, %add
|
|
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 3), align 4
|
|
%add.2 = fadd fast float %3, %add.1
|
|
%4 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 4), align 16
|
|
%add.3 = fadd fast float %4, %add.2
|
|
%5 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 5), align 4
|
|
%add.4 = fadd fast float %5, %add.3
|
|
%6 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 6), align 8
|
|
%add.5 = fadd fast float %6, %add.4
|
|
%7 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 7), align 4
|
|
%add.6 = fadd fast float %7, %add.5
|
|
%8 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 8), align 16
|
|
%add.7 = fadd fast float %8, %add.6
|
|
%9 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 9), align 4
|
|
%add.8 = fadd fast float %9, %add.7
|
|
%10 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 10), align 8
|
|
%add.9 = fadd fast float %10, %add.8
|
|
%11 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 11), align 4
|
|
%add.10 = fadd fast float %11, %add.9
|
|
%12 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 12), align 16
|
|
%add.11 = fadd fast float %12, %add.10
|
|
%13 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 13), align 4
|
|
%add.12 = fadd fast float %13, %add.11
|
|
%14 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 14), align 8
|
|
%add.13 = fadd fast float %14, %add.12
|
|
%15 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr_float, i64 0, i64 15), align 4
|
|
%add.14 = fadd fast float %15, %add.13
|
|
store float %add.14, float* %res, align 16
|
|
ret void
|
|
}
|
|
|
|
; i32_red_example4: linear chain of three `add nsw` operations over elements
; 0..3 of @arr_i32, with the final sum stored to %res.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): one
; <4 x i32> load of @arr_i32 and a call to llvm.vector.reduce.add.v4i32, whose
; result is stored to %res.
define void @i32_red_example4(i32* %res) {
|
|
; CHECK-LABEL: @i32_red_example4(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
|
|
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
|
|
; CHECK-NEXT: store i32 [[ADD_2]], i32* [[RES:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @i32_red_example4(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr_i32 to <4 x i32>*), align 16
|
|
; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
|
|
; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
|
|
; STORE-NEXT: ret void
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
|
%1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
|
|
%add = add nsw i32 %1, %0
|
|
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
|
|
%add.1 = add nsw i32 %2, %add
|
|
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
|
|
%add.2 = add nsw i32 %3, %add.1
|
|
store i32 %add.2, i32* %res, align 16
|
|
ret void
|
|
}
|
|
|
|
; i32_red_example8: linear chain of seven `add nsw` operations over elements
; 0..7 of @arr_i32, with the final sum stored to %res.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): one
; <8 x i32> load of @arr_i32 and a call to llvm.vector.reduce.add.v8i32, whose
; result is stored to %res.
define void @i32_red_example8(i32* %res) {
|
|
; CHECK-LABEL: @i32_red_example8(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
|
|
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
|
|
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
|
|
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
|
|
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]
|
|
; CHECK-NEXT: store i32 [[ADD_6]], i32* [[RES:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @i32_red_example8(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
|
|
; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
|
|
; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
|
|
; STORE-NEXT: ret void
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
|
%1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
|
|
%add = add nsw i32 %1, %0
|
|
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
|
|
%add.1 = add nsw i32 %2, %add
|
|
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
|
|
%add.2 = add nsw i32 %3, %add.1
|
|
%4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
|
|
%add.3 = add nsw i32 %4, %add.2
|
|
%5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
|
|
%add.4 = add nsw i32 %5, %add.3
|
|
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
|
|
%add.5 = add nsw i32 %6, %add.4
|
|
%7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
|
|
%add.6 = add nsw i32 %7, %add.5
|
|
store i32 %add.6, i32* %res, align 16
|
|
ret void
|
|
}
|
|
|
|
; i32_red_example16: linear chain of fifteen `add nsw` operations over elements
; 0..15 of @arr_i32, with the final sum stored to %res.
; Expected output for the default run (CHECK prefix): the scalar code unchanged.
; Expected output for the run with -slp-vectorize-hor-store (STORE prefix): one
; <16 x i32> load of @arr_i32 and a call to llvm.vector.reduce.add.v16i32, whose
; result is stored to %res.
define void @i32_red_example16(i32* %res) {
|
|
; CHECK-LABEL: @i32_red_example16(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
|
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
|
|
; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
|
|
; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
|
|
; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
|
|
; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
|
|
; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
|
|
; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16
|
|
; CHECK-NEXT: [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4
|
|
; CHECK-NEXT: [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8
|
|
; CHECK-NEXT: [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4
|
|
; CHECK-NEXT: [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16
|
|
; CHECK-NEXT: [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4
|
|
; CHECK-NEXT: [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8
|
|
; CHECK-NEXT: [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4
|
|
; CHECK-NEXT: [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]]
|
|
; CHECK-NEXT: store i32 [[ADD_14]], i32* [[RES:%.*]], align 16
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
; STORE-LABEL: @i32_red_example16(
|
|
; STORE-NEXT: entry:
|
|
; STORE-NEXT: [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr_i32 to <16 x i32>*), align 16
|
|
; STORE-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP0]])
|
|
; STORE-NEXT: store i32 [[TMP1]], i32* [[RES:%.*]], align 16
|
|
; STORE-NEXT: ret void
|
|
;
|
|
; Input IR (scalar form the pass is run on).
entry:
|
|
%0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
|
|
%1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
|
|
%add = add nsw i32 %1, %0
|
|
%2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
|
|
%add.1 = add nsw i32 %2, %add
|
|
%3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
|
|
%add.2 = add nsw i32 %3, %add.1
|
|
%4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
|
|
%add.3 = add nsw i32 %4, %add.2
|
|
%5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
|
|
%add.4 = add nsw i32 %5, %add.3
|
|
%6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
|
|
%add.5 = add nsw i32 %6, %add.4
|
|
%7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
|
|
%add.6 = add nsw i32 %7, %add.5
|
|
%8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16
|
|
%add.7 = add nsw i32 %8, %add.6
|
|
%9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4
|
|
%add.8 = add nsw i32 %9, %add.7
|
|
%10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8
|
|
%add.9 = add nsw i32 %10, %add.8
|
|
%11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4
|
|
%add.10 = add nsw i32 %11, %add.9
|
|
%12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16
|
|
%add.11 = add nsw i32 %12, %add.10
|
|
%13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4
|
|
%add.12 = add nsw i32 %13, %add.11
|
|
%14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8
|
|
%add.13 = add nsw i32 %14, %add.12
|
|
%15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4
|
|
%add.14 = add nsw i32 %15, %add.13
|
|
store i32 %add.14, i32* %res, align 16
|
|
ret void
|
|
}
|
|
|
|
; Sums all 32 elements of @arr_i32 through a linear chain of `add nsw`
; instructions and stores the total to %res.
; With the plain -slp-vectorizer run line (CHECK prefix) the expected output is
; the unchanged scalar chain. With -slp-vectorize-hor -slp-vectorize-hor-store
; (STORE prefix) the chain is expected to become a single <32 x i32> load
; followed by llvm.vector.reduce.add.v32i32.
define void @i32_red_example32(i32* %res) {
; CHECK-LABEL: @i32_red_example32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP3]], [[ADD_1]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP4]], [[ADD_2]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP5]], [[ADD_3]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP6]], [[ADD_4]]
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP7]], [[ADD_5]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16
; CHECK-NEXT:    [[ADD_7:%.*]] = add nsw i32 [[TMP8]], [[ADD_6]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4
; CHECK-NEXT:    [[ADD_8:%.*]] = add nsw i32 [[TMP9]], [[ADD_7]]
; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8
; CHECK-NEXT:    [[ADD_9:%.*]] = add nsw i32 [[TMP10]], [[ADD_8]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4
; CHECK-NEXT:    [[ADD_10:%.*]] = add nsw i32 [[TMP11]], [[ADD_9]]
; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16
; CHECK-NEXT:    [[ADD_11:%.*]] = add nsw i32 [[TMP12]], [[ADD_10]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4
; CHECK-NEXT:    [[ADD_12:%.*]] = add nsw i32 [[TMP13]], [[ADD_11]]
; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8
; CHECK-NEXT:    [[ADD_13:%.*]] = add nsw i32 [[TMP14]], [[ADD_12]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4
; CHECK-NEXT:    [[ADD_14:%.*]] = add nsw i32 [[TMP15]], [[ADD_13]]
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16
; CHECK-NEXT:    [[ADD_15:%.*]] = add nsw i32 [[TMP16]], [[ADD_14]]
; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4
; CHECK-NEXT:    [[ADD_16:%.*]] = add nsw i32 [[TMP17]], [[ADD_15]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8
; CHECK-NEXT:    [[ADD_17:%.*]] = add nsw i32 [[TMP18]], [[ADD_16]]
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4
; CHECK-NEXT:    [[ADD_18:%.*]] = add nsw i32 [[TMP19]], [[ADD_17]]
; CHECK-NEXT:    [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16
; CHECK-NEXT:    [[ADD_19:%.*]] = add nsw i32 [[TMP20]], [[ADD_18]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4
; CHECK-NEXT:    [[ADD_20:%.*]] = add nsw i32 [[TMP21]], [[ADD_19]]
; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8
; CHECK-NEXT:    [[ADD_21:%.*]] = add nsw i32 [[TMP22]], [[ADD_20]]
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4
; CHECK-NEXT:    [[ADD_22:%.*]] = add nsw i32 [[TMP23]], [[ADD_21]]
; CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16
; CHECK-NEXT:    [[ADD_23:%.*]] = add nsw i32 [[TMP24]], [[ADD_22]]
; CHECK-NEXT:    [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4
; CHECK-NEXT:    [[ADD_24:%.*]] = add nsw i32 [[TMP25]], [[ADD_23]]
; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8
; CHECK-NEXT:    [[ADD_25:%.*]] = add nsw i32 [[TMP26]], [[ADD_24]]
; CHECK-NEXT:    [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4
; CHECK-NEXT:    [[ADD_26:%.*]] = add nsw i32 [[TMP27]], [[ADD_25]]
; CHECK-NEXT:    [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16
; CHECK-NEXT:    [[ADD_27:%.*]] = add nsw i32 [[TMP28]], [[ADD_26]]
; CHECK-NEXT:    [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4
; CHECK-NEXT:    [[ADD_28:%.*]] = add nsw i32 [[TMP29]], [[ADD_27]]
; CHECK-NEXT:    [[TMP30:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8
; CHECK-NEXT:    [[ADD_29:%.*]] = add nsw i32 [[TMP30]], [[ADD_28]]
; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4
; CHECK-NEXT:    [[ADD_30:%.*]] = add nsw i32 [[TMP31]], [[ADD_29]]
; CHECK-NEXT:    store i32 [[ADD_30]], i32* [[RES:%.*]], align 16
; CHECK-NEXT:    ret void
;
; STORE-LABEL: @i32_red_example32(
; STORE-NEXT:  entry:
; STORE-NEXT:    [[TMP0:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr_i32 to <32 x i32>*), align 16
; STORE-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP0]])
; STORE-NEXT:    store i32 [[TMP1]], i32* [[RES:%.*]], align 16
; STORE-NEXT:    ret void
;
entry:
  %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
  %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
  %add = add nsw i32 %1, %0
  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
  %add.1 = add nsw i32 %2, %add
  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
  %add.2 = add nsw i32 %3, %add.1
  %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
  %add.3 = add nsw i32 %4, %add.2
  %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
  %add.4 = add nsw i32 %5, %add.3
  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
  %add.5 = add nsw i32 %6, %add.4
  %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
  %add.6 = add nsw i32 %7, %add.5
  %8 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 8), align 16
  %add.7 = add nsw i32 %8, %add.6
  %9 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 9), align 4
  %add.8 = add nsw i32 %9, %add.7
  %10 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 10), align 8
  %add.9 = add nsw i32 %10, %add.8
  %11 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 11), align 4
  %add.10 = add nsw i32 %11, %add.9
  %12 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 12), align 16
  %add.11 = add nsw i32 %12, %add.10
  %13 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 13), align 4
  %add.12 = add nsw i32 %13, %add.11
  %14 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 14), align 8
  %add.13 = add nsw i32 %14, %add.12
  %15 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 15), align 4
  %add.14 = add nsw i32 %15, %add.13
  %16 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 16), align 16
  %add.15 = add nsw i32 %16, %add.14
  %17 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 17), align 4
  %add.16 = add nsw i32 %17, %add.15
  %18 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 18), align 8
  %add.17 = add nsw i32 %18, %add.16
  %19 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 19), align 4
  %add.18 = add nsw i32 %19, %add.17
  %20 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 20), align 16
  %add.19 = add nsw i32 %20, %add.18
  %21 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 21), align 4
  %add.20 = add nsw i32 %21, %add.19
  %22 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 22), align 8
  %add.21 = add nsw i32 %22, %add.20
  %23 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 23), align 4
  %add.22 = add nsw i32 %23, %add.21
  %24 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 24), align 16
  %add.23 = add nsw i32 %24, %add.22
  %25 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 25), align 4
  %add.24 = add nsw i32 %25, %add.23
  %26 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 26), align 8
  %add.25 = add nsw i32 %26, %add.24
  %27 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 27), align 4
  %add.26 = add nsw i32 %27, %add.25
  %28 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 28), align 16
  %add.27 = add nsw i32 %28, %add.26
  %29 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 29), align 4
  %add.28 = add nsw i32 %29, %add.27
  %30 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 30), align 8
  %add.29 = add nsw i32 %30, %add.28
  %31 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 31), align 4
  %add.30 = add nsw i32 %31, %add.29
  store i32 %add.30, i32* %res, align 16
  ret void
}
|
|
|
|
; External function with no body in this file; @i32_red_call and
; @i32_red_invoke pass their reduced sum to it.
declare i32 @foobar(i32)
|
|
|
|
; Reduction whose result feeds a call argument instead of a store: the first
; eight elements of @arr_i32 are summed with chained `add nsw` and the total is
; passed to @foobar. Both RUN configurations expect one <8 x i32> load followed
; by llvm.vector.reduce.add.v8i32. The %val parameter is not used by the body.
define void @i32_red_call(i32 %val) {
; ALL-LABEL: @i32_red_call(
; ALL-NEXT:  entry:
; ALL-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
; ALL-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
; ALL-NEXT:    [[RES:%.*]] = call i32 @foobar(i32 [[TMP1]])
; ALL-NEXT:    ret void
;
entry:
  %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
  %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
  %add = add nsw i32 %1, %0
  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
  %add.1 = add nsw i32 %2, %add
  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
  %add.2 = add nsw i32 %3, %add.1
  %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
  %add.3 = add nsw i32 %4, %add.2
  %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
  %add.4 = add nsw i32 %5, %add.3
  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
  %add.5 = add nsw i32 %6, %add.4
  %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
  %add.6 = add nsw i32 %7, %add.5
  %res = call i32 @foobar(i32 %add.6)
  ret void
}
|
|
|
|
; Same eight-element sum as @i32_red_call, but the total is consumed by an
; `invoke` of @foobar, so the reduction root is a terminator operand. The
; unwind edge goes to a cleanup landing pad that simply branches to %normal.
; Both RUN configurations expect one <8 x i32> load followed by
; llvm.vector.reduce.add.v8i32 ahead of the invoke. %val is not used.
define void @i32_red_invoke(i32 %val) personality i32 (...)* @__gxx_personality_v0 {
; ALL-LABEL: @i32_red_invoke(
; ALL-NEXT:  entry:
; ALL-NEXT:    [[TMP0:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr_i32 to <8 x i32>*), align 16
; ALL-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP0]])
; ALL-NEXT:    [[RES:%.*]] = invoke i32 @foobar(i32 [[TMP1]])
; ALL-NEXT:    to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]]
; ALL:       exception:
; ALL-NEXT:    [[CLEANUP:%.*]] = landingpad i8
; ALL-NEXT:    cleanup
; ALL-NEXT:    br label [[NORMAL]]
; ALL:       normal:
; ALL-NEXT:    ret void
;
entry:
  %0 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 0), align 16
  %1 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 1), align 4
  %add = add nsw i32 %1, %0
  %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 2), align 8
  %add.1 = add nsw i32 %2, %add
  %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 3), align 4
  %add.2 = add nsw i32 %3, %add.1
  %4 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 4), align 16
  %add.3 = add nsw i32 %4, %add.2
  %5 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 5), align 4
  %add.4 = add nsw i32 %5, %add.3
  %6 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 6), align 8
  %add.5 = add nsw i32 %6, %add.4
  %7 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr_i32, i64 0, i64 7), align 4
  %add.6 = add nsw i32 %7, %add.5
  %res = invoke i32 @foobar(i32 %add.6) to label %normal unwind label %exception

exception:
  %cleanup = landingpad i8 cleanup
  br label %normal

normal:
  ret void
}
|
|
|
|
; Test case from PR47670. Reduction result is used as incoming value in phi.
; Block %bb sums four consecutive i32 values starting at %data with plain
; `add`; the total reaches %exit only through the phi %sum.1, whose other
; incoming value is 0 from %entry. Both RUN configurations expect a
; <4 x i32> load plus llvm.vector.reduce.add.v4i32 feeding that phi.
define i32 @reduction_result_used_in_phi(i32* nocapture readonly %data, i1 zeroext %b) {
; ALL-LABEL: @reduction_result_used_in_phi(
; ALL-NEXT:  entry:
; ALL-NEXT:    br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
; ALL:       bb:
; ALL-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>*
; ALL-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; ALL-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; ALL-NEXT:    br label [[EXIT]]
; ALL:       exit:
; ALL-NEXT:    [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
; ALL-NEXT:    ret i32 [[SUM_1]]
;
entry:
  br i1 %b, label %bb, label %exit

bb:
  %l.0 = load i32, i32* %data, align 4
  %idx.1 = getelementptr inbounds i32, i32* %data, i64 1
  %l.1 = load i32, i32* %idx.1, align 4
  %add.1 = add i32 %l.1, %l.0
  %idx.2 = getelementptr inbounds i32, i32* %data, i64 2
  %l.2 = load i32, i32* %idx.2, align 4
  %add.2 = add i32 %l.2, %add.1
  %idx.3 = getelementptr inbounds i32, i32* %data, i64 3
  %l.3 = load i32, i32* %idx.3, align 4
  %add.3 = add i32 %l.3, %add.2
  br label %exit

exit:
  %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb]
  ret i32 %sum.1
}
|
|
|
|
; Four-element i32 sum in %bb whose result is an incoming value of the phi in
; %exit. Both RUN configurations expect a <4 x i32> load plus
; llvm.vector.reduce.add.v4i32 feeding that phi.
; NOTE(review): despite the "_loop" suffix, the body as written has no
; back-edge and matches @reduction_result_used_in_phi instruction for
; instruction -- confirm whether a loop-carried variant was intended.
define i32 @reduction_result_used_in_phi_loop(i32* nocapture readonly %data, i1 zeroext %b) {
; ALL-LABEL: @reduction_result_used_in_phi_loop(
; ALL-NEXT:  entry:
; ALL-NEXT:    br i1 [[B:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
; ALL:       bb:
; ALL-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DATA:%.*]] to <4 x i32>*
; ALL-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; ALL-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; ALL-NEXT:    br label [[EXIT]]
; ALL:       exit:
; ALL-NEXT:    [[SUM_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB]] ]
; ALL-NEXT:    ret i32 [[SUM_1]]
;
entry:
  br i1 %b, label %bb, label %exit

bb:
  %l.0 = load i32, i32* %data, align 4
  %idx.1 = getelementptr inbounds i32, i32* %data, i64 1
  %l.1 = load i32, i32* %idx.1, align 4
  %add.1 = add i32 %l.1, %l.0
  %idx.2 = getelementptr inbounds i32, i32* %data, i64 2
  %l.2 = load i32, i32* %idx.2, align 4
  %add.2 = add i32 %l.2, %add.1
  %idx.3 = getelementptr inbounds i32, i32* %data, i64 3
  %l.3 = load i32, i32* %idx.3, align 4
  %add.3 = add i32 %l.3, %add.2
  br label %exit

exit:
  %sum.1 = phi i32 [ 0, %entry ], [ %add.3, %bb]
  ret i32 %sum.1
}
|
|
|
|
; Make sure we do not crash or loop forever on ill-formed IR.
; Block %dead has no predecessors, and its only value %t0 uses itself as an
; operand; %t0 is also an incoming value of the phi in %bb.1. The expected
; output is the input unchanged.

define void @unreachable_block() {
; ALL-LABEL: @unreachable_block(
; ALL-NEXT:  bb.0:
; ALL-NEXT:    br label [[BB_1:%.*]]
; ALL:       dead:
; ALL-NEXT:    [[T0:%.*]] = add i16 [[T0]], undef
; ALL-NEXT:    br label [[BB_1]]
; ALL:       bb.1:
; ALL-NEXT:    [[T1:%.*]] = phi i16 [ undef, [[BB_0:%.*]] ], [ [[T0]], [[DEAD:%.*]] ]
; ALL-NEXT:    ret void
;
bb.0:
  br label %bb.1

dead:
  %t0 = add i16 %t0, undef ; unreachable IR may depend on itself
  br label %bb.1

bb.1:
  %t1 = phi i16 [ undef, %bb.0 ], [ %t0, %dead ]
  ret void
}
|
|
|
|
; The FMF on the reduction should match the incoming insts.
; Four consecutive floats starting at %p are summed by three `fadd` that all
; carry exactly "reassoc nsz"; the expected llvm.vector.reduce.fadd.v4f32 call
; carries the same two flags and uses -0.0 as its start value.

define float @fadd_v4f32_fmf(float* %p) {
; ALL-LABEL: @fadd_v4f32_fmf(
; ALL-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; ALL-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; ALL-NEXT:    [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; ALL-NEXT:    ret float [[TMP3]]
;
  %p1 = getelementptr inbounds float, float* %p, i64 1
  %p2 = getelementptr inbounds float, float* %p, i64 2
  %p3 = getelementptr inbounds float, float* %p, i64 3
  %t0 = load float, float* %p, align 4
  %t1 = load float, float* %p1, align 4
  %t2 = load float, float* %p2, align 4
  %t3 = load float, float* %p3, align 4
  %add1 = fadd reassoc nsz float %t1, %t0
  %add2 = fadd reassoc nsz float %t2, %add1
  %add3 = fadd reassoc nsz float %t3, %add2
  ret float %add3
}
|
|
|
|
; The minimal FMF for fadd reduction are "reassoc nsz".
; Only the common FMF of all operations in the reduction propagate to the result.
; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags.
; Per-instruction flags below: %add1 has "ninf reassoc nsz nnan", %add2 adds
; "arcp" to that set, and %add3 has "ninf reassoc nsz contract"; the flags
; shared by all three are "reassoc ninf nsz", which is what the expected
; reduction call carries.

define float @fadd_v4f32_fmf_intersect(float* %p) {
; ALL-LABEL: @fadd_v4f32_fmf_intersect(
; ALL-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; ALL-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
; ALL-NEXT:    [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; ALL-NEXT:    ret float [[TMP3]]
;
  %p1 = getelementptr inbounds float, float* %p, i64 1
  %p2 = getelementptr inbounds float, float* %p, i64 2
  %p3 = getelementptr inbounds float, float* %p, i64 3
  %t0 = load float, float* %p, align 4
  %t1 = load float, float* %p1, align 4
  %t2 = load float, float* %p2, align 4
  %t3 = load float, float* %p3, align 4
  %add1 = fadd ninf reassoc nsz nnan float %t1, %t0
  %add2 = fadd ninf reassoc nsz nnan arcp float %t2, %add1
  %add3 = fadd ninf reassoc nsz contract float %t3, %add2
  ret float %add3
}
|
|
|
|
; Personality function named by the `personality` clause of @i32_red_invoke;
; only declared here, never defined.
declare i32 @__gxx_personality_v0(...)
|