
Currently, we only allow folding not (cmp eq) -> icmp ne if the not is the only user of the compare. However, a common scenario is that some select also uses the compare. We can still fold the not if we also swap the arms of those selects. This helps avoid regressions in #150368.
1822 lines
99 KiB
LLVM
1822 lines
99 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
|
|
|
|
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
|
|
; Float pattern:
|
|
; Check vectorization of reduction code which has an fadd instruction after
|
|
; an fcmp instruction which compares an array element and 0.
|
|
;
|
|
; float fcmp_0_fadd_select1(float *restrict x, const int N) {
|
|
; float sum = 0.;
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > (float)0.)
|
|
; sum += x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_0_fadd_select1(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[TMP9]]
|
|
;
|
|
; Scalar input loop: a fast-math float fadd reduction over x[i], where each
; element is added only if it compares greater than 0.0.
; The loop is bypassed entirely unless N > 0.
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count: N widened to i64 (only reached when N > 0).
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.header, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
; Conditional reduction step: the add is computed unconditionally and the
; select keeps it only when %cmp.2 holds; otherwise %sum.1 is carried forward.
%cmp.2 = fcmp fast ogt float %0, 0.000000e+00
|
|
%add = fadd fast float %0, %sum.1
|
|
%sum.2 = select i1 %cmp.2, float %add, float %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; 0.0 when the loop was skipped from entry, otherwise the final reduction value.
%1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
|
|
ret float %1
|
|
}
|
|
|
|
; Double pattern:
|
|
; Check vectorization of reduction code which has an fadd instruction after
|
|
; an fcmp instruction which compares an array element and 0.
|
|
;
|
|
; double fcmp_0_fadd_select2(double *restrict x, const int N) {
|
|
; double sum = 0.;
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > 0.)
|
|
; sum += x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define double @fcmp_0_fadd_select2(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret double [[TMP9]]
|
|
;
|
|
; Scalar input loop: a fast-math double fadd reduction over x[i], where each
; element is added only if it compares greater than 0.0.
; The loop is bypassed entirely unless N > 0.
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count: N widened to i64 (only reached when N > 0).
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.header, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
|
|
; NOTE(review): the double is loaded with align 4, below its 8-byte size;
; this is legal IR and the expected vector load mirrors it - confirm intended.
%0 = load double, ptr %arrayidx, align 4
|
|
; Conditional reduction step: the add is computed unconditionally and the
; select keeps it only when %cmp.2 holds; otherwise %sum.1 is carried forward.
%cmp.2 = fcmp fast ogt double %0, 0.000000e+00
|
|
%add = fadd fast double %0, %sum.1
|
|
%sum.2 = select i1 %cmp.2, double %add, double %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; 0.0 when the loop was skipped from entry, otherwise the final reduction value.
%1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
|
|
ret double %1
|
|
}
|
|
|
|
; Float pattern:
|
|
; Check vectorization of reduction code which has an fadd instruction after
|
|
; an fcmp instruction which compares an array element and a floating-point
|
|
; value.
|
|
;
|
|
; float fcmp_val_fadd_select1(float *restrict x, float y, const int N) {
|
|
; float sum = 0.;
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > y)
|
|
; sum += x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_val_fadd_select1(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], float [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], [[Y]]
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[TMP9]]
|
|
;
|
|
; Scalar input loop: a fast-math float fadd reduction over x[i], where each
; element is added only if it compares greater than the argument %y.
; The loop is bypassed entirely unless N > 0.
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count: N widened to i64 (only reached when N > 0).
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.header, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
; Conditional reduction step: %y is a function argument, so the expected
; vector code splats it once in the vector preheader. The select keeps the
; add only when %cmp.2 holds; otherwise %sum.1 is carried forward.
%cmp.2 = fcmp fast ogt float %0, %y
|
|
%add = fadd fast float %0, %sum.1
|
|
%sum.2 = select i1 %cmp.2, float %add, float %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; 0.0 when the loop was skipped from entry, otherwise the final reduction value.
%1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
|
|
ret float %1
|
|
}
|
|
|
|
; Double pattern:
|
|
; Check vectorization of reduction code which has an fadd instruction after
|
|
; an fcmp instruction which compares an array element and a floating-point
|
|
; value.
|
|
;
|
|
; double fcmp_val_fadd_select2(double *restrict x, double y, const int N) {
|
|
; double sum = 0.;
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > y)
|
|
; sum += x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define double @fcmp_val_fadd_select2(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], double [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], [[Y]]
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret double [[TMP9]]
|
|
;
|
|
; Scalar input loop: a fast-math double fadd reduction over x[i], where each
; element is added only if it compares greater than the argument %y.
; The loop is bypassed entirely unless N > 0.
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count: N widened to i64 (only reached when N > 0).
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.header, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
|
|
; NOTE(review): the double is loaded with align 4, below its 8-byte size;
; this is legal IR and the expected vector load mirrors it - confirm intended.
%0 = load double, ptr %arrayidx, align 4
|
|
; Conditional reduction step: %y is a function argument, so the expected
; vector code splats it once in the vector preheader. The select keeps the
; add only when %cmp.2 holds; otherwise %sum.1 is carried forward.
%cmp.2 = fcmp fast ogt double %0, %y
|
|
%add = fadd fast double %0, %sum.1
|
|
%sum.2 = select i1 %cmp.2, double %add, double %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; 0.0 when the loop was skipped from entry, otherwise the final reduction value.
%1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
|
|
ret double %1
|
|
}
|
|
|
|
; Float pattern:
|
|
; Check vectorization of reduction code which has an fadd instruction after
|
|
; an fcmp instruction which compares an array element and another array
|
|
; element.
|
|
;
|
|
; float fcmp_array_elm_fadd_select1(float *restrict x, float *restrict y,
|
|
; const int N) {
|
|
; float sum = 0.;
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > y[i])
|
|
; sum += x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_array_elm_fadd_select1(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP7]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt float [[TMP10]], [[TMP11]]
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP10]], [[SUM_1]]
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP12:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[TMP12]]
|
|
;
|
|
; Scalar input loop: a fast-math float fadd reduction over x[i], where each
; element is added only if it compares greater than y[i] at the same index.
; The loop is bypassed entirely unless N > 0.
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count: N widened to i64 (only reached when N > 0).
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %for.header
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
|
|
; Both operands of the compare are loaded per iteration from the same index.
%arrayidx.1 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx.1, align 4
|
|
%arrayidx.2 = getelementptr inbounds float, ptr %y, i64 %indvars.iv
|
|
%1 = load float, ptr %arrayidx.2, align 4
|
|
; Conditional reduction step: the add is computed unconditionally and the
; select keeps it only when %cmp.2 holds; otherwise %sum.1 is carried forward.
%cmp.2 = fcmp fast ogt float %0, %1
|
|
%add = fadd fast float %0, %sum.1
|
|
%sum.2 = select i1 %cmp.2, float %add, float %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; 0.0 when the loop was skipped from entry, otherwise the final reduction value.
%2 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
|
|
ret float %2
|
|
}
|
|
|
|
; Double pattern:
|
|
; Check vectorization of reduction code which has an fadd instruction after
|
|
; an fcmp instruction which compares an array element and another array
|
|
; element.
|
|
;
|
|
; double fcmp_array_elm_fadd_select2(double *restrict x, double *restrict y,
|
|
; const int N) {
|
|
; double sum = 0.;
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > y[i])
|
|
; sum += x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_array_elm_fadd_select2(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define double @fcmp_array_elm_fadd_select2(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP3]], align 4
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[WIDE_LOAD1]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x double> [[TMP6]], <4 x double> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP7]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
|
|
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt double [[TMP10]], [[TMP11]]
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[TMP10]], [[SUM_1]]
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP12:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret double [[TMP12]]
|
|
;
|
|
; Scalar input loop: a fast-math double fadd reduction over x[i], where each
; element is added only if it compares greater than y[i] at the same index.
; The loop is bypassed entirely unless N > 0.
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count: N widened to i64 (only reached when N > 0).
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %for.header
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
|
|
; Both operands of the compare are loaded per iteration from the same index.
; NOTE(review): the doubles are loaded with align 4, below their 8-byte size;
; this is legal IR and the expected vector loads mirror it - confirm intended.
%arrayidx.1 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
|
|
%0 = load double, ptr %arrayidx.1, align 4
|
|
%arrayidx.2 = getelementptr inbounds double, ptr %y, i64 %indvars.iv
|
|
%1 = load double, ptr %arrayidx.2, align 4
|
|
; Conditional reduction step: the add is computed unconditionally and the
; select keeps it only when %cmp.2 holds; otherwise %sum.1 is carried forward.
%cmp.2 = fcmp fast ogt double %0, %1
|
|
%add = fadd fast double %0, %sum.1
|
|
%sum.2 = select i1 %cmp.2, double %add, double %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; 0.0 when the loop was skipped from entry, otherwise the final reduction value.
%2 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
|
|
ret double %2
|
|
}
|
|
|
|
; Float pattern:
|
|
; Check vectorization of reduction code which has an fsub instruction after
|
|
; an fcmp instruction which compares an array element and 0.
|
|
;
|
|
; float fcmp_0_fsub_select1(ptr restrict x, const int N) {
|
|
; float sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > (float)0.)
|
|
; sum -= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_0_fsub_select1(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fsub_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[SUB]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP9]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Select-guarded reduction step: sum.2 = (x[i] > 0) ? sum.1 - x[i] : sum.1.
  ; Both FP instructions carry 'fast'; the expected output above is vectorized.
  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
  %sub = fsub fast float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}
|
|
|
|
; Float pattern:
|
|
; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
|
|
; float fcmp_0_fsub_select1_novectorize(ptr restrict x, const int N) {
|
|
; float sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > (float)0.)
|
|
; sum -= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_0_fsub_select1_novectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fsub_select1_novectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[SUB]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP1]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Same select-guarded fsub step as @fcmp_0_fsub_select1, but with no
  ; fast-math flags; the expected output above is the scalar loop only.
  %cmp.2 = fcmp ogt float %0, 0.000000e+00
  %sub = fsub float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}
|
|
|
|
; Double pattern:
|
|
; Check vectorization of reduction code which has an fsub instruction after
|
|
; an fcmp instruction which compares an array element and 0.
|
|
;
|
|
; double fcmp_0_fsub_select2(ptr restrict x, const int N) {
|
|
; double sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > 0.)
|
|
; sum -= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_0_fsub_select2(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fsub_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[SUB]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP9]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Select-guarded reduction step: sum.2 = (x[i] > 0) ? sum.1 - x[i] : sum.1.
  ; Both FP instructions carry 'fast'; the expected output above is vectorized.
  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
  %sub = fsub fast double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}
|
|
|
|
; Double pattern:
|
|
; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
|
|
;
|
|
; double fcmp_0_fsub_select2_notvectorize(ptr restrict x, const int N) {
|
|
; double sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > 0.)
|
|
; sum -= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_0_fsub_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fsub_select2_notvectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt double [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[SUB]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP1]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Same select-guarded fsub step as @fcmp_0_fsub_select2, but with no
  ; fast-math flags; the expected output above is the scalar loop only.
  %cmp.2 = fcmp ogt double %0, 0.000000e+00
  %sub = fsub double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}
|
|
|
|
; Float pattern:
|
|
; Check vectorization of reduction code which has an fmul instruction after
|
|
; an fcmp instruction which compares an array element and 0.
|
|
;
|
|
; float fcmp_0_fmult_select1(ptr restrict x, const int N) {
|
|
; float sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > (float)0.)
|
|
; sum *= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_0_fmult_select1(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fmult_select1(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul fast float [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[MULT]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP9]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; The reduction starts at 0.0 (not 1.0); the expected vector phi above
  ; carries that start value in lane 0 and 1.0 in the remaining lanes.
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Select-guarded reduction step: sum.2 = (x[i] > 0) ? sum.1 * x[i] : sum.1.
  ; Both FP instructions carry 'fast'; the expected output above is vectorized.
  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
  %mult = fmul fast float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}
|
|
|
|
; Float pattern:
|
|
; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
|
|
;
|
|
; float fcmp_0_fmult_select1_notvectorize(ptr restrict x, const int N) {
|
|
; float sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > (float)0.)
|
|
; sum *= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define float @fcmp_0_fmult_select1_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define float @fcmp_0_fmult_select1_notvectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul float [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[MULT]], float [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret float [[TMP1]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  ; Same select-guarded fmul step as @fcmp_0_fmult_select1, but with no
  ; fast-math flags; the expected output above is the scalar loop only.
  %cmp.2 = fcmp ogt float %0, 0.000000e+00
  %mult = fmul float %sum.1, %0
  %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret float %1
}
|
|
|
|
; Double pattern:
|
|
; Check vectorization of reduction code which has an fmul instruction after
|
|
; an fcmp instruction which compares an array element and 0.
|
|
;
|
|
; double fcmp_0_fmult_select2(ptr restrict x, const int N) {
|
|
; double sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > 0.)
|
|
; sum *= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_0_fmult_select2(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fmult_select2(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK:       [[VECTOR_PH]]:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
; CHECK:       [[VECTOR_BODY]]:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       [[MIDDLE_BLOCK]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> [[TMP5]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK:       [[SCALAR_PH]]:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul fast double [[SUM_1]], [[TMP8]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[MULT]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP9]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; The reduction starts at 0.0 (not 1.0); the expected vector phi above
  ; carries that start value in lane 0 and 1.0 in the remaining lanes.
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Select-guarded reduction step: sum.2 = (x[i] > 0) ? sum.1 * x[i] : sum.1.
  ; Both FP instructions carry 'fast'; the expected output above is vectorized.
  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
  %mult = fmul fast double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}
|
|
|
|
; Double pattern:
|
|
; Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
|
|
;
|
|
; double fcmp_0_fmult_select2_notvectorize(ptr restrict x, const int N) {
|
|
; double sum = 0.
|
|
; for (int i = 0; i < N; ++i)
|
|
; if (x[i] > 0.)
|
|
; sum *= x[i];
|
|
; return sum;
|
|
; }
|
|
|
|
define double @fcmp_0_fmult_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
; CHECK-LABEL: define double @fcmp_0_fmult_select2_notvectorize(
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
; CHECK:       [[FOR_HEADER]]:
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
; CHECK:       [[FOR_BODY]]:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt double [[TMP0]], 0.000000e+00
; CHECK-NEXT:    [[MULT:%.*]] = fmul double [[SUM_1]], [[TMP0]]
; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[MULT]], double [[SUM_1]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
; CHECK:       [[FOR_END_LOOPEXIT]]:
; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ]
; CHECK-NEXT:    br label %[[FOR_END]]
; CHECK:       [[FOR_END]]:
; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret double [[TMP1]]
;
entry:
  ; Guard: the loop is entered only when N > 0.
  %cmp.1 = icmp sgt i32 %N, 0
  br i1 %cmp.1, label %for.header, label %for.end

for.header:                                       ; preds = %entry
  ; Trip count widened to i64 to match the induction variable.
  %zext = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body, %for.header
  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 4
  ; Same select-guarded fmul step as @fcmp_0_fmult_select2, but with no
  ; fast-math flags; the expected output above is the scalar loop only.
  %cmp.2 = fcmp ogt double %0, 0.000000e+00
  %mult = fmul double %sum.1, %0
  %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %zext
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was skipped, otherwise the final reduction value.
  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
  ret double %1
}
|
|
|
|
; Float multi pattern
|
|
; Check vectorisation of reduction code with a pair of selects to different
|
|
; fadd patterns.
|
|
;
|
|
; float fcmp_multi(ptr a, int n) {
|
|
; float sum=0.0;
|
|
; for (int i=0;i<n;i++) {
|
|
; if (a[i]>1.0)
|
|
; sum+=a[i];
|
|
; else if (a[i]<3.0)
|
|
; sum+=2*a[i];
|
|
; else
|
|
; sum+=3*a[i];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; IR form of a three-way if / else-if / else loop: every arm feeds one phi in
; for.inc, which is followed by a single `fadd fast` into the running sum.
; The autogenerated assertions below expect a <4 x float> vector body that
; blends the arms with selects and finishes with llvm.vector.reduce.fadd.
define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_multi(
|
|
; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
|
|
; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
|
|
; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
|
|
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x float> [[TMP9]], <4 x float> [[TMP8]]
|
|
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[PREDPHI]], <4 x float> [[WIDE_LOAD]]
|
|
; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP10]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
|
|
; CHECK-NEXT: [[SUM_011:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP13]], 1.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_INC]], label %[[IF_ELSE:.*]]
|
|
; CHECK: [[IF_ELSE]]:
|
|
; CHECK-NEXT: [[CMP8:%.*]] = fcmp olt float [[TMP13]], 3.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[IF_ELSE14:.*]]
|
|
; CHECK: [[IF_THEN10]]:
|
|
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP13]], 2.000000e+00
|
|
; CHECK-NEXT: br label %[[FOR_INC]]
|
|
; CHECK: [[IF_ELSE14]]:
|
|
; CHECK-NEXT: [[MUL17:%.*]] = fmul fast float [[TMP13]], 3.000000e+00
|
|
; CHECK-NEXT: br label %[[FOR_INC]]
|
|
; CHECK: [[FOR_INC]]:
|
|
; CHECK-NEXT: [[DOTPN:%.*]] = phi float [ [[MUL]], %[[IF_THEN10]] ], [ [[MUL17]], %[[IF_ELSE14]] ], [ [[TMP13]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1]] = fadd fast float [[DOTPN]], [[SUM_011]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp10 = icmp sgt i32 %n, 0
|
|
br i1 %cmp10, label %for.body.preheader, label %for.end
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.inc, %for.body.preheader
|
|
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
|
%sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
|
|
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%cmp1 = fcmp ogt float %0, 1.000000e+00
|
|
br i1 %cmp1, label %for.inc, label %if.else
|
|
|
|
if.else: ; preds = %for.body
|
|
%cmp8 = fcmp olt float %0, 3.000000e+00
|
|
br i1 %cmp8, label %if.then10, label %if.else14
|
|
|
|
if.then10: ; preds = %if.else
|
|
%mul = fmul fast float %0, 2.000000e+00
|
|
br label %for.inc
|
|
|
|
if.else14: ; preds = %if.else
|
|
%mul17 = fmul fast float %0, 3.000000e+00
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body, %if.else14, %if.then10
|
|
; %.pn is this iteration's addend: the loaded value times 2, times 3, or the
; loaded value itself when the branch came straight from for.body.
%.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ]
|
|
%sum.1 = fadd fast float %.pn, %sum.011
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.inc, %entry
|
|
%sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
|
|
ret float %sum.0.lcssa
|
|
}
|
|
|
|
; Float fadd + fsub patterns
|
|
; Check vectorisation of reduction code with a pair of selects to different
|
|
; instructions { fadd, fsub } but equivalent (change in constant).
|
|
;
|
|
; float fcmp_fadd_fsub(ptr a, int n) {
|
|
; float sum=0.0;
|
|
; for (int i=0;i<n;i++) {
|
|
; if (a[i]>1.0)
|
|
; sum+=a[i];
|
|
; else if (a[i]<3.0)
|
|
; sum-=a[i];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; Two different reduction updates on separate paths: `fadd fast` when the
; loaded value is > 1.0, `fsub fast` when it is <= 1.0 (or unordered) and
; < 3.0, and the sum is passed through unchanged otherwise. The autogenerated
; assertions below expect a <4 x float> vector body and a final
; llvm.vector.reduce.fadd.
define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_fadd_fsub(
|
|
; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI1:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
|
|
; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
|
|
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]]
|
|
; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP4]], <4 x float> [[PREDPHI]], <4 x float> [[TMP7]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI1]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
|
|
; CHECK-NEXT: [[SUM_010:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP12]], 1.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
|
|
; CHECK: [[IF_THEN]]:
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP12]], [[SUM_010]]
|
|
; CHECK-NEXT: br label %[[FOR_INC]]
|
|
; CHECK: [[IF_ELSE]]:
|
|
; CHECK-NEXT: [[CMP8:%.*]] = fcmp olt float [[TMP12]], 3.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[FOR_INC]]
|
|
; CHECK: [[IF_THEN10]]:
|
|
; CHECK-NEXT: [[SUB:%.*]] = fsub fast float [[SUM_010]], [[TMP12]]
|
|
; CHECK-NEXT: br label %[[FOR_INC]]
|
|
; CHECK: [[FOR_INC]]:
|
|
; CHECK-NEXT: [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN]] ], [ [[SUB]], %[[IF_THEN10]] ], [ [[SUM_010]], %[[IF_ELSE]] ]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp9 = icmp sgt i32 %n, 0
|
|
br i1 %cmp9, label %for.body.preheader, label %for.end
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.inc, %for.body.preheader
|
|
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
|
%sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
|
|
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%cmp1 = fcmp ogt float %0, 1.000000e+00
|
|
br i1 %cmp1, label %if.then, label %if.else
|
|
|
|
if.then: ; preds = %for.body
|
|
%add = fadd fast float %0, %sum.010
|
|
br label %for.inc
|
|
|
|
if.else: ; preds = %for.body
|
|
%cmp8 = fcmp olt float %0, 3.000000e+00
|
|
br i1 %cmp8, label %if.then10, label %for.inc
|
|
|
|
if.then10: ; preds = %if.else
|
|
%sub = fsub fast float %sum.010, %0
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %if.then, %if.then10, %if.else
|
|
; The incoming value from %if.else is the unmodified sum: neither compare
; selected an update on that path.
%sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ]
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.inc, %entry
|
|
%sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
|
|
ret float %sum.0.lcssa
|
|
}
|
|
|
|
; Float fadd + fmul patterns
|
|
; Check lack of vectorisation of reduction code with a pair of non-compatible
|
|
; instructions { fadd, fmul }.
|
|
;
|
|
; float fcmp_fadd_fmul(ptr a, int n) {
|
|
; float sum=0.0;
|
|
; for (int i=0;i<n;i++) {
|
|
; if (a[i]>1.0)
|
|
; sum+=a[i];
|
|
; else if (a[i]<3.0)
|
|
; sum*=a[i];
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; The running sum is updated by `fadd fast` on one path and by `fmul fast` on
; another, and passed through unchanged on the third. The autogenerated
; assertions below contain only the original scalar loop (no vector body and
; no loop metadata), i.e. the loop is expected to stay unvectorized.
define float @fcmp_fadd_fmul(ptr nocapture readonly %a, i32 %n) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_fadd_fmul(
|
|
; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
|
|
; CHECK-NEXT: [[SUM_010:%.*]] = phi float [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
|
|
; CHECK: [[IF_THEN]]:
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP0]], [[SUM_010]]
|
|
; CHECK-NEXT: br label %[[FOR_INC]]
|
|
; CHECK: [[IF_ELSE]]:
|
|
; CHECK-NEXT: [[CMP8:%.*]] = fcmp olt float [[TMP0]], 3.000000e+00
|
|
; CHECK-NEXT: br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[FOR_INC]]
|
|
; CHECK: [[IF_THEN10]]:
|
|
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP0]], [[SUM_010]]
|
|
; CHECK-NEXT: br label %[[FOR_INC]]
|
|
; CHECK: [[FOR_INC]]:
|
|
; CHECK-NEXT: [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN]] ], [ [[MUL]], %[[IF_THEN10]] ], [ [[SUM_010]], %[[IF_ELSE]] ]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp9 = icmp sgt i32 %n, 0
|
|
br i1 %cmp9, label %for.body.preheader, label %for.end
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %n to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.inc, %for.body.preheader
|
|
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
|
|
%sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
|
|
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%cmp1 = fcmp ogt float %0, 1.000000e+00
|
|
br i1 %cmp1, label %if.then, label %if.else
|
|
|
|
if.then: ; preds = %for.body
|
|
%add = fadd fast float %0, %sum.010
|
|
br label %for.inc
|
|
|
|
if.else: ; preds = %for.body
|
|
%cmp8 = fcmp olt float %0, 3.000000e+00
|
|
br i1 %cmp8, label %if.then10, label %for.inc
|
|
|
|
if.then10: ; preds = %if.else
|
|
; Multiplies the running sum itself, unlike the fadd arm in if.then.
%mul = fmul fast float %0, %sum.010
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %if.then, %if.then10, %if.else
|
|
%sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ]
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.inc, %entry
|
|
%sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
|
|
ret float %sum.0.lcssa
|
|
}
|
|
|
|
; Float fadd + store patterns
|
|
; Check lack of vectorisation of reduction code with a store back, given it
|
|
; has loop dependency on a[i].
|
|
;
|
|
; float fcmp_store_back(float a[], int LEN) {
|
|
; float sum = 0.0;
|
|
; for (int i = 0; i < LEN; i++) {
|
|
; sum += a[i];
|
|
; a[i] = sum;
|
|
; }
|
|
; return sum;
|
|
; }
|
|
|
|
; Running float sum whose updated value is also stored back through the same
; %arrayidx it was loaded from on every iteration. The autogenerated
; assertions below contain only the original scalar loop, i.e. no
; vectorization is expected.
; NOTE(review): the function is marked `readonly` although its body stores
; through %a -- confirm this is intentional for the test.
define float @fcmp_store_back(ptr nocapture %a, i32 %LEN) nounwind readonly {
|
|
; CHECK-LABEL: define float @fcmp_store_back(
|
|
; CHECK-SAME: ptr captures(none) [[A:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN]] to i64
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_08:%.*]] = phi float [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP0]], [[SUM_08]]
|
|
; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret float [[SUM_0_LCSSA]]
|
|
;
|
|
entry:
|
|
%cmp7 = icmp sgt i32 %LEN, 0
|
|
br i1 %cmp7, label %for.body.preheader, label %for.end
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%wide.trip.count = zext i32 %LEN to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %for.body.preheader
|
|
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
|
%sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%add = fadd fast float %0, %sum.08
|
|
; The partial sum has a second in-loop user besides the reduction phi.
store float %add, ptr %arrayidx, align 4
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
%sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
|
|
ret float %sum.0.lcssa
|
|
}
|
|
|
|
; Integer add reduction guarded by a float compare: the i64 sum grows by 2
; whenever the loaded float is > 0.0, otherwise it is kept as is. The trip
; count is the i64 argument %N itself (no zext). The autogenerated assertions
; below expect a <4 x i64> vector body and llvm.vector.reduce.add.
define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly {
|
|
; CHECK-LABEL: define i64 @fcmp_0_add_select2(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i64 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], splat (i64 2)
|
|
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP5]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp ogt float [[TMP8]], 0.000000e+00
|
|
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[SUM_1]], 2
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], i64 [[ADD]], i64 [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi i64 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret i64 [[TMP9]]
|
|
;
|
|
entry:
|
|
%cmp.1 = icmp sgt i64 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.header, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi i64 [ 0, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%cmp.2 = fcmp ogt float %0, 0.000000e+00
|
|
; The add is computed unconditionally; the select decides whether it is kept.
%add = add nsw i64 %sum.1, 2
|
|
%sum.2 = select i1 %cmp.2, i64 %add, i64 %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %N
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
%1 = phi i64 [ 0, %entry ], [ %sum.2, %for.body ]
|
|
ret i64 %1
|
|
}
|
|
|
|
; FIXME: %indvars.iv.next is poison on first iteration due to sub nuw 0, 1.
|
|
; Integer sub reduction guarded by a float compare: 2 is subtracted from the
; i32 sum whenever the loaded float is > 0.0. The induction variable starts
; at 0 and steps downward by 1. The autogenerated assertions below expect a
; reversed <4 x float> load (shufflevector) and llvm.vector.reduce.add.
define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define i32 @fcmp_0_sub_select1(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[ZEXT]]
|
|
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[IND_END:%.*]] = sub i64 0, [[N_VEC]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 -3
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
|
|
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], splat (i32 2)
|
|
; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp ogt float [[TMP10]], 0.000000e+00
|
|
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[SUM_1]], 2
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], i32 [[SUB]], i32 [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = sub nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret i32 [[TMP11]]
|
|
;
|
|
entry:
|
|
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.header, %for.body
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
%sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%cmp.2 = fcmp ogt float %0, 0.000000e+00
|
|
%sub = sub nsw i32 %sum.1, 2
|
|
%sum.2 = select i1 %cmp.2, i32 %sub, i32 %sum.1
|
|
; %indvars.iv is 0 on the first iteration, and 0 - 1 wraps as an unsigned
; subtraction, so the `nuw` flag makes this value poison there.
%indvars.iv.next = sub nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
%1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
|
|
ret i32 %1
|
|
}
|
|
|
|
; Float pattern:
; Check vectorization of reduction code which has a mul instruction after
; an fcmp instruction which compares an array element and 0.
;
; int fcmp_0_mult_select1(ptr restrict x, const int N) {
;   int sum = 0;
;   for (int i = 0; i < N; ++i)
;     if (x[i] > (float)0.)
;       sum *= 2;
;   return sum;
; }
;
; The accumulator is seeded with 0, so the product itself is always 0; the
; assertions below pin the shape of the vectorized select/mul reduction, not
; its numeric result.
define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly {
|
|
; CHECK-LABEL: define i32 @fcmp_0_mult_select1(
|
|
; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_HEADER]]:
|
|
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 1, i32 1>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2)
|
|
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
|
|
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
|
|
; CHECK: [[FOR_BODY]]:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
|
|
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
|
|
; CHECK-NEXT: [[CMP_2:%.*]] = fcmp ogt float [[TMP8]], 0.000000e+00
|
|
; CHECK-NEXT: [[MULT:%.*]] = mul nsw i32 [[SUM_1]], 2
|
|
; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], i32 [[MULT]], i32 [[SUM_1]]
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
|
|
; CHECK-NEXT: ret i32 [[TMP9]]
|
|
;
|
|
entry:
|
|
; Skip the loop entirely when N <= 0; %for.end then returns 0.
%cmp.1 = icmp sgt i32 %N, 0
|
|
br i1 %cmp.1, label %for.header, label %for.end
|
|
|
|
for.header: ; preds = %entry
|
|
; Trip count widened to i64 so it can be compared with the i64 induction variable.
%zext = zext i32 %N to i64
|
|
br label %for.body
|
|
|
|
for.body: ; preds = %for.body, %for.header
|
|
%indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
|
|
; Reduction accumulator: seeded with 0 and fed by %sum.2 on the back edge.
%sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
|
|
%arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
|
|
%0 = load float, ptr %arrayidx, align 4
|
|
%cmp.2 = fcmp ogt float %0, 0.000000e+00
|
|
; The doubled value is computed unconditionally; the select below decides
; whether it is kept.
%mult = mul nsw i32 %sum.1, 2
|
|
; Take the doubled value only when x[i] > 0.0, otherwise carry %sum.1 unchanged.
%sum.2 = select i1 %cmp.2, i32 %mult, i32 %sum.1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %zext
|
|
br i1 %exitcond, label %for.end, label %for.body
|
|
|
|
for.end: ; preds = %for.body, %entry
|
|
; Result is 0 when the loop was skipped, otherwise the final reduction value.
%1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
|
|
ret i32 %1
|
|
}
|
|
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]}
|
|
; CHECK: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
|
|
; CHECK: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]}
|
|
;.
|