The checks created by LAA only compute a pointer difference and do not need to capture provenance. Use SCEVPtrToAddr instead of SCEVPtrToInt for computations. To avoid regressions while parts of SCEV are migrated to use PtrToAddr this adds logic to rewrite all PtrToInt to PtrToAddr if possible in the created expressions. This is needed to avoid regressions. Similarly, if in the original IR we have a PtrToInt, SCEVExpander tries to re-use it if possible when expanding PtrToAddr. Depends on https://github.com/llvm/llvm-project/pull/178727. Fixes https://github.com/llvm/llvm-project/issues/156978. PR: https://github.com/llvm/llvm-project/pull/178861
270 lines
17 KiB
LLVM
270 lines
17 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
|
|
; Test VLA for reverse with fixed size vector
|
|
; This is the loop in C++ being vectorized in this file with
|
|
; shuffle reverse
|
|
; #pragma clang loop vectorize_width(8, fixed)
|
|
; for (int i = N-1; i >= 0; --i)
|
|
; a[i] = b[i] + 1.0;
|
|
|
|
; RUN: opt -passes=loop-vectorize,dce -mtriple aarch64-linux-gnu -S \
|
|
; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s
|
|
|
|
; Reversed floating-point loop: for i = N-1 down to 0, a[i] = b[i] + 1.0.
; %a and %b carry no noalias attribute, and the loop is tagged with
; !llvm.loop !0. The autogenerated assertions below expect:
;   - ptrtoaddr of %a and %b in the entry block,
;   - a vector.memcheck block comparing the address difference against 64,
;   - an 8-wide vector body that reverses the loaded and the stored vector
;     with shufflevector.
define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 {
|
|
; CHECK-LABEL: define void @vector_reverse_f64(
|
|
; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
|
|
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
|
|
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], [[FOR_COND_CLEANUP:label %.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
|
|
; CHECK: [[VECTOR_MEMCHECK]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
|
|
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
|
|
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 -7
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x double>, ptr [[TMP5]], align 8
|
|
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x double> [[WIDE_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: [[TMP6:%.*]] = fadd <8 x double> [[REVERSE]], splat (double 1.000000e+00)
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 0
|
|
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 -7
|
|
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: store <8 x double> [[REVERSE3]], ptr [[TMP9]], align 8
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_COND_CLEANUP_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
;
|
|
|
|
; Skip the loop entirely unless N > 0 (signed).
entry:
|
|
%cmp7 = icmp sgt i64 %N, 0
|
|
br i1 %cmp7, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body, %entry
|
|
ret void
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
; %i.08.in starts at %N and counts down; %i.08 (= %i.08.in - 1) is the
; element index used by both the load from %b and the store to %a.
%i.08.in = phi i64 [ %i.08, %for.body ], [ %N, %entry ]
|
|
%i.08 = add nsw i64 %i.08.in, -1
|
|
%arrayidx = getelementptr inbounds double, ptr %b, i64 %i.08
|
|
%0 = load double, ptr %arrayidx, align 8
|
|
%add = fadd double %0, 1.000000e+00
|
|
%arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.08
|
|
store double %add, ptr %arrayidx1, align 8
|
|
; Keep looping while the pre-decrement counter is > 1, so the last
; iteration processes index 0.
%cmp = icmp sgt i64 %i.08.in, 1
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
|
|
}
|
|
|
|
; Integer counterpart of the reversed loop: for i = N-1 down to 0,
; a[i] = b[i] + 1 on i64 elements. As in the f64 variant, %a and %b carry no
; noalias attribute and the loop is tagged with !llvm.loop !0. The
; autogenerated assertions below expect ptrtoaddr of both pointers in the
; entry block, an address-difference comparison against 64 in
; vector.memcheck, and an 8-wide vector body using shufflevector reverses.
define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
|
|
; CHECK-LABEL: define void @vector_reverse_i64(
|
|
; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
|
|
; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
|
|
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i64 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], [[FOR_COND_CLEANUP:label %.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
|
|
; CHECK: [[VECTOR_MEMCHECK]]:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]]
|
|
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
|
|
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 0
|
|
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -7
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i64>, ptr [[TMP5]], align 8
|
|
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i64> [[WIDE_LOAD]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: [[TMP6:%.*]] = add <8 x i64> [[REVERSE]], splat (i64 1)
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 0
|
|
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 -7
|
|
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <8 x i64> [[TMP6]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: store <8 x i64> [[REVERSE3]], ptr [[TMP9]], align 8
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
|
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_COND_CLEANUP_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
;
|
|
|
|
; Skip the loop entirely unless N > 0 (signed).
entry:
|
|
%cmp8 = icmp sgt i64 %N, 0
|
|
br i1 %cmp8, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body, %entry
|
|
ret void
|
|
|
|
for.body: ; preds = %entry, %for.body
|
|
; %i.09.in starts at %N and counts down; %i.09 (= %i.09.in - 1) indexes both
; the load from %b and the store to %a.
%i.09.in = phi i64 [ %i.09, %for.body ], [ %N, %entry ]
|
|
%i.09 = add nsw i64 %i.09.in, -1
|
|
%arrayidx = getelementptr inbounds i64, ptr %b, i64 %i.09
|
|
%0 = load i64, ptr %arrayidx, align 8
|
|
%add = add i64 %0, 1
|
|
%arrayidx2 = getelementptr inbounds i64, ptr %a, i64 %i.09
|
|
store i64 %add, ptr %arrayidx2, align 8
|
|
; Keep looping while the pre-decrement counter is > 1, so the last
; iteration processes index 0.
%cmp = icmp sgt i64 %i.09.in, 1
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
|
|
}
|
|
|
|
; Combines an add reduction with a reversed store. The scalar loop walks %iv
; from %n down to 0 inclusive; each iteration loads an i16 from the fixed
; address %src, adds its sign-extended value to the running i32 sum, and
; stores the loaded i16 to %dst[%iv]. The final sum is returned. %dst and
; %src are both noalias.
; The autogenerated assertions below expect:
;   - a trip count of %n + 1,
;   - a scalable main vector loop with four partial-reduction accumulators
;     (llvm.vector.partial.reduce.add) and four stores of the same
;     llvm.vector.reverse result,
;   - a fixed-width epilogue vector loop (<4 x i16> stores, <2 x i32>
;     accumulator) resuming from the main loop's index and reduction value.
define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias %src, i64 %n) #1 {
|
|
; CHECK-LABEL: define i32 @reverse_store_with_partial_reduction(
|
|
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
|
|
; CHECK-NEXT: [[ITER_CHECK:.*:]]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
|
|
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 5
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3
|
|
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP5]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE6:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE7:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[N]], [[INDEX]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[SRC]], align 2
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[TMP7]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP8:%.*]] = sext <vscale x 8 x i16> [[BROADCAST_SPLAT]] to <vscale x 8 x i32>
|
|
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP8]])
|
|
; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> [[VEC_PHI2]], <vscale x 8 x i32> [[TMP8]])
|
|
; CHECK-NEXT: [[PARTIAL_REDUCE6]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> [[VEC_PHI3]], <vscale x 8 x i32> [[TMP8]])
|
|
; CHECK-NEXT: [[PARTIAL_REDUCE7]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> [[VEC_PHI4]], <vscale x 8 x i32> [[TMP8]])
|
|
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP6]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP4]], 1
|
|
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP11]]
|
|
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP10]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i16, ptr [[TMP13]], i64 [[TMP12]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP4]], 1
|
|
; CHECK-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP16]]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP15]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[TMP18]], i64 [[TMP17]]
|
|
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 -2, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[TMP4]], 1
|
|
; CHECK-NEXT: [[TMP22:%.*]] = mul i64 -1, [[TMP21]]
|
|
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP20]]
|
|
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[TMP23]], i64 [[TMP22]]
|
|
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 -3, [[TMP4]]
|
|
; CHECK-NEXT: [[TMP26:%.*]] = sub i64 [[TMP4]], 1
|
|
; CHECK-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP26]]
|
|
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP9]], i64 [[TMP25]]
|
|
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i64 [[TMP27]]
|
|
; CHECK-NEXT: [[REVERSE:%.*]] = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[BROADCAST_SPLAT]])
|
|
; CHECK-NEXT: store <vscale x 8 x i16> [[REVERSE]], ptr [[TMP14]], align 2
|
|
; CHECK-NEXT: store <vscale x 8 x i16> [[REVERSE]], ptr [[TMP19]], align 2
|
|
; CHECK-NEXT: store <vscale x 8 x i16> [[REVERSE]], ptr [[TMP24]], align 2
|
|
; CHECK-NEXT: store <vscale x 8 x i16> [[REVERSE]], ptr [[TMP29]], align 2
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
|
|
; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]]
|
|
; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <vscale x 4 x i32> [[PARTIAL_REDUCE6]], [[BIN_RDX]]
|
|
; CHECK-NEXT: [[BIN_RDX9:%.*]] = add <vscale x 4 x i32> [[PARTIAL_REDUCE7]], [[BIN_RDX8]]
|
|
; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX9]])
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
|
|
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_VEC]]
|
|
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]]
|
|
; CHECK: [[VEC_EPILOG_PH]]:
|
|
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
|
|
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP31]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
|
|
; CHECK-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[TMP0]], 4
|
|
; CHECK-NEXT: [[N_VEC11:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF10]]
|
|
; CHECK-NEXT: [[TMP32:%.*]] = sub i64 [[N]], [[N_VEC11]]
|
|
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
|
|
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_PHI13:%.*]] = phi <2 x i32> [ [[TMP33]], %[[VEC_EPILOG_PH]] ], [ [[PARTIAL_REDUCE16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX12]]
|
|
; CHECK-NEXT: [[TMP34:%.*]] = load i16, ptr [[SRC]], align 2
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i16> poison, i16 [[TMP34]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT14]], <4 x i16> poison, <4 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP35:%.*]] = sext <4 x i16> [[BROADCAST_SPLAT15]] to <4 x i32>
|
|
; CHECK-NEXT: [[PARTIAL_REDUCE16]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v4i32(<2 x i32> [[VEC_PHI13]], <4 x i32> [[TMP35]])
|
|
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i16, ptr [[DST]], i64 [[OFFSET_IDX]]
|
|
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i16, ptr [[TMP36]], i64 0
|
|
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[TMP37]], i64 -3
|
|
; CHECK-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLAT15]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; CHECK-NEXT: store <4 x i16> [[REVERSE17]], ptr [[TMP38]], align 2
|
|
; CHECK-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX12]], 4
|
|
; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT18]], [[N_VEC11]]
|
|
; CHECK-NEXT: br i1 [[TMP39]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PARTIAL_REDUCE16]])
|
|
; CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC11]]
|
|
; CHECK-NEXT: br i1 [[CMP_N19]], [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
|
|
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; %iv counts down from %n; %sum accumulates the sign-extended loaded values.
%iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ]
|
|
%sum = phi i32 [ 0, %entry ], [ %sum.next, %loop ]
|
|
; The load address is %src on every iteration; it does not depend on %iv.
%uniform.load = load i16, ptr %src, align 2
|
|
%ext = sext i16 %uniform.load to i32
|
|
%sum.next = add i32 %sum, %ext
|
|
%dst.gep = getelementptr i16, ptr %dst, i64 %iv
|
|
store i16 %uniform.load, ptr %dst.gep, align 2
|
|
%iv.next = add i64 %iv, -1
|
|
; The exit test uses the pre-decrement %iv (unsigned > 0), so the iteration
; with %iv == 0 still performs its store before leaving the loop.
%cmp = icmp ugt i64 %iv, 0
|
|
br i1 %cmp, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i32 %sum.next
|
|
}
|
|
|
|
; #0 is used by @vector_reverse_f64 and @vector_reverse_i64: generic CPU with
; the NEON and SVE target features enabled.
attributes #0 = { "target-cpu"="generic" "target-features"="+neon,+sve" }
|
|
; #1 is used by @reverse_store_with_partial_reduction: selects the
; neoverse-v2 CPU and sets no explicit target-features string.
attributes #1 = { "target-cpu"="neoverse-v2" }
|
|
|
|
; !0 is the loop metadata attached to the back-edge branches of the two
; @vector_reverse_* loops. It marks the loop mustprogress (!1) and carries the
; vectorizer hints: width 8 (!2), scalable vectorization disabled (!3), and
; vectorization enabled (!4).
!0 = distinct !{!0, !1, !2, !3, !4}
|
|
!1 = !{!"llvm.loop.mustprogress"}
|
|
!2 = !{!"llvm.loop.vectorize.width", i32 8}
|
|
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
|
|
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
|