
In some cases, there might be a chain of uniform instructions producing the exit value. To generate correct code in all cases, consider the IV increment not uniform, if there are users outside the loop. Instead, let VPlan narrow the IV, if possible using the logic from 3ff1d01985752. Test case from #122602 verified with Alive2: https://alive2.llvm.org/ce/z/bA4EGj Fixes https://github.com/llvm/llvm-project/issues/122496. Fixes https://github.com/llvm/llvm-project/issues/122602.
91 lines
5.3 KiB
LLVM
91 lines
5.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -scalable-vectorization=on -force-target-supports-scalable-vectors=true -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck %s
|
|
|
|
define i32 @iv_live_out_wide(ptr %dst) {
|
|
; CHECK-LABEL: define i32 @iv_live_out_wide(
|
|
; CHECK-SAME: ptr [[DST:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32
|
|
; CHECK-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 2000, [[TMP1]]
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP3]]
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
|
|
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
|
|
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
|
|
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 0
|
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP9]]
|
|
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
|
|
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
|
|
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
|
|
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP11]], align 2
|
|
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
|
|
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT2]], [[STEP_ADD]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
|
|
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
|
|
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
|
|
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 2
|
|
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
|
|
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i32> [[TMP15]], i32 [[TMP19]]
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
|
|
; CHECK: [[SCALAR_PH]]:
|
|
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]]
|
|
; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]]
|
|
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000
|
|
; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; CHECK: [[E_EXIT]]:
|
|
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: ret i32 [[RES]]
|
|
;
|
|
entry:
|
|
%step.1 = sext i8 0 to i32
|
|
%step.2 = add nsw i32 %step.1, 1
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%gep.dst = getelementptr inbounds i16, ptr %dst, i32 %iv
|
|
store i16 0, ptr %gep.dst, align 2
|
|
%iv.next = add i32 %step.2, %iv
|
|
%cmp.i = icmp slt i32 %iv.next, 2000
|
|
br i1 %cmp.i, label %loop, label %e.exit
|
|
|
|
e.exit:
|
|
%res = phi i32 [ %iv.next, %loop ]
|
|
ret i32 %res
|
|
}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
|
|
;.
|