llvm-project/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
Florian Hahn 8df64ed777 [LV] Don't consider IV increments uniform if exit value is used outside.
In some cases, there might be a chain of uniform instructions producing
the exit value. To generate correct code in all cases, consider the IV
increment not uniform, if there are users outside the loop.

Instead, let VPlan narrow the IV, if possible using the logic from
3ff1d01985752.

Test case from #122602 verified with Alive2:
    https://alive2.llvm.org/ce/z/bA4EGj

Fixes https://github.com/llvm/llvm-project/issues/122496.
Fixes https://github.com/llvm/llvm-project/issues/122602.
2025-01-12 22:03:21 +00:00

91 lines
5.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -scalable-vectorization=on -force-target-supports-scalable-vectors=true -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S %s | FileCheck %s
define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-LABEL: define i32 @iv_live_out_wide(
; CHECK-SAME: ptr [[DST:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32
; CHECK-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 2000, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP11]], align 2
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 2 x i32> [[BROADCAST_SPLAT2]], [[STEP_ADD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], 2
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i32> [[TMP15]], i32 [[TMP19]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]]
; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]]
; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000
; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[E_EXIT]]:
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
%step.1 = sext i8 0 to i32
%step.2 = add nsw i32 %step.1, 1
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.dst = getelementptr inbounds i16, ptr %dst, i32 %iv
store i16 0, ptr %gep.dst, align 2
%iv.next = add i32 %step.2, %iv
%cmp.i = icmp slt i32 %iv.next, 2000
br i1 %cmp.i, label %loop, label %e.exit
e.exit:
%res = phi i32 [ %iv.next, %loop ]
ret i32 %res
}
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.