From 840e9a4ddda3ebd383cc05714ebf27042905bb99 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 27 Mar 2026 13:26:04 +0000 Subject: [PATCH] [VPlan] Fix wrap-flags on WidenInduction unroll (#187710) Due to a somewhat recent change, IntOrFpInduction recipes have associated VPIRFlags. The VPlanUnroll logic for WidenInduction recipes predates this change, and computes incomplete wrap-flags: update it to simply use the flags on IntOrFpInduction recipes; PointerInduction recipes have no associated flags, and indeed, no flags should be used. --- flang/test/Integration/unroll-loops.f90 | 4 +-- flang/test/Lower/HLFIR/unroll-loops.fir | 4 +-- llvm/lib/Transforms/Vectorize/VPlan.h | 14 ++++++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 8 ++--- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 19 ++++++---- .../AArch64/epilog-iv-select-cmp.ll | 2 +- .../epilog-vectorization-widen-inductions.ll | 2 +- .../AArch64/pr60831-sve-inv-store-crash.ll | 4 +-- .../LoopVectorize/RISCV/riscv-interleaved.ll | 2 +- .../LoopVectorize/RISCV/strided-accesses.ll | 2 +- .../X86/fixed-order-recurrence.ll | 2 +- .../LoopVectorize/find-last-iv-interleave.ll | 4 +-- .../LoopVectorize/first-order-recurrence.ll | 2 +- .../Transforms/LoopVectorize/induction.ll | 36 +++++++++---------- .../LoopVectorize/iv-select-cmp-decreasing.ll | 28 +++++++-------- .../iv-select-cmp-non-const-iv-start.ll | 6 ++-- .../Transforms/LoopVectorize/iv-select-cmp.ll | 24 ++++++------- 17 files changed, 90 insertions(+), 73 deletions(-) diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index 23fefcd35c15..0ace460dbfe8 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -27,13 +27,13 @@ subroutine unroll(a) ! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 ! NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) ! - ! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) + ! UNROLL-NEXT: %[[VIND1:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) ! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr [8 x i8], ptr %[[ARG0]], i64 %[[IND]] ! UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16 ! UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]] ! UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]] ! UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4 - ! UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4) + ! UNROLL-NEXT: %[[NVIND:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 4) ! ! CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 ! CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index ce2b133365d3..4ea90b2f2f23 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -29,13 +29,13 @@ func.func @unroll(%arg0: !fir.ref> {fir.bindc_name = "a // NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2 // NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) - // UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2) + // UNROLL-NEXT: %[[VIND1:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2) // UNROLL-NEXT: %[[GEP0:.*]] = getelementptr [8 x i8], ptr %[[ARG0]], i64 %[[IND]] // UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16 // UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]] // UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]] // UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4 - // UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4) + // UNROLL-NEXT: %[[NVIND:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 4) // CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000 // CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]] diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 92b7a2c3c6ba..ab47d927942d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1030,6 +1030,20 @@ public: } } + bool hasNoWrapFlags() const { + switch (OpType) { + case OperationType::OverflowingBinOp: + case OperationType::Trunc: + return true; + default: + return false; + } + } + + WrapFlagsTy getNoWrapFlags() const { + return {hasNoUnsignedWrap(), hasNoSignedWrap()}; + } + bool isDisjoint() const { assert(OpType == OperationType::DisjointOp && "recipe cannot have a disjoing flag"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 9c9ffb8b0de6..555630426548 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3364,11 +3364,9 @@ void VPlanTransforms::addExplicitVectorLength( OpVPEVL = Builder.createScalarZExtOrTrunc( OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc()); - auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration, - CanonicalIVIncrement->getDebugLoc(), - "current.iteration.next", - {CanonicalIVIncrement->hasNoUnsignedWrap(), - CanonicalIVIncrement->hasNoSignedWrap()}); + auto *NextIter = Builder.createAdd( + OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(), + "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags()); CurrentIteration->addOperand(NextIter); VPValue *NextAVL = diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index e541ed364eba..867c7c8ef504 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -179,17 +179,22 @@ void UnrollState::unrollWidenInductionByUF( IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor()); Type *IVTy = TypeInfo.inferScalarType(IV); auto &ID = IV->getInductionDescriptor(); - VPIRFlags Flags; - if (isa_and_present(ID.getInductionBinOp())) - Flags = ID.getInductionBinOp()->getFastMathFlags(); + FastMathFlags FMF; + VPIRFlags::WrapFlagsTy WrapFlags(false, false); + if (auto *IntOrFPInd = dyn_cast(IV)) { + if (IntOrFPInd->hasFastMathFlags()) + FMF = IntOrFPInd->getFastMathFlags(); + if (IntOrFPInd->hasNoWrapFlags()) + WrapFlags = IntOrFPInd->getNoWrapFlags(); + } VPValue *ScalarStep = IV->getStepValue(); VPBuilder Builder(PH); Type *VectorStepTy = IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy; VPInstruction *VectorStep = Builder.createNaryOp( - VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, - Flags, IV->getDebugLoc()); + VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, FMF, + IV->getDebugLoc()); ToSkip.insert(VectorStep); @@ -214,10 +219,10 @@ void UnrollState::unrollWidenInductionByUF( AddFlags = GEPNoWrapFlags::none(); } else if (IVTy->isFloatingPointTy()) { AddOpc = ID.getInductionOpcode(); - AddFlags = Flags; // FMF flags + AddFlags = FMF; } else { AddOpc = Instruction::Add; - AddFlags = VPIRFlags::getDefaultFlags(AddOpc); + AddFlags = WrapFlags; if (cast(IV)->isCanonical()) AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index ae1f7829449d..92ccabc2031a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -24,7 +24,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <16 x i8> [[VEC_IND]], splat (i8 16) ; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 680dfb83271c..ea9799428f30 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -197,7 +197,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 25e18692da8c..af75087bd76f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -179,7 +179,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 { ; IC2: vector.body: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IC2-NEXT: [[DOTSPLAT:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; IC2-NEXT: [[TMP10:%.*]] = add [[DOTSPLAT]], [[TMP9]] +; IC2-NEXT: [[TMP10:%.*]] = add nsw [[DOTSPLAT]], [[TMP9]] ; IC2-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32() ; IC2-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4 ; IC2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1 @@ -303,7 +303,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 { ; IC2: vector.body: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IC2-NEXT: [[DOTSPLAT:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; IC2-NEXT: [[TMP14:%.*]] = add [[DOTSPLAT]], [[TMP13]] +; IC2-NEXT: [[TMP14:%.*]] = add nsw [[DOTSPLAT]], [[TMP13]] ; IC2-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() ; IC2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4 ; IC2-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index faaaa67094d6..c28d1706a31b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: foo ; CHECK: LV: IC is 2 -; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, splat (i32 8) ; CHECK: %{{.*}} = add {{.*}}, 16 +; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, splat (i32 8) ; Function Attrs: nofree norecurse nosync nounwind writeonly define void @foo(i32 signext %n, ptr nocapture %A) #0 { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 642e28a4823e..4af43e5fb085 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -156,7 +156,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-UF2: vector.body: ; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[TMP6]] +; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add nuw nsw [[VEC_IND]], [[TMP6]] ; CHECK-UF2-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[P:%.*]], [[VEC_IND]] ; CHECK-UF2-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], [[STEP_ADD]] ; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( align 4 [[TMP9]], splat (i1 true), poison) diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 86444d3354fe..681cb31bc4ed 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -217,7 +217,7 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <2 x i64> zeroinitializer, [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[GEP]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll index 7df89f725197..32c558214c82 100644 --- a/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll @@ -90,7 +90,7 @@ define i64 @findlast_iv_start5_interleave(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 5, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 @@ -167,7 +167,7 @@ define i64 @findlast_iv_step2_interleave(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP38:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP39:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 9b7dde465044..72ce9e0055ff 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -880,7 +880,7 @@ define i32 @PR27246() { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i32> [[VEC_IND]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i32> [[STEP_ADD]], splat (i32 -4) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 9a3dbb4145f3..e7f57a1c9b14 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -108,13 +108,13 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[INDEX]] ; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL: middle.block: @@ -153,7 +153,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 @@ -197,13 +197,13 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 16 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; INTERLEAVE: middle.block: @@ -4545,7 +4545,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[TMP1]] @@ -4553,7 +4553,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; UNROLL: middle.block: @@ -4589,7 +4589,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 @@ -4630,7 +4630,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[TMP1]] @@ -4638,7 +4638,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] ; INTERLEAVE: middle.block: @@ -5409,7 +5409,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> ; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> @@ -5435,7 +5435,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] ; UNROLL-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4) ; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4) ; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 @@ -5484,7 +5484,7 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND5:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> @@ -5560,7 +5560,7 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4) ; INTERLEAVE-NEXT: [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], splat (i32 4) ; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD7]], <4 x i32> @@ -5586,7 +5586,7 @@ define i64 @trunc_with_first_order_recurrence() { ; INTERLEAVE-NEXT: [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]] ; INTERLEAVE-NEXT: [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8) +; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 8) ; INTERLEAVE-NEXT: [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8) ; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8) ; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 @@ -6041,7 +6041,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[TMP16]] +; UNROLL-NEXT: [[STEP_ADD]] = add nsw <2 x i32> [[VEC_IND]], [[TMP16]] ; UNROLL-NEXT: [[TMP18:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8], ptr [[PTR:%.*]], i64 [[INDEX]] @@ -6117,7 +6117,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[TMP19]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[TMP17]] +; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add nsw <2 x i32> [[VEC_IND]], [[TMP17]] ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] @@ -6190,7 +6190,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], [[TMP16]] +; INTERLEAVE-NEXT: [[STEP_ADD]] = add nsw <4 x i32> [[VEC_IND]], [[TMP16]] ; INTERLEAVE-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> ; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8], ptr [[PTR:%.*]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 49ca79bfa2b0..381dc657fc15 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -46,9 +46,9 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] -; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) -; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4) -; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3 @@ -727,9 +727,9 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] -; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) -; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4) -; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3 @@ -755,7 +755,7 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808 -; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]]) ; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]]) @@ -984,7 +984,7 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -16 -; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP27]], <4 x i64> [[TMP28]]) ; IC4VF4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP29]]) @@ -1006,7 +1006,7 @@ define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr % ; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]] ; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] ; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; IC4VF4: [[EXIT]]: ; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ] ; IC4VF4-NEXT: ret i64 [[COND_LCSSA]] @@ -1196,9 +1196,9 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC4VF4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ] ; IC4VF4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[VECTOR_BODY]] ] ; IC4VF4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[VECTOR_BODY]] ] -; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) -; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4) -; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 -4) +; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -1) ; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; IC4VF4-NEXT: [[TMP17:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 -1) @@ -1245,7 +1245,7 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 -4) ; IC4VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IC4VF4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IC4VF4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IC4VF4: [[MIDDLE_BLOCK]]: ; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP34]], <4 x i64> [[TMP35]]) ; IC4VF4-NEXT: [[RDX_MINMAX23:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP36]]) @@ -1274,7 +1274,7 @@ define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 ; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]] ; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]] ; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; IC4VF4: [[EXIT]]: ; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; IC4VF4-NEXT: ret i64 [[COND_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll index 7effcb278ceb..ba53cc5bac16 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll @@ -92,9 +92,9 @@ define i64 @select_non_const_iv_start_signed_guard(ptr %a, i64 %rdx_start, i64 % ; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 206eb2c34892..4eb6b346c442 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -1477,9 +1477,9 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 @@ -1702,9 +1702,9 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 8 @@ -2142,9 +2142,9 @@ define i64 @not_vectorized_select_icmp_iv_out_of_bound(ptr %a, ptr %b, i64 %rdx. ; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 4 ; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 8 @@ -2411,9 +2411,9 @@ define i64 @not_vectorized_select_icmp_non_const_iv_start_value(ptr %a, ptr %b, ; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i64> [[VEC_IND1]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IVSTART]], [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4