[VPlan] Use unsigned integers for lane start indices (#175231)

a83c89495ba6fe0134dcaa02372c320cc7ff0dbf caused assertion failures here: if we have a single-bit induction variable and two lanes (0 and 1), then the second lane index (1) is out of bounds of what a signed 1-bit integer can hold (a signed i1 can only represent 0 and -1). Lane indices are always >= 0 according to VPlanHelpers.h:125, and the lane representation in this code is also unsigned, so the start-index constants are now created as unsigned integers. The test case comes from TensorFlow/XLA.
2026-01-09 14:28:28 -08:00 · 2026-01-09 14:28:28 -08:00 · acb78bde6f
commit acb78bde6f
parent 480af73916
2 changed files with 51 additions and 4 deletions
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2346,9 +2346,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,

 /// A helper function that returns an integer or floating-point constant with
 /// value C.
-static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
-  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
-                           : ConstantFP::get(Ty, C);
+static Constant *getUnsignedIntOrFpConstant(Type *Ty, uint64_t C) {
+  return Ty->isIntegerTy() ? ConstantInt::get(Ty, C) : ConstantFP::get(Ty, C);
 }

 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2453,7 +2452,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {

  for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
    Value *StartIdx = Builder.CreateBinOp(
-        AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
+        AddOp, StartIdx0, getUnsignedIntOrFpConstant(BaseIVTy, Lane));
    // The step returned by `createStepForVF` is a runtime-evaluated value
    // when VF is scalable. Otherwise, it should be folded into a Constant.
    assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
--- a/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) {
+; CHECK-LABEL: define void @copy_bitcast_fusion(
+; CHECK-SAME: ptr noalias [[FOO:%.*]], ptr noalias [[BAR:%.*]]) {
+; CHECK-NEXT:  [[HEADER:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 false, i64 1, i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 true, i64 1, i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1
+; CHECK-NEXT:    store <2 x float> [[TMP7]], ptr [[BAR]], align 4
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+header:
+  br label %body
+
+body:
+  %iv = phi i64 [ 0, %header ], [ %iv.next, %body ]
+  %iv.trunc = trunc i64 %iv to i1
+  %iv.trunc2 = select i1 %iv.trunc, i64 1, i64 0
+  %load.addr = getelementptr float, ptr %foo, i64 %iv.trunc2
+  %l1 = load float, ptr %load.addr, align 4
+  %store.addr = getelementptr float, ptr %bar, i64 %iv
+  store float %l1, ptr %store.addr, align 4
+  %iv.next = add i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv, 1
+  br i1 %exitcond.not, label %exit, label %body
+
+exit:
+  ret void
+}
+