[VPlan] Use unsigned integers for lane start indices (#175231)

a83c89495ba6fe0134dcaa02372c320cc7ff0dbf caused assertion failures here:
if we have a single-bit induction variable and two lanes (0 and 1),
then the second lane index (1) is out of bounds of what a signed
1-bit integer can hold. Lane indices are always >= 0 according to
VPlanHelpers.h:125, and the lane representation in this code is also
unsigned.

The test case comes from tensorflow/XLA.
This commit is contained in:
Aiden Grossman 2026-01-09 14:28:28 -08:00 committed by GitHub
parent 480af73916
commit acb78bde6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 51 additions and 4 deletions

View File

@ -2346,9 +2346,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
: ConstantFP::get(Ty, C);
static Constant *getUnsignedIntOrFpConstant(Type *Ty, uint64_t C) {
return Ty->isIntegerTy() ? ConstantInt::get(Ty, C) : ConstantFP::get(Ty, C);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@ -2453,7 +2452,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
AddOp, StartIdx0, getUnsignedIntOrFpConstant(BaseIVTy, Lane));
// The step returned by `createStepForVF` is a runtime-evaluated value
// when VF is scalable. Otherwise, it should be folded into a Constant.
assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&

View File

@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; Regression test for lane start indices on a 1-bit induction value.
; %iv is truncated to i1 and the loop is vectorized with a forced width of 2,
; so the per-lane start offsets are 0 and 1. Offset 1 is not representable as
; a signed 1-bit integer, so the lane constant must be created as unsigned.
define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) {
; CHECK-LABEL: define void @copy_bitcast_fusion(
; CHECK-SAME: ptr noalias [[FOO:%.*]], ptr noalias [[BAR:%.*]]) {
; CHECK-NEXT: [[HEADER:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = select i1 false, i64 1, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = select i1 true, i64 1, i64 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1
; CHECK-NEXT: store <2 x float> [[TMP7]], ptr [[BAR]], align 4
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
header:
br label %body
body:
%iv = phi i64 [ 0, %header ], [ %iv.next, %body ]
; Narrow the 64-bit induction variable to a single bit, then widen it back to
; an i64 index (1 or 0) used to address %foo.
%iv.trunc = trunc i64 %iv to i1
%iv.trunc2 = select i1 %iv.trunc, i64 1, i64 0
%load.addr = getelementptr float, ptr %foo, i64 %iv.trunc2
%l1 = load float, ptr %load.addr, align 4
; The store is indexed by the full-width induction variable.
%store.addr = getelementptr float, ptr %bar, i64 %iv
store float %l1, ptr %store.addr, align 4
%iv.next = add i64 %iv, 1
; The exit test compares the pre-increment %iv against 1, so the loop body
; runs exactly twice (%iv = 0 and %iv = 1).
%exitcond.not = icmp eq i64 %iv, 1
br i1 %exitcond.not, label %exit, label %body
exit:
ret void
}