[VPlan] Use unsigned integers for lane start indices (#175231)
a83c89495ba6fe0134dcaa02372c320cc7ff0dbf caused assertion failures here: if we have a single-bit induction variable and two lanes (0 and 1), then the second lane index (1) is out of bounds of what a signed 1-bit integer can hold. Lane indices are always >= 0 according to VPlanHelpers.h:125, and the lane representation in this code is also unsigned. The test case comes from tensorflow/XLA.
This commit is contained in:
parent
480af73916
commit
acb78bde6f
@ -2346,9 +2346,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
|
||||
|
||||
/// A helper function that returns an integer or floating-point constant with
|
||||
/// value C.
|
||||
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
|
||||
return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
|
||||
: ConstantFP::get(Ty, C);
|
||||
static Constant *getUnsignedIntOrFpConstant(Type *Ty, uint64_t C) {
|
||||
return Ty->isIntegerTy() ? ConstantInt::get(Ty, C) : ConstantFP::get(Ty, C);
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
@ -2453,7 +2452,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
|
||||
|
||||
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
|
||||
Value *StartIdx = Builder.CreateBinOp(
|
||||
AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
|
||||
AddOp, StartIdx0, getUnsignedIntOrFpConstant(BaseIVTy, Lane));
|
||||
// The step returned by `createStepForVF` is a runtime-evaluated value
|
||||
// when VF is scalable. Otherwise, it should be folded into a Constant.
|
||||
assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
|
||||
|
||||
@ -0,0 +1,48 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S %s 2>&1 | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-grtev4-linux-gnu"
|
||||
|
||||
define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) {
|
||||
; CHECK-LABEL: define void @copy_bitcast_fusion(
|
||||
; CHECK-SAME: ptr noalias [[FOO:%.*]], ptr noalias [[BAR:%.*]]) {
|
||||
; CHECK-NEXT: [[HEADER:.*:]]
|
||||
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = select i1 false, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select i1 true, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1
|
||||
; CHECK-NEXT: store <2 x float> [[TMP7]], ptr [[BAR]], align 4
|
||||
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: br label %[[EXIT:.*]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
header:
|
||||
br label %body
|
||||
|
||||
body:
|
||||
%iv = phi i64 [ 0, %header ], [ %iv.next, %body ]
|
||||
%iv.trunc = trunc i64 %iv to i1
|
||||
%iv.trunc2 = select i1 %iv.trunc, i64 1, i64 0
|
||||
%load.addr = getelementptr float, ptr %foo, i64 %iv.trunc2
|
||||
%l1 = load float, ptr %load.addr, align 4
|
||||
%store.addr = getelementptr float, ptr %bar, i64 %iv
|
||||
store float %l1, ptr %store.addr, align 4
|
||||
%iv.next = add i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv, 1
|
||||
br i1 %exitcond.not, label %exit, label %body
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user