[LAA] Move scalable vector check into getStrideFromAddRec() (#154013)
This moves the check closer to the `.getFixedValue()` call and fixes #153797 (which is a regression from #126971).
This commit is contained in:
parent
18123cc91d
commit
bb3066d42b
@ -936,6 +936,12 @@ private:
|
||||
static std::optional<int64_t>
|
||||
getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
|
||||
Value *Ptr, PredicatedScalarEvolution &PSE) {
|
||||
if (isa<ScalableVectorType>(AccessTy)) {
|
||||
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
|
||||
<< "\n");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// The access function must stride over the innermost loop.
|
||||
if (Lp != AR->getLoop()) {
|
||||
LLVM_DEBUG({
|
||||
@ -1590,11 +1596,6 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
|
||||
return 0;
|
||||
|
||||
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
|
||||
if (isa<ScalableVectorType>(AccessTy)) {
|
||||
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
|
||||
<< "\n");
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
|
||||
if (Assume && !AR)
|
||||
|
@ -61,3 +61,29 @@ vector.body:
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: 'regression_test_is_no_wrap_access_scalable_typesize'
|
||||
; CHECK: LAA: Found an analyzable loop: loop
|
||||
; CHECK: LAA: Bad stride - Scalable object: <vscale x 4 x i32>
|
||||
; Regression test accompanying the fix for #153797.
; The loop runs %iv from 0 and exits once %iv == 16. On every iteration it
; stores a <vscale x 4 x i32> value to %ptr_a and to %ptr_b, both at i32 index
; (2 * %iv) + %n. The CHECK lines above expect LAA to reject the stride with
; the "Scalable object" diagnostic for this access type.
define void @regression_test_is_no_wrap_access_scalable_typesize(ptr %ptr_a, i64 %n, ptr %ptr_b) {
|
||||
entry:
|
||||
br label %loop
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
||||
; %3 = (2 * %iv) + %n is the shared element index used by both stores.
%2 = shl i64 %iv, 1
|
||||
%3 = add i64 %2, %n
|
||||
%4 = trunc i64 %iv to i32
|
||||
%5 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %4, i64 0
|
||||
%6 = getelementptr i32, ptr %ptr_a, i64 %3
|
||||
store <vscale x 4 x i32> %5, ptr %6, align 4
|
||||
%.reass3 = or i32 %4, 1
|
||||
%7 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %.reass3, i64 0
|
||||
%8 = shufflevector <vscale x 4 x i32> %7, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
|
||||
%9 = getelementptr i32, ptr %ptr_b, i64 %3
|
||||
store <vscale x 4 x i32> %8, ptr %9, align 4
|
||||
%iv.next = add i64 %iv, 1
|
||||
; The exit test compares the pre-increment %iv (not %iv.next) against 16.
%.not = icmp eq i64 %iv, 16
|
||||
br i1 %.not, label %end, label %loop
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user