[LAA] Move scalable vector check into getStrideFromAddRec() (#154013)

This moves the check closer to the `.getFixedValue()` call and fixes
#153797 (which is a regression from #126971).
This commit is contained in:
Benjamin Maxwell 2025-08-19 06:40:07 +01:00 committed by GitHub
parent 18123cc91d
commit bb3066d42b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 32 additions and 5 deletions

View File

@ -936,6 +936,12 @@ private:
static std::optional<int64_t>
getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
Value *Ptr, PredicatedScalarEvolution &PSE) {
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
return std::nullopt;
}
// The access function must stride over the innermost loop.
if (Lp != AR->getLoop()) {
LLVM_DEBUG({
@ -1590,11 +1596,6 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
return 0;
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
return std::nullopt;
}
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (Assume && !AR)

View File

@ -61,3 +61,29 @@ vector.body:
end:
ret void
}
; CHECK-LABEL: 'regression_test_is_no_wrap_access_scalable_typesize'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Bad stride - Scalable object: <vscale x 4 x i32>
; Regression reproducer: a single loop that performs two scalable-vector
; (<vscale x 4 x i32>) stores per iteration, one through %ptr_a and one through
; %ptr_b. The FileCheck expectations above require LAA to treat the loop as
; analyzable and to print its bad-stride diagnostic for the scalable access type.
; NOTE(review): judging by the function name, this presumably guards the
; no-wrap-access path against querying a fixed size for a scalable type;
; confirm against the linked issue.
define void @regression_test_is_no_wrap_access_scalable_typesize(ptr %ptr_a, i64 %n, ptr %ptr_b) {
entry:
br label %loop
loop:
; Induction variable: starts at 0 and is incremented by 1 each iteration.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Shared element index for both stores: 2 * %iv + %n (in units of i32).
%2 = shl i64 %iv, 1
%3 = add i64 %2, %n
; First stored value: truncated %iv in lane 0 of an otherwise zero vector.
%4 = trunc i64 %iv to i32
%5 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %4, i64 0
%6 = getelementptr i32, ptr %ptr_a, i64 %3
store <vscale x 4 x i32> %5, ptr %6, align 4
; Second stored value: (%4 | 1) placed in lane 0, then broadcast to all lanes
; by the all-zero shuffle mask.
%.reass3 = or i32 %4, 1
%7 = insertelement <vscale x 4 x i32> zeroinitializer, i32 %.reass3, i64 0
%8 = shufflevector <vscale x 4 x i32> %7, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> zeroinitializer
%9 = getelementptr i32, ptr %ptr_b, i64 %3
store <vscale x 4 x i32> %8, ptr %9, align 4
%iv.next = add i64 %iv, 1
; The exit test compares the pre-increment %iv, so the body runs for
; %iv = 0 through 16 inclusive before branching to %end.
%.not = icmp eq i64 %iv, 16
br i1 %.not, label %end, label %loop
end:
ret void
}