llvm-project/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
Philip Reames b812e57ac3
[SCEV] Consolidate code for proving wrap flags of controlling finite IVs (#101404)
The canAssumeNoSelfWrap routine in howManyLessThans was doing two subtly
inter-related things. First, it was proving no-self-wrap. This exactly
duplicates the existing logic in the caller. Second, it was establishing
the precondition for the nw->nsw/nuw inference. Specifically, we need to
know that *this* exit must be taken for the inference to be sound.
Otherwise, another (possible abnormal) exit could be taken in the
iteration where this IV would become poison.

This change moves all of that logic into the caller, and caches the
resulting nuw/nsw flags in the AddRec. This centralizes the logic in one
place, and makes it clear that it all depends on controlling the sole
exit.

We do loose a couple cases with SCEV predication. Specifically, if SCEV
predication was able to convert e.g. zext(addrec) into an addrec(zext)
using predication, but didn't record the nuw fact on the new addrec,
then the consuming code can no longer fix this up. I don't think this
case particularly matters.

---------

Co-authored-by: Nikita Popov <github@npopov.com>
2024-08-12 11:40:24 -07:00

528 lines
31 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
define void @vscale_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt'
; CHECK-NEXT: Classifying expressions for: @vscale_slt
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 1073741822
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add nsw i32 %i.05, %vscale
%cmp = icmp slt i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscale_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ult'
; CHECK-NEXT: Classifying expressions for: @vscale_ult
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_ult
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add nsw i32 %i.05, %vscale
%cmp = icmp ult i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscale_ule(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ule'
; CHECK-NEXT: Classifying expressions for: @vscale_ule
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 + vscale)<nsw> umax %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_ule
; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add nsw i32 %i.05, %vscale
%cmp = icmp ule i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscale_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ne'
; CHECK-NEXT: Classifying expressions for: @vscale_ne
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 * vscale)<nsw> + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 * vscale)<nsw> + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 * vscale)<nsw> + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_ne
; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add nsw i32 %i.05, %vscale
%cmp = icmp ne i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscalex4_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_slt'
; CHECK-NEXT: Classifying expressions for: @vscalex4_slt
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = mul i32 %vscale, 4
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_slt
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = mul i32 %vscale, 4
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add nsw i32 %i.05, %VF
%cmp = icmp slt i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscalex4_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_ult'
; CHECK-NEXT: Classifying expressions for: @vscalex4_ult
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = mul i32 %vscale, 4
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_ult
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870910
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = mul i32 %vscale, 4
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add nsw i32 %i.05, %VF
%cmp = icmp ult i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscale_slt_with_vp_plain(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_plain'
; CHECK-NEXT: Classifying expressions for: @vscale_slt_with_vp_plain
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = mul i32 %vscale, 4
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_plain
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = mul i32 %vscale, 4
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF)
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF)
%add = add nsw i32 %i.05, %VF
%cmp = icmp slt i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_umin'
; CHECK-NEXT: Classifying expressions for: @vscale_slt_with_vp_umin
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = mul i32 %vscale, 4
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %left = sub i32 %n, %i.05
; CHECK-NEXT: --> {%n,+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
; CHECK-NEXT: --> ((4 * vscale)<nuw><nsw> umin {%n,+,(-4 * vscale)<nsw>}<nw><%for.body>) U: [0,4097) S: [0,4097) Exits: (((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) umin (4 * vscale)<nuw><nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_umin
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = mul i32 %vscale, 4
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%left = sub i32 %n, %i.05
%VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left)
%0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
%add = add nsw i32 %i.05, %VF
%cmp = icmp slt i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_umin2'
; CHECK-NEXT: Classifying expressions for: @vscale_slt_with_vp_umin2
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = mul i32 %vscale, 4
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> %i.05 U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> ((4 * %i.05) + %A) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %left = sub i32 %n, %i.05
; CHECK-NEXT: --> ((-1 * %i.05)<nsw> + %n) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
; CHECK-NEXT: --> (((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) U: [0,4097) S: [0,4097) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: %add = add nsw i32 %i.05, %VF.capped
; CHECK-NEXT: --> ((((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) + %i.05)<nuw><nsw> U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_umin2
; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count.
; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count.
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = mul i32 %vscale, 4
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%left = sub i32 %n, %i.05
%VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left)
%0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
%add = add nsw i32 %i.05, %VF.capped
%cmp = icmp slt i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
; The next two cases check to see if we can infer the flags on the IV
; of a countup loop using vscale strides. vscale is a power of two
; and these are finite loops by assumption.
define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_noflags'
; CHECK-NEXT: Classifying expressions for: @vscale_slt_noflags
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add i32 %i.05, %vscale
; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_noflags
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 1073741822
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add i32 %i.05, %vscale
%cmp = icmp slt i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_ult_noflags'
; CHECK-NEXT: Classifying expressions for: @vscalex4_ult_noflags
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = mul i32 %vscale, 4
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: [8,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_ult_noflags
; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870910
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = mul i32 %vscale, 4
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
%0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%add = add i32 %i.05, %VF
%cmp = icmp ult i32 %add, %n
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
; The next two cases check to see if we can infer the flags on the IV
; of a countdown loop using vscale strides.
define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_countdown_ne'
; CHECK-NEXT: Classifying expressions for: @vscale_countdown_ne
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %start = sub i32 %n, %vscale
; CHECK-NEXT: --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set
; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT: --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %sub = sub i32 %iv, %vscale
; CHECK-NEXT: --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_countdown_ne
; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%cmp4 = icmp sgt i32 %n, 0
%start = sub i32 %n, %vscale
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
%ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%sub = sub i32 %iv, %vscale
%cmp = icmp ne i32 %sub, 0
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}
define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_countdown_ne'
; CHECK-NEXT: Classifying expressions for: @vscalex4_countdown_ne
; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT: %VF = shl i32 %vscale, 2
; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT: %start = sub i32 %n, %VF
; CHECK-NEXT: --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set
; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT: --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %sub = sub i32 %iv, %VF
; CHECK-NEXT: --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_countdown_ne
; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870911
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT: Loop %for.body: Trip multiple is 1
;
entry:
%vscale = call i32 @llvm.vscale.i32()
%VF = shl i32 %vscale, 2
%cmp4 = icmp sgt i32 %n, 0
%start = sub i32 %n, %VF
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
%ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
%inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1)
store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
%sub = sub i32 %iv, %VF
%cmp = icmp ne i32 %sub, 0
br i1 %cmp, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
ret void
}