Florian Hahn a434a7a4f1
Reapply "[LAA,Loads] Use loop guards and max BTC if needed when checking deref. (#155672)"
This reverts commit f0df1e3dd4ec064821f673ced7d83e5a2cf6afa1.

Recommit with extra check for SCEVCouldNotCompute. Test has been added in
b16930204b.

Original message:
Remove the fall-back to constant max BTC if the backedge-taken-count
cannot be computed.

The constant max backedge-taken count is computed considering loop
guards, so to avoid regressions we need to apply loop guards as needed.

Also remove the special handling for Mul in willNotOverflow, as this
should no longer be needed after 914374624f
(https://github.com/llvm/llvm-project/pull/155300).

PR: https://github.com/llvm/llvm-project/pull/155672
2025-09-03 12:45:28 +01:00

116 lines
3.5 KiB
LLVM

; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s
; REQUIRES: asserts
; We have 4 loops, three of them are vectorizable (with one being early-exit
; vectorized) and one is not.
; CHECK: 4 loop-vectorize - Number of loops analyzed for vectorization
; CHECK: 1 loop-vectorize - Number of early exit loops vectorized
; CHECK: 3 loop-vectorize - Number of loops vectorized
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Single-exit counted loop that squares the floats of %a in place:
;   a[iv] = a[iv] * a[iv], with iv starting at 0 and stepping by 1.
; Each iteration loads and stores the same element, so no iteration touches
; an element used by another iteration.
; Per its name, this loop is expected to be counted under
; "Number of loops vectorized".
define void @vectorized(ptr nocapture %a, i64 %size) {
entry:
; Loop guard: skip the loop entirely when %size <= 0. The second compare
; (0 > %size) is already implied by the first one.
%cmp1 = icmp sle i64 %size, 0
%cmp21 = icmp sgt i64 0, %size
%or.cond = or i1 %cmp1, %cmp21
br i1 %or.cond, label %exit, label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%mul = fmul float %0, %0
store float %mul, ptr %arrayidx, align 4
%iv.next = add nuw nsw i64 %iv, 1
; Latch exit: leave once the incremented IV is (signed) greater than %size.
%cmp2 = icmp sgt i64 %iv.next, %size
br i1 %cmp2, label %exit, label %loop
exit: ; preds = %entry, %loop
ret void
}
; Loop with two exits: a data-dependent early exit (to %found) taken when
; p1[ind] == p2[ind], and a counted latch exit (to %exit) taken once
; ind.next is no longer unsigned-less-than %end.clamped.
; Returns 1 when a matching pair is found, 0 otherwise.
; Per its name, this is the loop expected to be counted under
; "Number of early exit loops vectorized".
define i32 @early_exit_vectorized(i64 %end) {
entry:
; Two local 1024 x i32 buffers, each passed to the external @init_mem.
%p1 = alloca [1024 x i32]
%p2 = alloca [1024 x i32]
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
; Masking with 1023 bounds the latch limit to [0, 1023], so every index the
; loop can reach stays inside the 1024-element buffers.
; NOTE(review): presumably this bound is what lets the vectorizer prove the
; loads dereferenceable for the early-exit case -- confirm against the pass.
%end.clamped = and i64 %end, 1023
br label %loop
loop:
%ind = phi i64 [ %ind.next, %for.inc ], [ 0, %entry ]
%arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
%0 = load i32, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind
%1 = load i32, ptr %arrayidx2, align 4
; Early exit: stop as soon as the two buffers hold equal values at %ind.
%cmp.early = icmp eq i32 %0, %1
br i1 %cmp.early, label %found, label %for.inc
for.inc:
%ind.next = add i64 %ind, 1
; Latch exit: keep looping while ind.next <u end.clamped. The body runs at
; least once, even when %end.clamped is 0.
%cmp = icmp ult i64 %ind.next, %end.clamped
br i1 %cmp, label %loop, label %exit
found:
ret i32 1
exit:
ret i32 0
}
; Counted loop computing a[iv] = a[iv - 5] * a[iv + 2].
; The value stored to a[iv] is read back five iterations later through the
; a[iv - 5] load, so the iterations are not independent.
; NOTE(review): presumably this loop-carried dependence (distance 5) is what
; prevents vectorization at the forced width 4 / interleave 4 from the RUN
; line -- confirm against the vectorizer's debug output.
; Per its name, this is the loop that is analyzed but not vectorized.
define void @not_vectorized(ptr nocapture %a, i64 %size) {
entry:
; Loop guard: skip the loop entirely when %size <= 0. The second compare
; (0 > %size) is already implied by the first one.
%cmp1 = icmp sle i64 %size, 0
%cmp21 = icmp sgt i64 0, %size
%or.cond = or i1 %cmp1, %cmp21
br i1 %or.cond, label %exit, label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
; Load a[iv - 5]: an element written by an earlier iteration of this loop
; once iv >= 5.
%0 = add nsw i64 %iv, -5
%arrayidx = getelementptr inbounds float, ptr %a, i64 %0
%1 = load float, ptr %arrayidx, align 4
; Load a[iv + 2]: an element that a later iteration will overwrite.
%2 = add nsw i64 %iv, 2
%arrayidx2 = getelementptr inbounds float, ptr %a, i64 %2
%3 = load float, ptr %arrayidx2, align 4
%mul = fmul float %1, %3
%arrayidx4 = getelementptr inbounds float, ptr %a, i64 %iv
store float %mul, ptr %arrayidx4, align 4
%iv.next = add nuw nsw i64 %iv, 1
; Latch exit: leave once the incremented IV is (signed) greater than %size.
%cmp2 = icmp sgt i64 %iv.next, %size
br i1 %cmp2, label %exit, label %loop
exit:
ret void
}
; Copy loop (B[iv] = A[iv]) with two exits that both depend only on the
; induction variable, each leading to its own exit block:
;   - header exit to %exit.0 when iv == %N          (returns false)
;   - latch exit  to %exit.1 when iv.next >=u 1000  (returns true)
; NOTE(review): given the expected statistics (3 of 4 loops vectorized, 1
; early-exit vectorization) and the names of the other functions, this loop
; is presumably counted as vectorized but not as an early-exit
; vectorization -- confirm against the pass output.
define i1 @multiple_countable_exits_multiple_exit_block(ptr %A, ptr %B, i32 %N) {
entry:
br label %loop.header
loop.header:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
; First exit: checked before the copy, so no element is copied for iv == %N.
%cond.0 = icmp eq i32 %iv, %N
br i1 %cond.0, label %exit.0, label %loop.latch
loop.latch:
%A.gep = getelementptr inbounds i32, ptr %A, i32 %iv
%lv = load i32, ptr %A.gep, align 4
%B.gep = getelementptr inbounds i32, ptr %B, i32 %iv
store i32 %lv, ptr %B.gep, align 4
%iv.next = add nuw i32 %iv, 1
; Second exit: constant bound, continue while iv.next <u 1000.
%cond.1 = icmp ult i32 %iv.next, 1000
br i1 %cond.1, label %loop.header, label %exit.1
exit.0:
ret i1 false
exit.1:
ret i1 true
}
; External helper, declared here but not defined in this file. It is called
; by @early_exit_vectorized on each of its two buffers with the value 1024.
; NOTE(review): presumably it fills the buffer of the given size -- the body
; is not visible here.
declare void @init_mem(ptr, i64);