Florian Hahn 5a4586f468
Reapply "[LAA] Remove loop-invariant check added in 234cc40adc61."
This reverts commit d43a80936d437d217d5a6dbbaa5fb131c27e7085.

With the correctness issue blocking the recommit finally fixed
(5d01697ec6cb), again unconditionally check if accesses are completely
before or after each other.
2025-07-14 21:21:22 +01:00

153 lines
5.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes="print<access-info>" %s 2>&1 | FileCheck %s
; Zero-initialized global array of 65536 floats; every load and store in the
; functions of this test indexes into it.
@a = dso_local local_unnamed_addr global [65536 x float] zeroinitializer, align 16
; Generated from the following C code:
; #define LEN 256 * 256
; float a[LEN];
;
; void different_strides() {
; for (int i = 0; i < LEN - 1024 - 255; i++) {
; #pragma clang loop interleave(disable)
; #pragma clang loop unroll(disable)
; for (int j = 0; j < 256; j++)
; a[i + j + 1024] += a[j * 4 + i];
; }
; }
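; The load through %arrayidx and the store through %arrayidx8 have different
; strides (16 and 4 bytes respectively), but the store is always at a safe
; positive distance away from the load: for a fixed i the inner loop loads
; a[i] .. a[i + 1020] and stores a[i + 1024] .. a[i + 1279]. The function name
; says BackwardVectorizable, but the expected output below only lists the Forward
; dependence between the load and store of %arrayidx8 and reports the memory
; dependences as safe.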
; Two-level loop nest over @a. The outer loop steps %i from 0 and exits once
; %i.next equals 64257; the inner loop steps %j from 0 and exits once %j.next
; equals 256. All memory accesses are in inner.body.
define void @different_strides_backward_vectorizable() {
; CHECK-LABEL: 'different_strides_backward_vectorizable'
; CHECK-NEXT: inner.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Forward:
; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: outer.header:
; CHECK-NEXT: Report: loop is not the innermost loop
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
br label %outer.header
outer.header:
%i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
; %0 = %i + 1024 is the first element index written by the inner loop; it is
; computed here because it does not change inside the inner loop.
%0 = add nuw nsw i64 %i, 1024
br label %inner.body
inner.body:
%j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
; %2 = 4 * %j + %i: index of the element that is only read. It advances by
; 4 floats (16 bytes) per inner iteration, covering a[%i] .. a[%i + 1020].
%1 = shl nuw nsw i64 %j, 2
%2 = add nuw nsw i64 %1, %i
%arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
%3 = load float, ptr %arrayidx, align 4
; %4 = %i + 1024 + %j: index of the element that is read, updated and written
; back. It advances by 1 float (4 bytes) per inner iteration, covering
; a[%i + 1024] .. a[%i + 1279].
%4 = add nuw nsw i64 %0, %j
%arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
%5 = load float, ptr %arrayidx8, align 4
; a[%4] = a[%4] + a[%2]
%add9 = fadd fast float %5, %3
store float %add9, ptr %arrayidx8, align 4
%j.next = add nuw nsw i64 %j, 1
%exitcond.not = icmp eq i64 %j.next, 256
br i1 %exitcond.not, label %outer.latch, label %inner.body
outer.latch:
%i.next = add nuw nsw i64 %i, 1
%outerexitcond.not = icmp eq i64 %i.next, 64257
br i1 %outerexitcond.not, label %exit, label %outer.header
exit:
ret void
}
; Generated from the following C code (the IR below uses 256 for LEN and 65536
; for LEN2):
; void different_stride_and_not_vectorizable(){
; for(int i = 0; i < LEN2; i++){
; for(int j = 0 ; j < LEN; j++){
; a[i + j + LEN] += a[i + 4*j];
; }
; }
; }
; The load and store have different strides, but the store and load are not at a
; safe distance away from each other, thus not safe for vectorization.
; Two-level loop nest over @a. The outer loop steps %i from 0 and exits once
; %i.next equals 65536; the inner loop steps %j from 0 and exits once %j.next
; equals 256. All memory accesses are in inner.body. The expected output reports
; an Unknown dependence between the load %3 and the store, so the inner loop is
; flagged as unsafe.
define void @different_stride_and_not_vectorizable() {
; CHECK-LABEL: 'different_stride_and_not_vectorizable'
; CHECK-NEXT: inner.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 ->
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Forward:
; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: outer.header:
; CHECK-NEXT: Report: loop is not the innermost loop
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
br label %outer.header
outer.header:
%i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ]
; %0 = %i + 256 is the first element index written by the inner loop; it is
; computed here because it does not change inside the inner loop.
%0 = add nuw nsw i64 %i, 256
br label %inner.body
inner.body:
%j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ]
; %2 = 4 * %j + %i: index of the element that is only read. It advances by
; 4 floats (16 bytes) per inner iteration, covering a[%i] .. a[%i + 1020].
%1 = shl nuw nsw i64 %j, 2
%2 = add nuw nsw i64 %1, %i
%arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2
%3 = load float, ptr %arrayidx, align 4
; %4 = %i + 256 + %j: index of the element that is read, updated and written
; back. It advances by 1 float (4 bytes) per inner iteration, covering
; a[%i + 256] .. a[%i + 511], which lies inside the range read through %arrayidx.
%4 = add nuw nsw i64 %0, %j
%arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4
%5 = load float, ptr %arrayidx8, align 4
; a[%4] = a[%4] + a[%2]
%add9 = fadd fast float %5, %3
store float %add9, ptr %arrayidx8, align 4
%j.next = add nuw nsw i64 %j, 1
%exitcond.not = icmp eq i64 %j.next, 256
br i1 %exitcond.not, label %outer.latch, label %inner.body
outer.latch:
%i.next = add nuw nsw i64 %i, 1
%exitcond29.not = icmp eq i64 %i.next, 65536
br i1 %exitcond29.not, label %exit, label %outer.header
exit:
ret void
}