Ramkumar Ramachandra bb2791609d
[LAA] Tweak debug output for UTC stability (#140764)
UpdateTestChecks has a make_analyzer_generalizer that replaces pointer
addresses in the debug output of LAA with a pattern, which is an
acceptable solution when there is a single RUN line. However, when there
are multiple RUN lines with a common pattern, UTC fails to recognize the
common output due to mismatched pointer addresses. Instead of hacking
UTC to scrub the output before comparing the outputs of the different
RUN lines, fix the issue once and for all by making LAA not print
unstable pointer addresses in the first place.

The removal of the now-dead make_analyzer_generalizer is left as a
non-trivial exercise for a follow-up.
2025-05-21 12:01:49 +01:00
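
For context: the generalizer works by substituting the unstable hex
addresses in the analysis output with a FileCheck pattern before the
check lines are emitted. A minimal sketch of that idea in Python (the
helper name and exact regex below are illustrative assumptions, not the
actual UpdateTestChecks code):

    import re

    # Any concrete address such as "0x55d1c0ffee00" becomes a FileCheck
    # pattern that matches an arbitrary address, so repeated runs emit
    # identical check lines.
    PTR_RE = re.compile(r"0x[0-9a-f]+")

    def generalize_analyzer_output(line: str) -> str:
        return PTR_RE.sub("{{0x[0-9a-f]+}}", line)

    print(generalize_analyzer_output("src 0x55d1c0ffee00 -> dst 0x55d1c0ffee40"))
    # src {{0x[0-9a-f]+}} -> dst {{0x[0-9a-f]+}}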


; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -disable-output -passes='print<access-info>' %s 2>&1 | FileCheck %s
;
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; A store-to-load forwarding dependence in the presence of symbolic strides.
define void @single_stride(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT: --> {%A,+,4}<%loop>
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i64 %iv, %stride
%gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
%load = load i32, ptr %gep.A, align 4
%gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
%load_1 = load i32, ptr %gep.B, align 4
%add = add i32 %load_1, %load
%iv.next = add nuw nsw i64 %iv, 1
%gep.A.next = getelementptr inbounds i32, ptr %A, i64 %iv.next
store i32 %add, ptr %gep.A.next, align 4
%exitcond = icmp eq i64 %iv.next, %N
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
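;
; Note: each iteration reads A[i * %stride] and B[i] and stores the sum to
; A[i + 1]. Under the speculated predicate %stride == 1, the access function
; {%A,+,(4 * %stride)} collapses to {%A,+,4}, so the load reads the location
; stored by the previous iteration: a backward dependence at a distance of
; one i32.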
; A store-to-load forwarding dependence in the presence of symbolic strides,
; with nusw instead of inbounds on the GEPs.
define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_nusw'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop>
; CHECK-NEXT: --> {%A,+,4}<%loop>
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i64 %iv, %stride
%gep.A = getelementptr nusw i32, ptr %A, i64 %mul
%load = load i32, ptr %gep.A, align 4
%gep.B = getelementptr nusw i32, ptr %B, i64 %iv
%load_1 = load i32, ptr %gep.B, align 4
%add = add i32 %load_1, %load
%iv.next = add nuw nsw i64 %iv, 1
%gep.A.next = getelementptr nusw i32, ptr %A, i64 %iv.next
store i32 %add, ptr %gep.A.next, align 4
%exitcond = icmp eq i64 %iv.next, %N
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
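;
; Note: the analysis result matches @single_stride exactly; the nusw
; (no unsigned signed wrap) flag on the GEPs is enough for the same
; predicated SCEV rewrite as inbounds.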
; Similar to @single_stride, but with struct types.
define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_struct'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load { i32, i8 }, ptr %gep.A, align 4 ->
; CHECK-NEXT: store { i32, i8 } %ins, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul:
; CHECK-NEXT: {%A,+,(8 * %stride)}<%loop>
; CHECK-NEXT: --> {%A,+,8}<%loop>
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i64 %iv, %stride
%gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul
%load = load { i32, i8 }, ptr %gep.A, align 4
%gep.B = getelementptr inbounds { i32, i8 }, ptr %B, i64 %iv
%load_1 = load { i32, i8 }, ptr %gep.B, align 4
%v1 = extractvalue { i32, i8 } %load, 0
%v2 = extractvalue { i32, i8 } %load_1, 0
%add = add i32 %v1, %v2
%ins = insertvalue { i32, i8 } undef, i32 %add, 0
%iv.next = add nuw nsw i64 %iv, 1
%gep.A.next = getelementptr inbounds { i32, i8 }, ptr %A, i64 %iv.next
store { i32, i8 } %ins, ptr %gep.A.next, align 4
%exitcond = icmp eq i64 %iv.next, %N
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
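;
; Note: under the datalayout above, { i32, i8 } occupies 8 bytes (4 for the
; i32, 1 for the i8, and 3 bytes of tail padding to round the size up to the
; 4-byte struct alignment), so the access function steps by (8 * %stride) and
; collapses to {%A,+,8} under %stride == 1.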
; Test with multiple GEP indices.
define void @single_stride_array(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_array'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load [2 x i32], ptr %gep.A, align 4 ->
; CHECK-NEXT: store [2 x i32] %ins, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %stride == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds [2 x i32], ptr %A, i64 %mul, i64 1:
; CHECK-NEXT: {(4 + %A),+,(8 * %stride)}<%loop>
; CHECK-NEXT: --> {(4 + %A),+,8}<%loop>
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i64 %iv, %stride
%gep.A = getelementptr inbounds [2 x i32], ptr %A, i64 %mul, i64 1
%load = load [2 x i32], ptr %gep.A, align 4
%gep.B = getelementptr inbounds [2 x i32], ptr %B, i64 %iv
%load_1 = load [2 x i32], ptr %gep.B, align 4
%v1 = extractvalue [2 x i32] %load, 0
%v2 = extractvalue [2 x i32] %load_1, 0
%add = add i32 %v1, %v2
%ins = insertvalue [2 x i32] poison, i32 %add, 0
%iv.next = add nuw nsw i64 %iv, 1
%gep.A.next = getelementptr inbounds [2 x i32], ptr %A, i64 %iv.next
store [2 x i32] %ins, ptr %gep.A.next, align 4
%exitcond = icmp eq i64 %iv.next, %N
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
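;
; Note: the trailing constant index (i64 1) adds a fixed 4-byte offset (one
; i32 into a [2 x i32] element), giving the start expression (4 + %A), while
; the per-iteration step is 8 * %stride, the size of one [2 x i32] element.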
define void @single_stride_castexpr(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr'
; CHECK-NEXT: inner.loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + %dst))
; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %src High: (804 + %src))
; CHECK-NEXT: Member: {%src,+,4}<nuw><%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT: --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: outer.header:
; CHECK-NEXT: Report: loop is not the innermost loop
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%offset.ext = sext i32 %offset to i64
br label %outer.header
outer.header:
%iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
br i1 %cond, label %inner.loop, label %exit
inner.loop:
%iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
%iv.3 = phi i32 [ 0, %outer.header ], [ %iv.3.next, %inner.loop ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv.3
%load = load i32, ptr %gep.src, align 8
%gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
store i32 %load, ptr %gep.dst, align 8
%iv.2.next = add i64 %iv.2, %offset.ext
%iv.3.next = add i32 %iv.3, 1
%ec = icmp eq i32 %iv.3, 200
br i1 %ec, label %outer.header, label %inner.loop
exit:
ret void
}
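;
; Note: the symbolic stride here is %offset, an i32 that is sign-extended
; before it feeds the i64 induction update, so the rewrite looks through the
; cast: the step (4 * (sext i32 %offset to i64))<nsw> becomes 4 under the
; predicate %offset == 1. The outer loop is reported only as not being the
; innermost loop and is not analyzed further.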
define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1 %cond) {
; CHECK-LABEL: 'single_stride_castexpr_multiuse'
; CHECK-NEXT: inner.loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((4 * %iv.1) + %dst) High: (804 + (4 * %iv.1) + (-4 * (zext i32 %offset to i64))<nsw> + %dst))
; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (4 + %src) High: (808 + (-4 * (zext i32 %offset to i64))<nsw> + %src))
; CHECK-NEXT: Member: {(4 + %src),+,4}<%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %offset == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3:
; CHECK-NEXT: {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
; CHECK-NEXT: --> {(4 + %src),+,4}<%inner.loop>
; CHECK-NEXT: [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT: --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: outer.header:
; CHECK-NEXT: Report: loop is not the innermost loop
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
%offset.ext = sext i32 %offset to i64
%offset.zext = zext i32 %offset to i64
br label %outer.header
outer.header:
%iv.1 = phi i64 [ 0, %entry ], [ %iv.2.next, %inner.loop ]
br i1 %cond, label %inner.loop, label %exit
inner.loop:
%iv.2 = phi i64 [ %iv.1, %outer.header ], [ %iv.2.next, %inner.loop ]
%iv.3 = phi i64 [ %offset.zext, %outer.header ], [ %iv.3.next, %inner.loop ]
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3
%load = load i32, ptr %gep.src, align 8
%gep.dst = getelementptr i32, ptr %dst, i64 %iv.2
store i32 %load, ptr %gep.dst, align 8
%iv.2.next = add i64 %iv.2, %offset.ext
%iv.3.next = add i64 %iv.3, 1
%ec = icmp eq i64 %iv.3, 200
br i1 %ec, label %outer.header, label %inner.loop
exit:
ret void
}
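;
; Note: %offset has two users, a sext feeding the step of %iv.2 and a zext
; feeding the start of %iv.3; the single predicate %offset == 1 is enough to
; rewrite both expressions, as the two [PSE] entries above show.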
define double @single_iteration_unknown_stride(i32 %x, ptr %y, i1 %cond) {
; CHECK-LABEL: 'single_iteration_unknown_stride'
; CHECK-NEXT: loop.body:
; CHECK-NEXT: Memory dependences are safe
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %x == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul:
; CHECK-NEXT: {(8 + %y),+,(8 * (sext i32 %x to i64))<nsw>}<%loop.body>
; CHECK-NEXT: --> {(8 + %y),+,8}<%loop.body>
;
entry:
br i1 %cond, label %noloop.exit, label %loop.ph
loop.ph: ; preds = %entry
%sext7 = sext i32 %x to i64
%gep8 = getelementptr i8, ptr %y, i64 8
br label %loop.body
loop.body: ; preds = %loop.body, %loop.ph
%iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
%mul = mul i64 %iv, %sext7
%gep10 = getelementptr double, ptr %gep8, i64 %mul
%load11 = load double, ptr %gep10, align 8
store double %load11, ptr %y, align 8
%iv.next = add i64 %iv, 1
%icmp = icmp eq i64 %iv, 0
br i1 %icmp, label %loop.exit, label %loop.body
noloop.exit: ; preds = %entry
%sext = sext i32 %x to i64
%gep = getelementptr double, ptr %y, i64 %sext
%load5 = load double, ptr %gep, align 8
ret double %load5
loop.exit: ; preds = %loop.body
%sext2 = sext i32 %x to i64
%gep2 = getelementptr double, ptr %y, i64 %sext2
%load6 = load double, ptr %gep2, align 8
ret double %load6
}
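;
; Note: the backedge condition (icmp eq i64 %iv, 0) is true on the first
; iteration, so the loop body executes exactly once and no loop-carried
; dependence is possible; the predicate %x == 1 is still recorded to rewrite
; the unknown stride of %gep10.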
; A loop with two symbolic strides.
define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1, i64 %stride.2) {
; CHECK-LABEL: 'two_strides'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Backward loop carried data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: Equal predicate: %stride.2 == 1
; CHECK-NEXT: Equal predicate: %stride.1 == 1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
; CHECK-NEXT: {%A,+,(4 * %stride.1)}<%loop>
; CHECK-NEXT: --> {%A,+,4}<%loop>
; CHECK-NEXT: [PSE] %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2:
; CHECK-NEXT: {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop>
; CHECK-NEXT: --> {(4 + %A),+,4}<%loop>
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i64 %iv, %stride.1
%gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
%load = load i32, ptr %gep.A, align 4
%gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
%load_1 = load i32, ptr %gep.B, align 4
%add = add i32 %load_1, %load
%iv.next = add nuw nsw i64 %iv, 1
%mul.2 = mul i64 %iv.next, %stride.2
%gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2
store i32 %add, ptr %gep.A.next, align 4
%exitcond = icmp eq i64 %iv.next, %N
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
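;
; Note: each symbolic stride gets its own predicate (%stride.1 == 1 and
; %stride.2 == 1); only when both hold do the two access functions collapse
; to unit stride, exposing the backward dependence between the load of A[i]
; and the store to A[i + 1].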
define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride) {
; CHECK-LABEL: 'single_stride_used_for_trip_count'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i64 %iv, %stride
%gep.A = getelementptr inbounds i32, ptr %A, i64 %mul
%load = load i32, ptr %gep.A, align 4
%gep.B = getelementptr inbounds i32, ptr %B, i64 %iv
%load_1 = load i32, ptr %gep.B, align 4
%add = add i32 %load_1, %load
%iv.next = add nuw nsw i64 %iv, 1
%gep.A.next = getelementptr inbounds i32, ptr %A, i64 %iv.next
store i32 %add, ptr %gep.A.next, align 4
%exitcond = icmp eq i64 %iv.next, %stride
br i1 %exitcond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
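;
; Note: no Equal predicate is emitted here because %stride also controls the
; trip count: speculating %stride == 1 would leave a loop that runs at most
; once, so versioning on it is pointless, and without the rewrite the strided
; access leaves an unsafe indirect dependence.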
; Check the scenario where we have an unknown Stride that also happens to be
; the loop iteration count: speculating Stride == 1 would imply that the loop
; executes at most a single iteration, so the speculation is not applied.
define void @unknown_stride_equalto_tc(i32 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_tc'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: ptr %A
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (4 + %A))
; CHECK-NEXT: Member: %A
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + %N) to i64) * (sext i32 %N to i64)) + %B))))
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%j,+,%N}<%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT: ((2 * (sext i32 {%j,+,%N}<%loop> to i64))<nsw> + %B)
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 %N to i64))<nsw>}<%loop>
;
entry:
%cmp = icmp eq i32 %N, 0
br i1 %cmp, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul i32 %iv, %N
%add = add i32 %mul, %j
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
%load = load i16, ptr %arrayidx
%sext = sext i16 %load to i32
store i32 %sext, ptr %A
%iv.next = add nuw i32 %iv, 1
%exitcond = icmp eq i32 %iv.next, %N
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
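;
; Note: in this and the following ext/trunc variants the stride equals the
; trip count, so instead of speculating Stride == 1, LAA records the wrap
; assumption "{%j,+,%N}<%loop> Added Flags: <nssw>", which lets the sext of
; the index be rewritten as an add recurrence; safety then rests on the
; run-time check between the store to the loop-invariant address %A and the
; strided loads through %B.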
; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is zero-extended from a narrower type.
define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_zext_tc'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: ptr %A
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (4 + %A))
; CHECK-NEXT: Member: %A
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (zext i16 %N to i32))<nsw> to i64) * (zext i16 %N to i64)) + %B))))
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%j,+,(zext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT: ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (zext i16 %N to i64))<nuw><nsw>}<%loop>
;
entry:
%N.ext = zext i16 %N to i32
%cmp = icmp eq i16 %N, 0
br i1 %cmp, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul nuw i32 %iv, %N.ext
%add = add i32 %mul, %j
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
%load = load i16, ptr %arrayidx
%sext = sext i16 %load to i32
store i32 %sext, ptr %A
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %iv.next, %N.ext
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is sign-extended from a narrower type.
define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_sext_tc'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: ptr %A
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (4 + %A))
; CHECK-NEXT: Member: %A
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (sext i16 %N to i32))<nsw> to i64) * (sext i16 %N to i64)) + %B))))
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%j,+,(sext i16 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT: ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i16 %N to i64))<nsw>}<%loop>
;
entry:
%N.ext = sext i16 %N to i32
%cmp = icmp eq i16 %N, 0
br i1 %cmp, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul nuw i32 %iv, %N.ext
%add = add i32 %mul, %j
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
%load = load i16, ptr %arrayidx
%sext = sext i16 %load to i32
store i32 %sext, ptr %A
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %iv.next, %N.ext
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}
; Check the scenario where we have an unknown Stride, which happens to also be
; the loop iteration count, but the TC is truncated from a wider type.
define void @unknown_stride_equalto_trunc_tc(i64 %N, ptr %A, ptr %B, i32 %j) {
; CHECK-LABEL: 'unknown_stride_equalto_trunc_tc'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: ptr %A
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (4 + %A))
; CHECK-NEXT: Member: %A
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64))<nsw> + %B) umin ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64))<nsw> + %B) umax ((2 * (sext i32 %j to i64))<nsw> + (2 * (zext i32 (-1 + (trunc i64 %N to i32)) to i64) * (sext i32 (trunc i64 %N to i32) to i64)) + %B))))
; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%j,+,(trunc i64 %N to i32)}<nw><%loop> Added Flags: <nssw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add:
; CHECK-NEXT: ((2 * (sext i32 {%j,+,(trunc i64 %N to i32)}<nw><%loop> to i64))<nsw> + %B)
; CHECK-NEXT: --> {((2 * (sext i32 %j to i64))<nsw> + %B),+,(2 * (sext i32 (trunc i64 %N to i32) to i64))<nsw>}<%loop>
;
entry:
%N.trunc = trunc i64 %N to i32
%cmp = icmp eq i64 %N, 0
br i1 %cmp, label %exit, label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%mul = mul nuw i32 %iv, %N.trunc
%add = add i32 %mul, %j
%arrayidx = getelementptr inbounds i16, ptr %B, i32 %add
%load = load i16, ptr %arrayidx
%sext = sext i16 %load to i32
store i32 %sext, ptr %A
%iv.next = add nuw nsw i32 %iv, 1
%exitcond = icmp eq i32 %iv.next, %N.trunc
br i1 %exitcond, label %exit, label %loop
exit:
ret void
}