There are some accesses we cannot easily catch with the existing SSA-based tracking of uniform stores. Extend the dependency checker to reject accesses to the same invariant address in cases SCEV can prove (distance is zero). For those cases, we would not generate runtime checks for the problematic pair, as they are part of the same group. Note that this adds a new InvariantUnsafe kind, similar to IndirectUnsafe, although maybe it would be sufficient to just have a single Unsafe kind, with a slight loss of precision. Fixes https://github.com/llvm/llvm-project/issues/186922. PR: https://github.com/llvm/llvm-project/pull/187023
350 lines
13 KiB
LLVM
350 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s

; Store and load to the same invariant address through a phi.
; The store in %if writes %p2 and the load in %latch reads %phi, whose incoming
; values are both %p2, so the two accesses share one loop-invariant address.
; The expected output lists the pair as an InvariantUnsafe dependence and puts
; both pointers in group GRP0, so no run-time check compares them to each other.
define void @conditional_store_load_same_invariant_via_phi(ptr %p0, ptr %p1, ptr %p2, i64 %n, i1 %c) {
; CHECK-LABEL: 'conditional_store_load_same_invariant_via_phi'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %x = load i32, ptr %gep0, align 4 ->
; CHECK-NEXT: store i32 %y, ptr %gep1, align 4
; CHECK-EMPTY:
; CHECK-NEXT: InvariantUnsafe:
; CHECK-NEXT: store i32 %x, ptr %p2, align 4 ->
; CHECK-NEXT: %y = load i32, ptr %phi, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: ptr %p2
; CHECK-NEXT: ptr %p2
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: ptr %p2
; CHECK-NEXT: ptr %p2
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %p2 High: (4 + %p2))
; CHECK-NEXT: Member: %p2
; CHECK-NEXT: Member: %p2
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0))
; CHECK-NEXT: Member: {%p0,+,4}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip))
; CHECK-NEXT: Member: {%phip,+,4}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
  ; Base of the strided store in %latch: %p1 or %p0, selected by %c.
  %phip = select i1 %c, ptr %p1, ptr %p0
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %latch ], [ 0, %entry ]
  %gep0 = getelementptr i32, ptr %p0, i64 %iv
  %x = load i32, ptr %gep0, align 4
  %c2 = icmp eq i32 %x, 0
  ; The invariant store is skipped when the loaded value is zero.
  br i1 %c2, label %latch, label %if

if:
  ; Store to the loop-invariant address %p2.
  store i32 %x, ptr %p2, align 4
  br label %latch

latch:
  ; Both incoming values are %p2, so %phi is the address stored to in %if.
  %phi = phi ptr [ %p2, %if ], [ %p2, %loop ]
  %y = load i32, ptr %phi, align 4
  %gep1 = getelementptr i32, ptr %phip, i64 %iv
  store i32 %y, ptr %gep1, align 4
  %iv.next = add nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Same invariant address via two distinct GEPs.
; %gep.st and %gep.ld are separate instructions in %entry that both compute
; element 1 of %base. The loop stores through %gep.st and then loads through
; %gep.ld. The expected output lists the pair as an InvariantUnsafe dependence
; and shows both pointers as (4 + %base) members of group GRP0.
define void @store_load_same_invariant_via_different_geps(ptr %p0, ptr %p1, ptr %base, i64 %n, i1 %c) {
; CHECK-LABEL: 'store_load_same_invariant_via_different_geps'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %x = load i32, ptr %gep0, align 4 ->
; CHECK-NEXT: store i32 %y, ptr %gep1, align 4
; CHECK-EMPTY:
; CHECK-NEXT: InvariantUnsafe:
; CHECK-NEXT: store i32 %x, ptr %gep.st, align 4 ->
; CHECK-NEXT: %y = load i32, ptr %gep.ld, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %base, i64 1
; CHECK-NEXT: %gep.st = getelementptr i32, ptr %base, i64 1
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %base, i64 1
; CHECK-NEXT: %gep.st = getelementptr i32, ptr %base, i64 1
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: (4 + %base) High: (8 + %base))
; CHECK-NEXT: Member: (4 + %base)
; CHECK-NEXT: Member: (4 + %base)
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0))
; CHECK-NEXT: Member: {%p0,+,4}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip))
; CHECK-NEXT: Member: {%phip,+,4}<%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
  ; Base of the strided store in the loop: %p1 or %p0, selected by %c.
  %phip = select i1 %c, ptr %p1, ptr %p0
  ; Two distinct GEP instructions that compute the same address.
  %gep.st = getelementptr i32, ptr %base, i64 1
  %gep.ld = getelementptr i32, ptr %base, i64 1
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep0 = getelementptr i32, ptr %p0, i64 %iv
  %x = load i32, ptr %gep0, align 4
  ; Invariant store followed by an invariant load of the same address.
  store i32 %x, ptr %gep.st, align 4
  %y = load i32, ptr %gep.ld, align 4
  %gep1 = getelementptr i32, ptr %phip, i64 %iv
  store i32 %y, ptr %gep1, align 4
  %iv.next = add nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Phi with incoming values loaded from the same address.
; %ld1 and %ld2 are two separate loads of %x in %entry. The store in %if writes
; through %ld1, and the load in %latch reads %phi, which is %ld1 when coming
; from %if and %ld2 when coming from %else. The expected output lists the store
; and the load of %phi as an InvariantUnsafe dependence; group GRP0 holds two
; %ld1 members, %ld2 forms group GRP3, and Check 2 compares GRP0 against GRP3.
define void @phi_with_loads_from_same_addr(ptr %p0, ptr %p1, ptr %x, i64 %n, i1 %c0) {
; CHECK-LABEL: 'phi_with_loads_from_same_addr'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %v = load i32, ptr %gep0, align 4 ->
; CHECK-NEXT: store i32 %y, ptr %gep1, align 4
; CHECK-EMPTY:
; CHECK-NEXT: InvariantUnsafe:
; CHECK-NEXT: store i32 %v, ptr %ld1, align 4 ->
; CHECK-NEXT: %y = load i32, ptr %phi, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8
; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8
; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv
; CHECK-NEXT: Check 2:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8
; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %ld2 = load ptr, ptr %x, align 8
; CHECK-NEXT: Check 3:
; CHECK-NEXT: Comparing group GRP2:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %ld2 = load ptr, ptr %x, align 8
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %ld1 High: (4 + %ld1))
; CHECK-NEXT: Member: %ld1
; CHECK-NEXT: Member: %ld1
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0))
; CHECK-NEXT: Member: {%p0,+,4}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip))
; CHECK-NEXT: Member: {%phip,+,4}<%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: %ld2 High: (4 + %ld2))
; CHECK-NEXT: Member: %ld2
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
  ; Base of the strided store in %latch: %p1 or %p0, selected by %c0.
  %phip = select i1 %c0, ptr %p1, ptr %p0
  ; Two separate loads of the pointer stored at %x.
  %ld1 = load ptr, ptr %x
  %ld2 = load ptr, ptr %x
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %latch ], [ 0, %entry ]
  %gep0 = getelementptr i32, ptr %p0, i64 %iv
  %v = load i32, ptr %gep0, align 4
  br i1 %c0, label %if, label %else

if:
  ; Store through the first loaded pointer.
  store i32 %v, ptr %ld1, align 4
  br label %latch

else:
  br label %latch

latch:
  ; %ld1 on the path that stored, %ld2 on the path that did not.
  %phi = phi ptr [ %ld1, %if ], [ %ld2, %else ]
  %y = load i32, ptr %phi, align 4
  %gep1 = getelementptr i32, ptr %phip, i64 %iv
  store i32 %y, ptr %gep1, align 4
  %iv.next = add nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; GEPs derived from different loads of the same address; runtime check can
; disambiguate.
; %gep.st is based on %ld1 and %gep.ld on %ld2, two separate loads of %x in
; %entry. The expected output puts them in different groups (GRP0 and GRP3),
; compares those groups in Check 2, lists no dependences, and reports the loop
; as safe with run-time checks.
define void @gep_from_loads_same_addr(ptr %p0, ptr %p1, ptr %x, i64 %n) {
; CHECK-LABEL: 'gep_from_loads_same_addr'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.st = getelementptr i32, ptr %ld1, i64 1
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %p1, i64 %iv
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.st = getelementptr i32, ptr %ld1, i64 1
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv
; CHECK-NEXT: Check 2:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.st = getelementptr i32, ptr %ld1, i64 1
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %ld2, i64 1
; CHECK-NEXT: Check 3:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %p1, i64 %iv
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %gep1 = getelementptr i32, ptr %p1, i64 %iv
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %ld2, i64 1
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: (4 + %ld1) High: (8 + %ld1))
; CHECK-NEXT: Member: (4 + %ld1)
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %p1 High: ((4 * %n) + %p1))
; CHECK-NEXT: Member: {%p1,+,4}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0))
; CHECK-NEXT: Member: {%p0,+,4}<%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: (4 + %ld2) High: (8 + %ld2))
; CHECK-NEXT: Member: (4 + %ld2)
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
  ; Two separate loads of the pointer stored at %x.
  %ld1 = load ptr, ptr %x
  %ld2 = load ptr, ptr %x
  ; Element 1 of each loaded pointer: one used for the store, one for the load.
  %gep.st = getelementptr i32, ptr %ld1, i64 1
  %gep.ld = getelementptr i32, ptr %ld2, i64 1
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep0 = getelementptr i32, ptr %p0, i64 %iv
  %v = load i32, ptr %gep0, align 4
  ; Invariant store and invariant load through the two different GEPs.
  store i32 %v, ptr %gep.st, align 4
  %y = load i32, ptr %gep.ld, align 4
  %gep1 = getelementptr i32, ptr %p1, i64 %iv
  store i32 %y, ptr %gep1, align 4
  %iv.next = add nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; One invariant, one strided: runtime check can disambiguate.
; The loop loads from the loop-invariant address %p1 and stores the value to
; %p0 indexed by %iv. The expected output lists no dependences, emits a single
; check comparing the strided group against the %p1 group, and reports the loop
; as safe with run-time checks.
define void @invariant_and_strided(ptr %p0, ptr %p1, i64 %n) {
; CHECK-LABEL: 'invariant_and_strided'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep = getelementptr i32, ptr %p0, i64 %iv
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: ptr %p1
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0))
; CHECK-NEXT: Member: {%p0,+,4}<%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %p1 High: (4 + %p1))
; CHECK-NEXT: Member: %p1
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  ; Load from the loop-invariant address %p1.
  %val = load i32, ptr %p1, align 4
  ; Strided store: element %iv of %p0.
  %gep = getelementptr i32, ptr %p0, i64 %iv
  store i32 %val, ptr %gep, align 4
  %iv.next = add nuw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}