diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 525a11987a00..6c9097c92a86 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -121,6 +121,11 @@ public: // the loop, like A[B[i]]. We cannot determine direction or distance in // those cases, and also are unable to generate any runtime checks. IndirectUnsafe, + // Both accesses to the same loop-invariant address and at least one is a + // write. Vectorization is unsafe because different vector lanes would + // read/write the same memory location, and the ordering of accesses + // across lanes matters. + InvariantUnsafe, // Lexically forward. // diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 5f4f305506d4..20f0cd404ab9 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1838,6 +1838,7 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { case Backward: case BackwardVectorizableButPreventsForwarding: case IndirectUnsafe: + case InvariantUnsafe: return VectorizationSafetyStatus::Unsafe; } llvm_unreachable("unexpected DepType!"); @@ -1850,6 +1851,7 @@ bool MemoryDepChecker::Dependence::isBackward() const { case ForwardButPreventsForwarding: case Unknown: case IndirectUnsafe: + case InvariantUnsafe: return false; case BackwardVectorizable: @@ -1861,7 +1863,8 @@ bool MemoryDepChecker::Dependence::isBackward() const { } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { - return isBackward() || Type == Unknown || Type == IndirectUnsafe; + return isBackward() || Type == Unknown || Type == IndirectUnsafe || + Type == InvariantUnsafe; } bool MemoryDepChecker::Dependence::isForward() const { @@ -1876,6 +1879,7 @@ bool MemoryDepChecker::Dependence::isForward() const { case Backward: case BackwardVectorizableButPreventsForwarding: case IndirectUnsafe: + case 
InvariantUnsafe: return false; } llvm_unreachable("unexpected DepType!"); @@ -2132,9 +2136,15 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt << " Sink induction step: " << StrideBPtrInt << "\n"); // At least Src or Sink are loop invariant and the other is strided or - // invariant. We can generate a runtime check to disambiguate the accesses. - if (!StrideAPtrInt || !StrideBPtrInt) + // invariant. + if (!StrideAPtrInt || !StrideBPtrInt) { + // If both are loop-invariant and access the same location, we cannot + // vectorize. + if (!StrideAPtrInt && !StrideBPtrInt && Dist->isZero()) + return MemoryDepChecker::Dependence::InvariantUnsafe; + // Otherwise, we can generate a runtime check to disambiguate the accesses. return MemoryDepChecker::Dependence::Unknown; + } // Both Src and Sink have a constant stride, check if they are in the same // direction. @@ -2481,6 +2491,7 @@ const char *MemoryDepChecker::Dependence::DepName[] = { "NoDep", "Unknown", "IndirectUnsafe", + "InvariantUnsafe", "Forward", "ForwardButPreventsForwarding", "Backward", @@ -2835,6 +2846,25 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, } } + // Update the invariant address dependence flags based on dependences found + // by the dep checker. 
Even if dependences were not recorded (too many to + // track), any InvariantUnsafe dep would still have set the status to Unsafe. + if (const auto *Deps = DepChecker->getDependences()) { + for (const auto &Dep : *Deps) { + if (Dep.Type != MemoryDepChecker::Dependence::InvariantUnsafe) + continue; + Instruction *Src = Dep.getSource(*DepChecker); + Instruction *Dst = Dep.getDestination(*DepChecker); + if (isa<LoadInst>(Src) != isa<LoadInst>(Dst)) { + HasLoadStoreDependenceInvolvingLoopInvariantAddress = true; + } else { + assert(isa<StoreInst>(Src) && isa<StoreInst>(Dst) && + "Expected both to be stores"); + HasStoreStoreDependenceInvolvingLoopInvariantAddress = true; + } + } + } + if (HasConvergentOp) { recordAnalysis("CantInsertRuntimeCheckWithConvergent") << "cannot add control dependency to convergent operation"; @@ -2909,6 +2939,9 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() { case MemoryDepChecker::Dependence::IndirectUnsafe: R << "\nUnsafe indirect dependence."; break; + case MemoryDepChecker::Dependence::InvariantUnsafe: + R << "\nUnsafe dependence on loop-invariant address."; + break; case MemoryDepChecker::Dependence::Unknown: R << "\nUnknown data dependence."; break; diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 82c55011df1c..f359e354c87c 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -199,7 +199,8 @@ public: Instruction *Destination = Dep.getDestination(DepChecker); if (Dep.Type == MemoryDepChecker::Dependence::Unknown || - Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe) { + Dep.Type == MemoryDepChecker::Dependence::IndirectUnsafe || + Dep.Type == MemoryDepChecker::Dependence::InvariantUnsafe) { if (isa<LoadInst>(Source)) LoadsWithUnknownDependence.insert(Source); if (isa<LoadInst>(Destination)) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/invariant-dep-same-ptr.ll b/llvm/test/Analysis/LoopAccessAnalysis/invariant-dep-same-ptr.ll index 
5aeff497466f..777a247dd90d 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/invariant-dep-same-ptr.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/invariant-dep-same-ptr.ll @@ -2,40 +2,46 @@ ; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s ; Store and load to same invariant address through a phi. -; FIXME: Incorrectly considered safe with runtime checks. define void @conditional_store_load_same_invariant_via_phi(ptr %p0, ptr %p1, ptr %p2, i64 %n, i1 %c) { ; CHECK-LABEL: 'conditional_store_load_same_invariant_via_phi' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %x = load i32, ptr %gep0, align 4 -> +; CHECK-NEXT: store i32 %y, ptr %gep1, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: InvariantUnsafe: +; CHECK-NEXT: store i32 %x, ptr %p2, align 4 -> +; CHECK-NEXT: %y = load i32, ptr %phi, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: ptr %p2 +; CHECK-NEXT: ptr %p2 ; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv +; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: ptr %p2 +; CHECK-NEXT: ptr %p2 ; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv -; CHECK-NEXT: Check 2: -; CHECK-NEXT: Comparing group GRP1: ; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv -; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %p2 High: (4 + %p2)) ; 
CHECK-NEXT: Member: %p2 +; CHECK-NEXT: Member: %p2 ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip)) -; CHECK-NEXT: Member: {%phip,+,4}<%loop> -; CHECK-NEXT: Group GRP2: ; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0)) ; CHECK-NEXT: Member: {%p0,+,4}<%loop> +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip)) +; CHECK-NEXT: Member: {%phip,+,4}<%loop> ; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop. ; CHECK-NEXT: SCEV assumptions: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: @@ -69,53 +75,46 @@ exit: } ; Same invariant address via two distinct GEPs. -; FIXME: Incorrectly considered safe with runtime checks. define void @store_load_same_invariant_via_different_geps(ptr %p0, ptr %p1, ptr %base, i64 %n, i1 %c) { ; CHECK-LABEL: 'store_load_same_invariant_via_different_geps' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %x = load i32, ptr %gep0, align 4 -> +; CHECK-NEXT: store i32 %y, ptr %gep1, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: InvariantUnsafe: +; CHECK-NEXT: store i32 %x, ptr %gep.st, align 4 -> +; CHECK-NEXT: %y = load i32, ptr %gep.ld, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %base, i64 1 ; CHECK-NEXT: %gep.st = getelementptr i32, ptr %base, i64 1 ; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv +; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group GRP0: +; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %base, i64 1 ; CHECK-NEXT: %gep.st = getelementptr i32, ptr %base, i64 1 ; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv -; CHECK-NEXT: Check 2: -; CHECK-NEXT: Comparing group GRP0: -; CHECK-NEXT: %gep.st = getelementptr i32, ptr %base, i64 1 -; CHECK-NEXT: Against group GRP3: -; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %base, i64 1 -; CHECK-NEXT: Check 3: -; CHECK-NEXT: Comparing group GRP1: ; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv -; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv -; CHECK-NEXT: Check 4: -; CHECK-NEXT: Comparing group GRP1: -; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv -; CHECK-NEXT: Against group GRP3: -; CHECK-NEXT: %gep.ld = getelementptr i32, ptr %base, i64 1 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: (4 + %base) High: (8 + %base)) ; CHECK-NEXT: Member: (4 + %base) +; CHECK-NEXT: Member: (4 + %base) ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip)) -; CHECK-NEXT: Member: {%phip,+,4}<%loop> -; CHECK-NEXT: Group GRP2: ; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0)) ; 
CHECK-NEXT: Member: {%p0,+,4}<%loop> -; CHECK-NEXT: Group GRP3: -; CHECK-NEXT: (Low: (4 + %base) High: (8 + %base)) -; CHECK-NEXT: Member: (4 + %base) +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip)) +; CHECK-NEXT: Member: {%phip,+,4}<%loop> ; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop. ; CHECK-NEXT: SCEV assumptions: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: @@ -143,35 +142,41 @@ exit: } ; Phi with incoming values loaded from the same address -; FIXME: Incorrectly considered safe with runtime checks. define void @phi_with_loads_from_same_addr(ptr %p0, ptr %p1, ptr %x, i64 %n, i1 %c0) { ; CHECK-LABEL: 'phi_with_loads_from_same_addr' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %v = load i32, ptr %gep0, align 4 -> +; CHECK-NEXT: store i32 %y, ptr %gep1, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: InvariantUnsafe: +; CHECK-NEXT: store i32 %v, ptr %ld1, align 4 -> +; CHECK-NEXT: %y = load i32, ptr %phi, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8 +; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8 ; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv +; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8 +; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8 ; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv +; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv ; CHECK-NEXT: Check 2: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8 +; CHECK-NEXT: %ld1 = load ptr, ptr %x, align 8 ; CHECK-NEXT: Against group GRP3: ; CHECK-NEXT: %ld2 = load ptr, ptr %x, align 8 ; CHECK-NEXT: Check 3: -; CHECK-NEXT: Comparing group GRP1: -; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv -; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep0 = getelementptr i32, ptr %p0, i64 %iv -; CHECK-NEXT: Check 4: -; CHECK-NEXT: Comparing group GRP1: +; CHECK-NEXT: Comparing group GRP2: ; CHECK-NEXT: %gep1 = getelementptr i32, ptr %phip, i64 %iv ; CHECK-NEXT: Against group GRP3: ; CHECK-NEXT: %ld2 = load ptr, ptr %x, align 8 @@ -179,17 +184,18 @@ define void @phi_with_loads_from_same_addr(ptr %p0, ptr %p1, ptr %x, i64 %n, i1 ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %ld1 High: (4 + %ld1)) ; CHECK-NEXT: Member: %ld1 +; CHECK-NEXT: Member: %ld1 ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip)) -; CHECK-NEXT: Member: 
{%phip,+,4}<%loop> -; CHECK-NEXT: Group GRP2: ; CHECK-NEXT: (Low: %p0 High: ((4 * %n) + %p0)) ; CHECK-NEXT: Member: {%p0,+,4}<%loop> +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: %phip High: ((4 * %n) + %phip)) +; CHECK-NEXT: Member: {%phip,+,4}<%loop> ; CHECK-NEXT: Group GRP3: ; CHECK-NEXT: (Low: %ld2 High: (4 + %ld2)) ; CHECK-NEXT: Member: %ld2 ; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop. ; CHECK-NEXT: SCEV assumptions: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll index 6fbe0e45976b..db6013a69bf6 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll @@ -495,13 +495,13 @@ define void @phi_load_store_memdep_check(i1 %c, ptr %A, ptr %B, ptr %C) { ; CHECK-LABEL: 'phi_load_store_memdep_check' ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Unsafe dependence on loop-invariant address. 
; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: InvariantUnsafe: ; CHECK-NEXT: %lv3 = load i16, ptr %c.sink, align 2 -> ; CHECK-NEXT: store i16 %add, ptr %c.sink, align 1 ; CHECK-EMPTY: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: InvariantUnsafe: ; CHECK-NEXT: %lv3 = load i16, ptr %c.sink, align 2 -> ; CHECK-NEXT: store i16 %add, ptr %c.sink, align 1 ; CHECK-EMPTY: diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected index 65904d13c1c9..b2cd7cc79a70 100644 --- a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected @@ -6,10 +6,10 @@ define void @ldist(i1 %cond, ptr %A, ptr %B, ptr %C) { ; CHECK-LABEL: 'ldist' ; CHECK-NEXT: LDist: Found a candidate loop: for.body ; CHECK-NEXT: LDist: Backward dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: InvariantUnsafe: ; CHECK-NEXT: %lv3 = load i16, ptr %c.sink, align 2 -> ; CHECK-NEXT: store i16 %add, ptr %c.sink, align 1 -; CHECK-NEXT: Unknown: +; CHECK-NEXT: InvariantUnsafe: ; CHECK-NEXT: %lv3 = load i16, ptr %c.sink, align 2 -> ; CHECK-NEXT: store i16 %add, ptr %c.sink, align 1 ; CHECK-NEXT: LDist: Seeded partitions: