diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index cc40d2e83f2e0..43eac1474876f 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -200,9 +200,7 @@ class MemoryDepChecker { /// /// Only checks sets with elements in \p CheckDeps. bool areDepsSafe(const DepCandidates &AccessSets, - const MemAccessInfoList &CheckDeps, - const DenseMap> - &UnderlyingObjects); + const MemAccessInfoList &CheckDeps); /// No memory dependence was encountered that would inhibit /// vectorization. @@ -352,11 +350,8 @@ class MemoryDepChecker { /// element access it records this distance in \p MinDepDistBytes (if this /// distance is smaller than any other distance encountered so far). /// Otherwise, this function returns true signaling a possible dependence. - Dependence::DepType - isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, - unsigned BIdx, - const DenseMap> - &UnderlyingObjects); + Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx, + const MemAccessInfo &B, unsigned BIdx); /// Check whether the data dependence could prevent store-load /// forwarding. @@ -393,11 +388,9 @@ class MemoryDepChecker { /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite, /// BIsWrite). std::variant - getDependenceDistanceStrideAndSize( - const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B, - Instruction *BInst, - const DenseMap> - &UnderlyingObjects); + getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst, + const MemAccessInfo &B, + Instruction *BInst); }; class RuntimePointerChecking; @@ -799,7 +792,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, Value *Ptr); /// If the pointer has a constant stride return it in units of the access type -/// size. Otherwise return std::nullopt. +/// size. If the pointer is loop-invariant, return 0. Otherwise return +/// std::nullopt. /// /// Ensure that it does not wrap in the address space, assuming the predicate /// associated with \p PSE is true. diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 646d2f7ef3077..a2b124d3b10bf 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -725,11 +725,6 @@ class AccessAnalysis { const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; } - const DenseMap> & - getUnderlyingObjects() const { - return UnderlyingObjects; - } - private: typedef MapVector> PtrAccessMap; @@ -1455,22 +1450,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// Check whether the access through \p Ptr has a constant stride. -std::optional llvm::getPtrStride(PredicatedScalarEvolution &PSE, - Type *AccessTy, Value *Ptr, - const Loop *Lp, - const DenseMap &StridesMap, - bool Assume, bool ShouldCheckWrap) { +std::optional +llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, + const DenseMap &StridesMap, + bool Assume, bool ShouldCheckWrap) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); + if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) + return {0}; + Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); - if (isa(AccessTy)) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy << "\n"); return std::nullopt; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); - const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (Assume && !AR) AR = PSE.getAsAddRec(Ptr); @@ -1897,23 +1893,11 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, return ScaledDist % Stride; } -/// Returns true if any of the underlying objects has a loop varying address, -/// i.e. may change in \p L. -static bool -isLoopVariantIndirectAddress(ArrayRef UnderlyingObjects, - ScalarEvolution &SE, const Loop *L) { - return any_of(UnderlyingObjects, [&SE, L](const Value *UO) { - return !SE.isLoopInvariant(SE.getSCEV(const_cast(UO)), L); - }); -} - std::variant MemoryDepChecker::getDependenceDistanceStrideAndSize( const AccessAnalysis::MemAccessInfo &A, Instruction *AInst, - const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, - const DenseMap> - &UnderlyingObjects) { + const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) { const auto &DL = InnermostLoop->getHeader()->getDataLayout(); auto &SE = *PSE.getSE(); const auto &[APtr, AIsWrite] = A; @@ -1931,12 +1915,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( BPtr->getType()->getPointerAddressSpace()) return MemoryDepChecker::Dependence::Unknown; - int64_t StrideAPtr = - getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true) - .value_or(0); - int64_t StrideBPtr = - getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true) - .value_or(0); + std::optional StrideAPtr = + getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true); + std::optional StrideBPtr = + getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true); const SCEV *Src = PSE.getSCEV(APtr); const SCEV *Sink = PSE.getSCEV(BPtr); @@ -1944,26 +1926,19 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( // If the induction step is negative we have to invert source and sink of the // dependence when measuring the distance between them. We should not swap // AIsWrite with BIsWrite, as their uses expect them in program order. - if (StrideAPtr < 0) { + if (StrideAPtr && *StrideAPtr < 0) { std::swap(Src, Sink); std::swap(AInst, BInst); + std::swap(StrideAPtr, StrideBPtr); } const SCEV *Dist = SE.getMinusSCEV(Sink, Src); LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "\n"); LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst << ": " << *Dist << "\n"); - // Needs accesses where the addresses of the accessed underlying objects do - // not change within the loop. - if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE, - InnermostLoop) || - isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE, - InnermostLoop)) - return MemoryDepChecker::Dependence::IndirectUnsafe; - // Check if we can prove that Sink only accesses memory after Src's end or // vice versa. At the moment this is limited to cases where either source or // sink are loop invariant to avoid compile-time increases. This is not @@ -1985,12 +1960,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( } } - // Need accesses with constant strides and the same direction. We don't want - // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that - // could wrap in the address space. - if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || - (StrideAPtr < 0 && StrideBPtr > 0)) { + // Need accesses with constant strides and the same direction for further + // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and + // similar code or pointer arithmetic that could wrap in the address space. + + // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and + // not loop-invariant (stride will be 0 in that case), we cannot analyze the + // dependence further and also cannot generate runtime checks. + if (!StrideAPtr || !StrideBPtr) { LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); + return MemoryDepChecker::Dependence::IndirectUnsafe; + } + + int64_t StrideAPtrInt = *StrideAPtr; + int64_t StrideBPtrInt = *StrideBPtr; + LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt + << " Sink induction step: " << StrideBPtrInt << "\n"); + // At least Src or Sink are loop invariant and the other is strided or + // invariant. We can generate a runtime check to disambiguate the accesses. + if (StrideAPtrInt == 0 || StrideBPtrInt == 0) + return MemoryDepChecker::Dependence::Unknown; + + // Both Src and Sink have a constant stride, check if they are in the same + // direction. + if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) || + (StrideAPtrInt < 0 && StrideBPtrInt > 0)) { + LLVM_DEBUG( + dbgs() << "Pointer access with strides in different directions\n"); return MemoryDepChecker::Dependence::Unknown; } @@ -1999,22 +1995,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), - std::abs(StrideBPtr), TypeByteSize, + return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt), + std::abs(StrideBPtrInt), TypeByteSize, AIsWrite, BIsWrite); } -MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( - const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, - unsigned BIdx, - const DenseMap> - &UnderlyingObjects) { +MemoryDepChecker::Dependence::DepType +MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, + const MemAccessInfo &B, unsigned BIdx) { assert(AIdx < BIdx && "Must pass arguments in program order"); // Get the dependence distance, stride, type size and what access writes for // the dependence between A and B. - auto Res = getDependenceDistanceStrideAndSize( - A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects); + auto Res = + getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]); if (std::holds_alternative(Res)) return std::get(Res); @@ -2248,10 +2242,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( return Dependence::BackwardVectorizable; } -bool MemoryDepChecker::areDepsSafe( - const DepCandidates &AccessSets, const MemAccessInfoList &CheckDeps, - const DenseMap> - &UnderlyingObjects) { +bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets, + const MemAccessInfoList &CheckDeps) { MinDepDistBytes = -1; SmallPtrSet Visited; @@ -2294,8 +2286,8 @@ bool MemoryDepChecker::areDepsSafe( if (*I1 > *I2) std::swap(A, B); - Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, - B.second, UnderlyingObjects); + Dependence::DepType Type = + isDependent(*A.first, A.second, *B.first, B.second); mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences @@ -2650,8 +2642,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI, if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, - Accesses.getDependenciesToCheck(), - Accesses.getUnderlyingObjects()); + Accesses.getDependenciesToCheck()); if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); diff --git a/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll b/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll index 2e61a28039846..6d8e296ec72fa 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/load-store-index-loaded-in-loop.ll @@ -9,21 +9,19 @@ define void @B_indices_loaded_in_loop_A_stored(ptr %A, ptr noalias %B, i64 %N, i64 %off) { ; CHECK-LABEL: 'B_indices_loaded_in_loop_A_stored' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: IndirectUnsafe: +; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 -> +; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %indices = load i8, ptr %gep.A, align 1 -> +; CHECK-NEXT: store i32 %l, ptr %gep.C, align 4 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Check 0: -; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.C = getelementptr inbounds i32, ptr %A, i64 %iv -; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]): -; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.off ; CHECK-NEXT: Grouped accesses: -; CHECK-NEXT: Group [[GRP1]]: -; CHECK-NEXT: (Low: %A High: ((4 * %N) + %A)) -; CHECK-NEXT: Member: {%A,+,4}<%loop> -; CHECK-NEXT: Group [[GRP2]]: -; CHECK-NEXT: (Low: (%off + %A) High: (%N + %off + %A)) -; CHECK-NEXT: Member: {(%off + %A),+,1}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -59,9 +57,9 @@ define void @B_indices_loaded_in_loop_A_not_stored(ptr %A, ptr noalias %B, i64 % ; CHECK-LABEL: 'B_indices_loaded_in_loop_A_not_stored' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: IndirectUnsafe: ; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 -> ; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4 ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll index 546a75cf4efd5..28ee6c6f0a89a 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll @@ -13,9 +13,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop ; CHECK-NOT: Report: cannot identify array bounds -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: IndirectUnsafe: ; CHECK-NEXT: %loadA = load i16, ptr %arrayidxA, align 2 -> ; CHECK-NEXT: store i16 %mul, ptr %arrayidxA, align 2 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll index 18e45f469b4a3..8ca30383092c6 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll @@ -9,8 +9,9 @@ ; CHECK-LABEL: 'negative_step' ; CHECK: LAA: Found an analyzable loop: loop ; CHECK: LAA: Checking memory dependencies -; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<%loop>Sink Scev: {(4088 + %A),+,-4}<%loop>(Induction step: -1) +; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<%loop>Sink Scev: {(4088 + %A),+,-4}<%loop> ; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4 +; CHECK-NEXT: LAA: Src induction step: -1 Sink induction step: -1 ; CHECK-NEXT: LAA: Dependence is negative define void @negative_step(ptr nocapture %A) { @@ -41,8 +42,9 @@ exit: ; CHECK-LABEL: 'positive_step' ; CHECK: LAA: Found an analyzable loop: loop ; CHECK: LAA: Checking memory dependencies -; CHECK-NEXT: LAA: Src Scev: {(4 + %A),+,4}<%loop>Sink Scev: {%A,+,4}<%loop>(Induction step: 1) +; CHECK-NEXT: LAA: Src Scev: {(4 + %A),+,4}<%loop>Sink Scev: {%A,+,4}<%loop> ; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4 +; CHECK-NEXT: LAA: Src induction step: 1 Sink induction step: 1 ; CHECK-NEXT: LAA: Dependence is negative define void @positive_step(ptr nocapture %A) { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll index 60fe8b4fcbed4..8bef7583c35c0 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll @@ -5,9 +5,9 @@ define void @test(ptr noalias %x, ptr noalias %y, ptr noalias %z) { ; CHECK-LABEL: 'test' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: IndirectUnsafe: ; CHECK-NEXT: %load = load double, ptr %gep.sel, align 8 -> ; CHECK-NEXT: store double %load, ptr %gep.sel2, align 8 ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll index 7c1b11e22aef2..f0aed2421a96e 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -276,9 +276,9 @@ define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i ; CHECK-LABEL: 'single_stride_used_for_trip_count' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Unsafe indirect dependence. ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: IndirectUnsafe: ; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4 ; CHECK-EMPTY: