Skip to content

[LAA] Refine stride checks for SCEVs during dependence analysis. #99577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 8 additions & 14 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,7 @@ class MemoryDepChecker {
/// Check whether the dependencies between the accesses are safe.
///
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects);
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps);

/// No memory dependence was encountered that would inhibit
/// vectorization.
Expand Down Expand Up @@ -351,11 +349,8 @@ class MemoryDepChecker {
/// element access it records this distance in \p MinDepDistBytes (if this
/// distance is smaller than any other distance encountered so far).
/// Otherwise, this function returns true signaling a possible dependence.
Dependence::DepType
isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects);
Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx);

/// Check whether the data dependence could prevent store-load
/// forwarding.
Expand Down Expand Up @@ -392,11 +387,9 @@ class MemoryDepChecker {
/// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
/// BIsWrite).
std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
getDependenceDistanceStrideAndSize(
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
Instruction *BInst,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects);
getDependenceDistanceStrideAndSize(const MemAccessInfo &A, Instruction *AInst,
const MemAccessInfo &B,
Instruction *BInst);
};

class RuntimePointerChecking;
Expand Down Expand Up @@ -797,7 +790,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
Value *Ptr);

/// If the pointer has a constant stride return it in units of the access type
/// size. Otherwise return std::nullopt.
/// size. If the pointer is loop-invariant, return 0. Otherwise return
/// std::nullopt.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
Expand Down
118 changes: 55 additions & 63 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,6 @@ class AccessAnalysis {

MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

const DenseMap<Value *, SmallVector<const Value *, 16>> &
getUnderlyingObjects() {
return UnderlyingObjects;
}

private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;

Expand Down Expand Up @@ -1460,21 +1455,23 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}

/// Check whether the access through \p Ptr has a constant stride.
std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
Type *AccessTy, Value *Ptr,
const Loop *Lp,
const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
std::optional<int64_t>
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
const Loop *Lp,
const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
return {0};

Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");

if (isa<ScalableVectorType>(AccessTy)) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
<< "\n");
return std::nullopt;
}

const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
if (Assume && !AR)
Expand Down Expand Up @@ -1900,23 +1897,11 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
return ScaledDist % Stride;
}

/// Returns true if any of the underlying objects has a loop varying address,
/// i.e. may change in \p L.
static bool
isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
ScalarEvolution &SE, const Loop *L) {
return any_of(UnderlyingObjects, [&SE, L](const Value *UO) {
return !SE.isLoopInvariant(SE.getSCEV(const_cast<Value *>(UO)), L);
});
}

std::variant<MemoryDepChecker::Dependence::DepType,
MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects) {
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
auto &DL = InnermostLoop->getHeader()->getDataLayout();
auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
Expand All @@ -1934,39 +1919,30 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;

int64_t StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
.value_or(0);
int64_t StrideBPtr =
getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true)
.value_or(0);
std::optional<int64_t> StrideAPtr = getPtrStride(
PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
std::optional<int64_t> StrideBPtr = getPtrStride(
PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);

const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);

// If the induction step is negative we have to invert source and sink of the
// dependence when measuring the distance between them. We should not swap
// AIsWrite with BIsWrite, as their uses expect them in program order.
if (StrideAPtr < 0) {
if (StrideAPtr && *StrideAPtr < 0) {
std::swap(Src, Sink);
std::swap(AInst, BInst);
std::swap(StrideAPtr, StrideBPtr);
}

const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
<< "(Induction step: " << StrideAPtr << ")\n");
<< "\n");
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
<< ": " << *Dist << "\n");

// Needs accesses where the addresses of the accessed underlying objects do
// not change within the loop.
if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
InnermostLoop) ||
isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
InnermostLoop))
return MemoryDepChecker::Dependence::IndirectUnsafe;

// Check if we can prove that Sink only accesses memory after Src's end or
// vice versa. At the moment this is limited to cases where either source or
// sink are loop invariant to avoid compile-time increases. This is not
Expand All @@ -1988,12 +1964,33 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
}
}

// Need accesses with constant strides and the same direction. We don't want
// to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that
// could wrap in the address space.
if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) ||
(StrideAPtr < 0 && StrideBPtr > 0)) {
// Need accesses with constant strides and the same direction for further
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
// similar code or pointer arithmetic that could wrap in the address space.

// If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
// not loop-invariant (stride will be 0 in that case), we cannot analyze the
// dependence further and also cannot generate runtime checks.
if (!StrideAPtr || !StrideBPtr) {
LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
return MemoryDepChecker::Dependence::IndirectUnsafe;
}

int64_t StrideAPtrInt = *StrideAPtr;
int64_t StrideBPtrInt = *StrideBPtr;
LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
<< " Sink induction step: " << StrideBPtrInt << "\n");
// At least Src or Sink are loop invariant and the other is strided or
// invariant. We can generate a runtime check to disambiguate the accesses.
if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
return MemoryDepChecker::Dependence::Unknown;

// Both Src and Sink have a constant stride, check if they are in the same
// direction.
if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
(StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
LLVM_DEBUG(
dbgs() << "Pointer access with strides in different directions\n");
return MemoryDepChecker::Dependence::Unknown;
}

Expand All @@ -2002,22 +1999,20 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
if (!HasSameSize)
TypeByteSize = 0;
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr),
std::abs(StrideBPtr), TypeByteSize,
return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
std::abs(StrideBPtrInt), TypeByteSize,
AIsWrite, BIsWrite);
}

MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects) {
MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx) {
assert(AIdx < BIdx && "Must pass arguments in program order");

// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
auto Res = getDependenceDistanceStrideAndSize(
A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
auto Res =
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);

Expand Down Expand Up @@ -2251,10 +2246,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const DenseMap<Value *, SmallVector<const Value *, 16>>
&UnderlyingObjects) {
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
MemAccessInfoList &CheckDeps) {

MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
Expand Down Expand Up @@ -2297,8 +2290,8 @@ bool MemoryDepChecker::areDepsSafe(
if (*I1 > *I2)
std::swap(A, B);

Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
B.second, UnderlyingObjects);
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second);
mergeInStatus(Dependence::isSafeForVectorization(Type));

// Gather dependences unless we accumulated MaxDependences
Expand Down Expand Up @@ -2653,8 +2646,7 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
Accesses.getDependenciesToCheck(),
Accesses.getUnderlyingObjects());
Accesses.getDependenciesToCheck());

if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,19 @@
define void @B_indices_loaded_in_loop_A_stored(ptr %A, ptr noalias %B, i64 %N, i64 %off) {
; CHECK-LABEL: 'B_indices_loaded_in_loop_A_stored'
; CHECK-NEXT: loop:
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 ->
; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %indices = load i8, ptr %gep.A, align 1 ->
; CHECK-NEXT: store i32 %l, ptr %gep.C, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.C = getelementptr inbounds i32, ptr %A, i64 %iv
; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.off
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP1]]:
; CHECK-NEXT: (Low: %A High: ((4 * %N) + %A))
; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop>
; CHECK-NEXT: Group [[GRP2]]:
; CHECK-NEXT: (Low: (%off + %A) High: (%N + %off + %A))
; CHECK-NEXT: Member: {(%off + %A),+,1}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
Expand Down Expand Up @@ -59,9 +57,9 @@ define void @B_indices_loaded_in_loop_A_not_stored(ptr %A, ptr noalias %B, i64 %
; CHECK-LABEL: 'B_indices_loaded_in_loop_A_not_stored'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %l = load i32, ptr %gep.B, align 4 ->
; CHECK-NEXT: store i32 %inc, ptr %gep.B, align 4
; CHECK-EMPTY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Report: cannot identify array bounds
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %loadA = load i16, ptr %arrayidxA, align 2 ->
; CHECK-NEXT: store i16 %mul, ptr %arrayidxA, align 2

Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
; CHECK-LABEL: 'negative_step'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>
; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4
; CHECK-NEXT: LAA: Src induction step: -1 Sink induction step: -1
; CHECK-NEXT: LAA: Dependence is negative

define void @negative_step(ptr nocapture %A) {
Expand Down Expand Up @@ -41,8 +42,9 @@ exit:
; CHECK-LABEL: 'positive_step'
; CHECK: LAA: Found an analyzable loop: loop
; CHECK: LAA: Checking memory dependencies
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>
; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4
; CHECK-NEXT: LAA: Src induction step: 1 Sink induction step: 1
; CHECK-NEXT: LAA: Dependence is negative

define void @positive_step(ptr nocapture %A) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/select-dependence.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ define void @test(ptr noalias %x, ptr noalias %y, ptr noalias %z) {
; CHECK-LABEL: 'test'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load double, ptr %gep.sel, align 8 ->
; CHECK-NEXT: store double %load, ptr %gep.sel2, align 8
; CHECK-EMPTY:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,9 @@ define void @single_stride_used_for_trip_count(ptr noalias %A, ptr noalias %B, i
; CHECK-LABEL: 'single_stride_used_for_trip_count'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Unsafe indirect dependence.
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: IndirectUnsafe:
; CHECK-NEXT: %load = load i32, ptr %gep.A, align 4 ->
; CHECK-NEXT: store i32 %add, ptr %gep.A.next, align 4
; CHECK-EMPTY:
Expand Down
Loading