This patch moves the validation logic of delinearization results from DA to Delinearization. Also call it in `printDelinearization` to test its behavior. The motivation is as follows: - Almost the same code exists in `tryDelinearizeFixedSize` and `tryDelinearizeParametricSize`. Consolidating it in Delinearization avoids code duplication. - Currently this validation logic is not well tested. Moving it to Delinearization allows us to write regression tests easily. This patch changes the test outputs and debug messages, but otherwise NFCI.
889 lines
29 KiB
C++
889 lines
29 KiB
C++
//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This implements an analysis pass that tries to delinearize all GEP
|
|
// instructions in all loops using the SCEV analysis functionality. This pass is
|
|
// only used for testing purposes: if your pass needs delinearization, please
|
|
// use the on-demand SCEVAddRecExpr::delinearize() function.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Analysis/Delinearization.h"
|
|
#include "llvm/Analysis/LoopInfo.h"
|
|
#include "llvm/Analysis/ScalarEvolution.h"
|
|
#include "llvm/Analysis/ScalarEvolutionDivision.h"
|
|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/InstIterator.h"
|
|
#include "llvm/IR/Instructions.h"
|
|
#include "llvm/IR/PassManager.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DL_NAME "delinearize"
|
|
#define DEBUG_TYPE DL_NAME
|
|
|
|
static cl::opt<bool> UseFixedSizeArrayHeuristic(
|
|
"delinearize-use-fixed-size-array-heuristic", cl::init(false), cl::Hidden,
|
|
cl::desc("When printing analysis, use the heuristic for fixed-size arrays "
|
|
"if the default delinearizetion fails."));
|
|
|
|
// Return true when S contains at least an undef value.
|
|
static inline bool containsUndefs(const SCEV *S) {
|
|
return SCEVExprContains(S, [](const SCEV *S) {
|
|
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
|
|
return isa<UndefValue>(SU->getValue());
|
|
return false;
|
|
});
|
|
}
|
|
|
|
namespace {
|
|
|
|
// Collect all steps of SCEV expressions.
|
|
struct SCEVCollectStrides {
|
|
ScalarEvolution &SE;
|
|
SmallVectorImpl<const SCEV *> &Strides;
|
|
|
|
SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
|
|
: SE(SE), Strides(S) {}
|
|
|
|
bool follow(const SCEV *S) {
|
|
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
|
|
Strides.push_back(AR->getStepRecurrence(SE));
|
|
return true;
|
|
}
|
|
|
|
bool isDone() const { return false; }
|
|
};
|
|
|
|
// Collect all SCEVUnknown and SCEVMulExpr expressions.
|
|
struct SCEVCollectTerms {
|
|
SmallVectorImpl<const SCEV *> &Terms;
|
|
|
|
SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
|
|
|
|
bool follow(const SCEV *S) {
|
|
if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
|
|
isa<SCEVSignExtendExpr>(S)) {
|
|
if (!containsUndefs(S))
|
|
Terms.push_back(S);
|
|
|
|
// Stop recursion: once we collected a term, do not walk its operands.
|
|
return false;
|
|
}
|
|
|
|
// Keep looking.
|
|
return true;
|
|
}
|
|
|
|
bool isDone() const { return false; }
|
|
};
|
|
|
|
// Check if a SCEV contains an AddRecExpr.
|
|
struct SCEVHasAddRec {
|
|
bool &ContainsAddRec;
|
|
|
|
SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
|
|
ContainsAddRec = false;
|
|
}
|
|
|
|
bool follow(const SCEV *S) {
|
|
if (isa<SCEVAddRecExpr>(S)) {
|
|
ContainsAddRec = true;
|
|
|
|
// Stop recursion: once we collected a term, do not walk its operands.
|
|
return false;
|
|
}
|
|
|
|
// Keep looking.
|
|
return true;
|
|
}
|
|
|
|
bool isDone() const { return false; }
|
|
};
|
|
|
|
// Find factors that are multiplied with an expression that (possibly as a
|
|
// subexpression) contains an AddRecExpr. In the expression:
|
|
//
|
|
// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
|
|
//
|
|
// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
|
|
// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
|
|
// parameters as they form a product with an induction variable.
|
|
//
|
|
// This collector expects all array size parameters to be in the same MulExpr.
|
|
// It might be necessary to later add support for collecting parameters that are
|
|
// spread over different nested MulExpr.
|
|
struct SCEVCollectAddRecMultiplies {
|
|
SmallVectorImpl<const SCEV *> &Terms;
|
|
ScalarEvolution &SE;
|
|
|
|
SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
|
|
ScalarEvolution &SE)
|
|
: Terms(T), SE(SE) {}
|
|
|
|
bool follow(const SCEV *S) {
|
|
if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
|
|
bool HasAddRec = false;
|
|
SmallVector<const SCEV *, 0> Operands;
|
|
for (const SCEV *Op : Mul->operands()) {
|
|
const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
|
|
if (Unknown && !isa<CallInst>(Unknown->getValue())) {
|
|
Operands.push_back(Op);
|
|
} else if (Unknown) {
|
|
HasAddRec = true;
|
|
} else {
|
|
bool ContainsAddRec = false;
|
|
SCEVHasAddRec ContiansAddRec(ContainsAddRec);
|
|
visitAll(Op, ContiansAddRec);
|
|
HasAddRec |= ContainsAddRec;
|
|
}
|
|
}
|
|
if (Operands.size() == 0)
|
|
return true;
|
|
|
|
if (!HasAddRec)
|
|
return false;
|
|
|
|
Terms.push_back(SE.getMulExpr(Operands));
|
|
// Stop recursion: once we collected a term, do not walk its operands.
|
|
return false;
|
|
}
|
|
|
|
// Keep looking.
|
|
return true;
|
|
}
|
|
|
|
bool isDone() const { return false; }
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
|
|
/// two places:
|
|
/// 1) The strides of AddRec expressions.
|
|
/// 2) Unknowns that are multiplied with AddRec expressions.
|
|
void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
|
|
SmallVectorImpl<const SCEV *> &Terms) {
|
|
SmallVector<const SCEV *, 4> Strides;
|
|
SCEVCollectStrides StrideCollector(SE, Strides);
|
|
visitAll(Expr, StrideCollector);
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Strides:\n";
|
|
for (const SCEV *S : Strides)
|
|
dbgs().indent(2) << *S << "\n";
|
|
});
|
|
|
|
for (const SCEV *S : Strides) {
|
|
SCEVCollectTerms TermCollector(Terms);
|
|
visitAll(S, TermCollector);
|
|
}
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Terms:\n";
|
|
for (const SCEV *T : Terms)
|
|
dbgs().indent(2) << *T << "\n";
|
|
});
|
|
|
|
SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
|
|
visitAll(Expr, MulCollector);
|
|
}
|
|
|
|
static bool findArrayDimensionsRec(ScalarEvolution &SE,
|
|
SmallVectorImpl<const SCEV *> &Terms,
|
|
SmallVectorImpl<const SCEV *> &Sizes) {
|
|
int Last = Terms.size() - 1;
|
|
const SCEV *Step = Terms[Last];
|
|
|
|
// End of recursion.
|
|
if (Last == 0) {
|
|
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
|
|
SmallVector<const SCEV *, 2> Qs;
|
|
for (const SCEV *Op : M->operands())
|
|
if (!isa<SCEVConstant>(Op))
|
|
Qs.push_back(Op);
|
|
|
|
Step = SE.getMulExpr(Qs);
|
|
}
|
|
|
|
Sizes.push_back(Step);
|
|
return true;
|
|
}
|
|
|
|
for (const SCEV *&Term : Terms) {
|
|
// Normalize the terms before the next call to findArrayDimensionsRec.
|
|
const SCEV *Q, *R;
|
|
SCEVDivision::divide(SE, Term, Step, &Q, &R);
|
|
|
|
// Bail out when GCD does not evenly divide one of the terms.
|
|
if (!R->isZero())
|
|
return false;
|
|
|
|
Term = Q;
|
|
}
|
|
|
|
// Remove all SCEVConstants.
|
|
erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
|
|
|
|
if (Terms.size() > 0)
|
|
if (!findArrayDimensionsRec(SE, Terms, Sizes))
|
|
return false;
|
|
|
|
Sizes.push_back(Step);
|
|
return true;
|
|
}
|
|
|
|
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
|
|
static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
|
|
for (const SCEV *T : Terms)
|
|
if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
// Return the number of product terms in S.
|
|
static inline int numberOfTerms(const SCEV *S) {
|
|
if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
|
|
return Expr->getNumOperands();
|
|
return 1;
|
|
}
|
|
|
|
static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
|
|
if (isa<SCEVConstant>(T))
|
|
return nullptr;
|
|
|
|
if (isa<SCEVUnknown>(T))
|
|
return T;
|
|
|
|
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
|
|
SmallVector<const SCEV *, 2> Factors;
|
|
for (const SCEV *Op : M->operands())
|
|
if (!isa<SCEVConstant>(Op))
|
|
Factors.push_back(Op);
|
|
|
|
return SE.getMulExpr(Factors);
|
|
}
|
|
|
|
return T;
|
|
}
|
|
|
|
void llvm::findArrayDimensions(ScalarEvolution &SE,
|
|
SmallVectorImpl<const SCEV *> &Terms,
|
|
SmallVectorImpl<const SCEV *> &Sizes,
|
|
const SCEV *ElementSize) {
|
|
if (Terms.size() < 1 || !ElementSize)
|
|
return;
|
|
|
|
// Early return when Terms do not contain parameters: we do not delinearize
|
|
// non parametric SCEVs.
|
|
if (!containsParameters(Terms))
|
|
return;
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Terms:\n";
|
|
for (const SCEV *T : Terms)
|
|
dbgs().indent(2) << *T << "\n";
|
|
});
|
|
|
|
// Remove duplicates.
|
|
array_pod_sort(Terms.begin(), Terms.end());
|
|
Terms.erase(llvm::unique(Terms), Terms.end());
|
|
|
|
// Put larger terms first.
|
|
llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
|
|
return numberOfTerms(LHS) > numberOfTerms(RHS);
|
|
});
|
|
|
|
// Try to divide all terms by the element size. If term is not divisible by
|
|
// element size, proceed with the original term.
|
|
for (const SCEV *&Term : Terms) {
|
|
const SCEV *Q, *R;
|
|
SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
|
|
if (!Q->isZero())
|
|
Term = Q;
|
|
}
|
|
|
|
SmallVector<const SCEV *, 4> NewTerms;
|
|
|
|
// Remove constant factors.
|
|
for (const SCEV *T : Terms)
|
|
if (const SCEV *NewT = removeConstantFactors(SE, T))
|
|
NewTerms.push_back(NewT);
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Terms after sorting:\n";
|
|
for (const SCEV *T : NewTerms)
|
|
dbgs().indent(2) << *T << "\n";
|
|
});
|
|
|
|
if (NewTerms.empty() || !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
|
|
Sizes.clear();
|
|
return;
|
|
}
|
|
|
|
// The last element to be pushed into Sizes is the size of an element.
|
|
Sizes.push_back(ElementSize);
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Sizes:\n";
|
|
for (const SCEV *S : Sizes)
|
|
dbgs().indent(2) << *S << "\n";
|
|
});
|
|
}
|
|
|
|
void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
|
|
SmallVectorImpl<const SCEV *> &Subscripts,
|
|
SmallVectorImpl<const SCEV *> &Sizes) {
|
|
// Early exit in case this SCEV is not an affine multivariate function.
|
|
if (Sizes.empty())
|
|
return;
|
|
|
|
if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
|
|
if (!AR->isAffine())
|
|
return;
|
|
|
|
LLVM_DEBUG(dbgs() << "\ncomputeAccessFunctions\n"
|
|
<< "Memory Access Function: " << *Expr << "\n");
|
|
|
|
const SCEV *Res = Expr;
|
|
int Last = Sizes.size() - 1;
|
|
|
|
for (int i = Last; i >= 0; i--) {
|
|
const SCEV *Size = Sizes[i];
|
|
const SCEV *Q, *R;
|
|
|
|
SCEVDivision::divide(SE, Res, Size, &Q, &R);
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Computing 'MemAccFn / Sizes[" << i << "]':\n";
|
|
dbgs() << " MemAccFn: " << *Res << "\n";
|
|
dbgs() << " Sizes[" << i << "]: " << *Size << "\n";
|
|
dbgs() << " Quotient (Leftover): " << *Q << "\n";
|
|
dbgs() << " Remainder (Subscript Access Function): " << *R << "\n";
|
|
});
|
|
|
|
Res = Q;
|
|
|
|
// Do not record the last subscript corresponding to the size of elements in
|
|
// the array.
|
|
if (i == Last) {
|
|
|
|
// Bail out if the byte offset is non-zero.
|
|
if (!R->isZero()) {
|
|
Subscripts.clear();
|
|
Sizes.clear();
|
|
return;
|
|
}
|
|
|
|
continue;
|
|
}
|
|
|
|
// Record the access function for the current subscript.
|
|
Subscripts.push_back(R);
|
|
}
|
|
|
|
// Also push in last position the remainder of the last division: it will be
|
|
// the access function of the innermost dimension.
|
|
Subscripts.push_back(Res);
|
|
|
|
std::reverse(Subscripts.begin(), Subscripts.end());
|
|
|
|
LLVM_DEBUG({
|
|
dbgs() << "Subscripts:\n";
|
|
for (const SCEV *S : Subscripts)
|
|
dbgs().indent(2) << *S << "\n";
|
|
dbgs() << "\n";
|
|
});
|
|
}
|
|
|
|
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
|
|
/// sizes of an array access. Returns the remainder of the delinearization that
|
|
/// is the offset start of the array. The SCEV->delinearize algorithm computes
|
|
/// the multiples of SCEV coefficients: that is a pattern matching of sub
|
|
/// expressions in the stride and base of a SCEV corresponding to the
|
|
/// computation of a GCD (greatest common divisor) of base and stride. When
|
|
/// SCEV->delinearize fails, it returns the SCEV unchanged.
|
|
///
|
|
/// For example: when analyzing the memory access A[i][j][k] in this loop nest
|
|
///
|
|
/// void foo(long n, long m, long o, double A[n][m][o]) {
|
|
///
|
|
/// for (long i = 0; i < n; i++)
|
|
/// for (long j = 0; j < m; j++)
|
|
/// for (long k = 0; k < o; k++)
|
|
/// A[i][j][k] = 1.0;
|
|
/// }
|
|
///
|
|
/// the delinearization input is the following AddRec SCEV:
|
|
///
|
|
/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
|
|
///
|
|
/// From this SCEV, we are able to say that the base offset of the access is %A
|
|
/// because it appears as an offset that does not divide any of the strides in
|
|
/// the loops:
|
|
///
|
|
/// CHECK: Base offset: %A
|
|
///
|
|
/// and then SCEV->delinearize determines the size of some of the dimensions of
|
|
/// the array as these are the multiples by which the strides are happening:
|
|
///
|
|
/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
|
|
/// bytes.
|
|
///
|
|
/// Note that the outermost dimension remains of UnknownSize because there are
|
|
/// no strides that would help identifying the size of the last dimension: when
|
|
/// the array has been statically allocated, one could compute the size of that
|
|
/// dimension by dividing the overall size of the array by the size of the known
|
|
/// dimensions: %m * %o * 8.
|
|
///
|
|
/// Finally delinearize provides the access functions for the array reference
|
|
/// that does correspond to A[i][j][k] of the above C testcase:
|
|
///
|
|
/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
|
|
///
|
|
/// The testcases are checking the output of a function pass:
|
|
/// DelinearizationPass that walks through all loads and stores of a function
|
|
/// asking for the SCEV of the memory access with respect to all enclosing
|
|
/// loops, calling SCEV->delinearize on that and printing the results.
|
|
void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
|
|
SmallVectorImpl<const SCEV *> &Subscripts,
|
|
SmallVectorImpl<const SCEV *> &Sizes,
|
|
const SCEV *ElementSize) {
|
|
// First step: collect parametric terms.
|
|
SmallVector<const SCEV *, 4> Terms;
|
|
collectParametricTerms(SE, Expr, Terms);
|
|
|
|
if (Terms.empty())
|
|
return;
|
|
|
|
// Second step: find subscript sizes.
|
|
findArrayDimensions(SE, Terms, Sizes, ElementSize);
|
|
|
|
if (Sizes.empty())
|
|
return;
|
|
|
|
// Third step: compute the access functions for each subscript.
|
|
computeAccessFunctions(SE, Expr, Subscripts, Sizes);
|
|
}
|
|
|
|
static std::optional<APInt> tryIntoAPInt(const SCEV *S) {
|
|
if (const auto *Const = dyn_cast<SCEVConstant>(S))
|
|
return Const->getAPInt();
|
|
return std::nullopt;
|
|
}
|
|
|
|
/// Collects the absolute values of constant steps for all induction variables.
|
|
/// Returns true if we can prove that all step recurrences are constants and \p
|
|
/// Expr is divisible by \p ElementSize. Each step recurrence is stored in \p
|
|
/// Steps after divided by \p ElementSize.
|
|
static bool collectConstantAbsSteps(ScalarEvolution &SE, const SCEV *Expr,
|
|
SmallVectorImpl<uint64_t> &Steps,
|
|
uint64_t ElementSize) {
|
|
// End of recursion. The constant value also must be a multiple of
|
|
// ElementSize.
|
|
if (const auto *Const = dyn_cast<SCEVConstant>(Expr)) {
|
|
const uint64_t Mod = Const->getAPInt().urem(ElementSize);
|
|
return Mod == 0;
|
|
}
|
|
|
|
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Expr);
|
|
if (!AR || !AR->isAffine())
|
|
return false;
|
|
|
|
const SCEV *Step = AR->getStepRecurrence(SE);
|
|
std::optional<APInt> StepAPInt = tryIntoAPInt(Step);
|
|
if (!StepAPInt)
|
|
return false;
|
|
|
|
APInt Q;
|
|
uint64_t R;
|
|
APInt::udivrem(StepAPInt->abs(), ElementSize, Q, R);
|
|
if (R != 0)
|
|
return false;
|
|
|
|
// Bail out when the step is too large.
|
|
std::optional<uint64_t> StepVal = Q.tryZExtValue();
|
|
if (!StepVal)
|
|
return false;
|
|
|
|
Steps.push_back(*StepVal);
|
|
return collectConstantAbsSteps(SE, AR->getStart(), Steps, ElementSize);
|
|
}
|
|
|
|
bool llvm::findFixedSizeArrayDimensions(ScalarEvolution &SE, const SCEV *Expr,
|
|
SmallVectorImpl<uint64_t> &Sizes,
|
|
const SCEV *ElementSize) {
|
|
if (!ElementSize)
|
|
return false;
|
|
|
|
std::optional<APInt> ElementSizeAPInt = tryIntoAPInt(ElementSize);
|
|
if (!ElementSizeAPInt || *ElementSizeAPInt == 0)
|
|
return false;
|
|
|
|
std::optional<uint64_t> ElementSizeConst = ElementSizeAPInt->tryZExtValue();
|
|
|
|
// Early exit when ElementSize is not a positive constant.
|
|
if (!ElementSizeConst)
|
|
return false;
|
|
|
|
if (!collectConstantAbsSteps(SE, Expr, Sizes, *ElementSizeConst) ||
|
|
Sizes.empty()) {
|
|
Sizes.clear();
|
|
return false;
|
|
}
|
|
|
|
// At this point, Sizes contains the absolute step recurrences for all
|
|
// induction variables. Each step recurrence must be a multiple of the size of
|
|
// the array element. Assuming that the each value represents the size of an
|
|
// array for each dimension, attempts to restore the length of each dimension
|
|
// by dividing the step recurrence by the next smaller value. For example, if
|
|
// we have the following AddRec SCEV:
|
|
//
|
|
// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
|
|
//
|
|
// Then Sizes will become [256, 32, 1] after sorted. We don't know the size of
|
|
// the outermost dimension, the next dimension will be computed as 256 / 32 =
|
|
// 8, and the last dimension will be computed as 32 / 1 = 32. Thus it results
|
|
// in like Arr[UnknownSize][8][32] with elements of size 8 bytes, where Arr is
|
|
// a base pointer.
|
|
//
|
|
// TODO: Catch more cases, e.g., when a step recurrence is not divisible by
|
|
// the next smaller one, like A[i][3*j].
|
|
llvm::sort(Sizes.rbegin(), Sizes.rend());
|
|
Sizes.erase(llvm::unique(Sizes), Sizes.end());
|
|
|
|
// The last element in Sizes should be ElementSize. At this point, all values
|
|
// in Sizes are assumed to be divided by ElementSize, so replace it with 1.
|
|
assert(Sizes.back() != 0 && "Unexpected zero size in Sizes.");
|
|
Sizes.back() = 1;
|
|
|
|
for (unsigned I = 0; I + 1 < Sizes.size(); I++) {
|
|
uint64_t PrevSize = Sizes[I + 1];
|
|
if (Sizes[I] % PrevSize) {
|
|
Sizes.clear();
|
|
return false;
|
|
}
|
|
Sizes[I] /= PrevSize;
|
|
}
|
|
|
|
// Finally, the last element in Sizes should be ElementSize.
|
|
Sizes.back() = *ElementSizeConst;
|
|
return true;
|
|
}
|
|
|
|
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
|
|
/// sizes of an array access, assuming that the array is a fixed size array.
|
|
///
|
|
/// E.g., if we have the code like as follows:
|
|
///
|
|
/// double A[42][8][32];
|
|
/// for i
|
|
/// for j
|
|
/// for k
|
|
/// use A[i][j][k]
|
|
///
|
|
/// The access function will be represented as an AddRec SCEV like:
|
|
///
|
|
/// AddRec: {{{0,+,2048}<%for.i>,+,256}<%for.j>,+,8}<%for.k> (ElementSize=8)
|
|
///
|
|
/// Then findFixedSizeArrayDimensions infers the size of each dimension of the
|
|
/// array based on the fact that the value of the step recurrence is a multiple
|
|
/// of the size of the corresponding array element. In the above example, it
|
|
/// results in the following:
|
|
///
|
|
/// CHECK: ArrayDecl[UnknownSize][8][32] with elements of 8 bytes.
|
|
///
|
|
/// Finally each subscript will be computed as follows:
|
|
///
|
|
/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
|
|
///
|
|
/// Note that this function doesn't check the range of possible values for each
|
|
/// subscript, so the caller should perform additional boundary checks if
|
|
/// necessary.
|
|
///
|
|
/// Also note that this function doesn't guarantee that the original array size
|
|
/// is restored "correctly". For example, in the following case:
|
|
///
|
|
/// double A[42][4][64];
|
|
/// double B[42][8][32];
|
|
/// for i
|
|
/// for j
|
|
/// for k
|
|
/// use A[i][j][k]
|
|
/// use B[i][2*j][k]
|
|
///
|
|
/// The access function for both accesses will be the same:
|
|
///
|
|
/// AddRec: {{{0,+,2048}<%for.i>,+,512}<%for.j>,+,8}<%for.k> (ElementSize=8)
|
|
///
|
|
/// The array sizes for both A and B will be computed as
|
|
/// ArrayDecl[UnknownSize][4][64], which matches for A, but not for B.
|
|
///
|
|
/// TODO: At the moment, this function can handle only simple cases. For
|
|
/// example, we cannot handle a case where a step recurrence is not divisible
|
|
/// by the next smaller step recurrence, e.g., A[i][3*j].
|
|
bool llvm::delinearizeFixedSizeArray(ScalarEvolution &SE, const SCEV *Expr,
|
|
SmallVectorImpl<const SCEV *> &Subscripts,
|
|
SmallVectorImpl<const SCEV *> &Sizes,
|
|
const SCEV *ElementSize) {
|
|
|
|
// First step: find the fixed array size.
|
|
SmallVector<uint64_t, 4> ConstSizes;
|
|
if (!findFixedSizeArrayDimensions(SE, Expr, ConstSizes, ElementSize)) {
|
|
Sizes.clear();
|
|
return false;
|
|
}
|
|
|
|
// Convert the constant size to SCEV.
|
|
for (uint64_t Size : ConstSizes)
|
|
Sizes.push_back(SE.getConstant(Expr->getType(), Size));
|
|
|
|
// Second step: compute the access functions for each subscript.
|
|
computeAccessFunctions(SE, Expr, Subscripts, Sizes);
|
|
|
|
return !Subscripts.empty();
|
|
}
|
|
|
|
static bool isKnownNonNegative(ScalarEvolution *SE, const SCEV *S,
|
|
const Value *Ptr) {
|
|
bool Inbounds = false;
|
|
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr))
|
|
Inbounds = SrcGEP->isInBounds();
|
|
if (Inbounds) {
|
|
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
|
|
if (AddRec->isAffine()) {
|
|
// We know S is for Ptr, the operand on a load/store, so doesn't wrap.
|
|
// If both parts are NonNegative, the end result will be NonNegative
|
|
if (SE->isKnownNonNegative(AddRec->getStart()) &&
|
|
SE->isKnownNonNegative(AddRec->getOperand(1)))
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return SE->isKnownNonNegative(S);
|
|
}
|
|
|
|
/// Compare to see if S is less than Size, using
|
|
///
|
|
/// isKnownNegative(S - Size)
|
|
///
|
|
/// with some extra checking if S is an AddRec and we can prove less-than using
|
|
/// the loop bounds.
|
|
static bool isKnownLessThan(ScalarEvolution *SE, const SCEV *S,
|
|
const SCEV *Size) {
|
|
// First unify to the same type
|
|
auto *SType = dyn_cast<IntegerType>(S->getType());
|
|
auto *SizeType = dyn_cast<IntegerType>(Size->getType());
|
|
if (!SType || !SizeType)
|
|
return false;
|
|
Type *MaxType =
|
|
(SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
|
|
S = SE->getTruncateOrZeroExtend(S, MaxType);
|
|
Size = SE->getTruncateOrZeroExtend(Size, MaxType);
|
|
|
|
auto CollectUpperBound = [&](const Loop *L, Type *T) -> const SCEV * {
|
|
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
|
|
const SCEV *UB = SE->getBackedgeTakenCount(L);
|
|
return SE->getTruncateOrZeroExtend(UB, T);
|
|
}
|
|
return nullptr;
|
|
};
|
|
|
|
auto CheckAddRecBECount = [&]() {
|
|
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
|
|
if (!AddRec || !AddRec->isAffine() || !AddRec->hasNoSignedWrap())
|
|
return false;
|
|
const SCEV *BECount = CollectUpperBound(AddRec->getLoop(), MaxType);
|
|
// If the BTC cannot be computed, check the base case for S.
|
|
if (!BECount || isa<SCEVCouldNotCompute>(BECount))
|
|
return false;
|
|
const SCEV *Start = AddRec->getStart();
|
|
const SCEV *Step = AddRec->getStepRecurrence(*SE);
|
|
const SCEV *End = AddRec->evaluateAtIteration(BECount, *SE);
|
|
const SCEV *Diff0 = SE->getMinusSCEV(Start, Size);
|
|
const SCEV *Diff1 = SE->getMinusSCEV(End, Size);
|
|
|
|
// If the value of Step is non-negative and the AddRec is non-wrap, it
|
|
// reaches its maximum at the last iteration. So it's enouth to check
|
|
// whether End - Size is negative.
|
|
if (SE->isKnownNonNegative(Step) && SE->isKnownNegative(Diff1))
|
|
return true;
|
|
|
|
// If the value of Step is non-positive and the AddRec is non-wrap, the
|
|
// initial value is its maximum.
|
|
if (SE->isKnownNonPositive(Step) && SE->isKnownNegative(Diff0))
|
|
return true;
|
|
|
|
// Even if we don't know the sign of Step, either Start or End must be
|
|
// the maximum value of the AddRec since it is non-wrap.
|
|
if (SE->isKnownNegative(Diff0) && SE->isKnownNegative(Diff1))
|
|
return true;
|
|
|
|
return false;
|
|
};
|
|
|
|
if (CheckAddRecBECount())
|
|
return true;
|
|
|
|
// Check using normal isKnownNegative
|
|
const SCEV *LimitedBound = SE->getMinusSCEV(S, Size);
|
|
return SE->isKnownNegative(LimitedBound);
|
|
}
|
|
|
|
bool llvm::validateDelinearizationResult(ScalarEvolution &SE,
|
|
ArrayRef<const SCEV *> Sizes,
|
|
ArrayRef<const SCEV *> Subscripts,
|
|
const Value *Ptr) {
|
|
for (size_t I = 1; I < Sizes.size(); ++I) {
|
|
const SCEV *Size = Sizes[I - 1];
|
|
const SCEV *Subscript = Subscripts[I];
|
|
if (!isKnownNonNegative(&SE, Subscript, Ptr))
|
|
return false;
|
|
if (!isKnownLessThan(&SE, Subscript, Size))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
|
|
const GetElementPtrInst *GEP,
|
|
SmallVectorImpl<const SCEV *> &Subscripts,
|
|
SmallVectorImpl<int> &Sizes) {
|
|
assert(Subscripts.empty() && Sizes.empty() &&
|
|
"Expected output lists to be empty on entry to this function.");
|
|
assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
|
|
LLVM_DEBUG(dbgs() << "\nGEP to delinearize: " << *GEP << "\n");
|
|
Type *Ty = nullptr;
|
|
bool DroppedFirstDim = false;
|
|
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
|
|
const SCEV *Expr = SE.getSCEV(GEP->getOperand(i));
|
|
if (i == 1) {
|
|
Ty = GEP->getSourceElementType();
|
|
if (auto *Const = dyn_cast<SCEVConstant>(Expr))
|
|
if (Const->getValue()->isZero()) {
|
|
DroppedFirstDim = true;
|
|
continue;
|
|
}
|
|
Subscripts.push_back(Expr);
|
|
continue;
|
|
}
|
|
|
|
auto *ArrayTy = dyn_cast<ArrayType>(Ty);
|
|
if (!ArrayTy) {
|
|
LLVM_DEBUG(dbgs() << "GEP delinearize failed: " << *Ty
|
|
<< " is not an array type.\n");
|
|
Subscripts.clear();
|
|
Sizes.clear();
|
|
return false;
|
|
}
|
|
|
|
Subscripts.push_back(Expr);
|
|
if (!(DroppedFirstDim && i == 2))
|
|
Sizes.push_back(ArrayTy->getNumElements());
|
|
|
|
Ty = ArrayTy->getElementType();
|
|
}
|
|
LLVM_DEBUG({
|
|
dbgs() << "Subscripts:\n";
|
|
for (const SCEV *S : Subscripts)
|
|
dbgs() << *S << "\n";
|
|
dbgs() << "\n";
|
|
});
|
|
|
|
return !Subscripts.empty();
|
|
}
|
|
|
|
namespace {
|
|
|
|
void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
|
|
ScalarEvolution *SE) {
|
|
O << "Printing analysis 'Delinearization' for function '" << F->getName()
|
|
<< "':";
|
|
for (Instruction &Inst : instructions(F)) {
|
|
// Only analyze loads and stores.
|
|
if (!isa<StoreInst>(&Inst) && !isa<LoadInst>(&Inst))
|
|
continue;
|
|
|
|
const BasicBlock *BB = Inst.getParent();
|
|
Loop *L = LI->getLoopFor(BB);
|
|
// Only delinearize the memory access in the innermost loop.
|
|
// Do not analyze memory accesses outside loops.
|
|
if (!L)
|
|
continue;
|
|
|
|
const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(&Inst), L);
|
|
|
|
const SCEVUnknown *BasePointer =
|
|
dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
|
|
// Do not delinearize if we cannot find the base pointer.
|
|
if (!BasePointer)
|
|
break;
|
|
AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
|
|
|
|
O << "\n";
|
|
O << "Inst:" << Inst << "\n";
|
|
O << "AccessFunction: " << *AccessFn << "\n";
|
|
|
|
SmallVector<const SCEV *, 3> Subscripts, Sizes;
|
|
|
|
auto IsDelinearizationFailed = [&]() {
|
|
return Subscripts.size() == 0 || Sizes.size() == 0 ||
|
|
Subscripts.size() != Sizes.size();
|
|
};
|
|
|
|
delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
|
|
if (UseFixedSizeArrayHeuristic && IsDelinearizationFailed()) {
|
|
Subscripts.clear();
|
|
Sizes.clear();
|
|
delinearizeFixedSizeArray(*SE, AccessFn, Subscripts, Sizes,
|
|
SE->getElementSize(&Inst));
|
|
}
|
|
|
|
if (IsDelinearizationFailed()) {
|
|
O << "failed to delinearize\n";
|
|
continue;
|
|
}
|
|
|
|
O << "Base offset: " << *BasePointer << "\n";
|
|
O << "ArrayDecl[UnknownSize]";
|
|
int Size = Subscripts.size();
|
|
for (int i = 0; i < Size - 1; i++)
|
|
O << "[" << *Sizes[i] << "]";
|
|
O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
|
|
|
|
O << "ArrayRef";
|
|
for (int i = 0; i < Size; i++)
|
|
O << "[" << *Subscripts[i] << "]";
|
|
O << "\n";
|
|
|
|
bool IsValid = validateDelinearizationResult(
|
|
*SE, Sizes, Subscripts, getLoadStorePointerOperand(&Inst));
|
|
O << "Delinearization validation: " << (IsValid ? "Succeeded" : "Failed")
|
|
<< "\n";
|
|
}
|
|
}
|
|
|
|
} // end anonymous namespace
|
|
|
|
DelinearizationPrinterPass::DelinearizationPrinterPass(raw_ostream &OS)
|
|
: OS(OS) {}
|
|
PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
|
|
FunctionAnalysisManager &AM) {
|
|
printDelinearization(OS, &F, &AM.getResult<LoopAnalysis>(F),
|
|
&AM.getResult<ScalarEvolutionAnalysis>(F));
|
|
return PreservedAnalyses::all();
|
|
}
|