
These are identified by misc-include-cleaner. I've filtered out those that break builds. Also, I'm staying away from llvm-config.h, config.h, and Compiler.h, which likely cause platform- or compiler-specific build failures.
301 lines
11 KiB
C++
301 lines
11 KiB
C++
//===---- EVLIndVarSimplify.cpp - Optimize vectorized loops w/ EVL IV------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass optimizes a vectorized loop with canonical IV to using EVL-based
|
|
// IV if it was tail-folded by predicated EVL.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/Analysis/IVDescriptors.h"
|
|
#include "llvm/Analysis/LoopInfo.h"
|
|
#include "llvm/Analysis/LoopPass.h"
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
#include "llvm/Analysis/ScalarEvolution.h"
|
|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
|
#include "llvm/Analysis/ValueTracking.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/PatternMatch.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/MathExtras.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
#include "llvm/Transforms/Scalar/LoopPassManager.h"
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
|
|
#define DEBUG_TYPE "evl-iv-simplify"
|
|
|
|
using namespace llvm;
|
|
|
|
STATISTIC(NumEliminatedCanonicalIV, "Number of canonical IVs we eliminated");
|
|
|
|
static cl::opt<bool> EnableEVLIndVarSimplify(
|
|
"enable-evl-indvar-simplify",
|
|
cl::desc("Enable EVL-based induction variable simplify Pass"), cl::Hidden,
|
|
cl::init(true));
|
|
|
|
namespace {
|
|
struct EVLIndVarSimplifyImpl {
|
|
ScalarEvolution &SE;
|
|
OptimizationRemarkEmitter *ORE = nullptr;
|
|
|
|
EVLIndVarSimplifyImpl(LoopStandardAnalysisResults &LAR,
|
|
OptimizationRemarkEmitter *ORE)
|
|
: SE(LAR.SE), ORE(ORE) {}
|
|
|
|
/// Returns true if modify the loop.
|
|
bool run(Loop &L);
|
|
};
|
|
} // anonymous namespace
|
|
|
|
/// Returns the constant part of vectorization factor from the induction
|
|
/// variable's step value SCEV expression.
|
|
static uint32_t getVFFromIndVar(const SCEV *Step, const Function &F) {
|
|
if (!Step)
|
|
return 0U;
|
|
|
|
// Looking for loops with IV step value in the form of `(<constant VF> x
|
|
// vscale)`.
|
|
if (const auto *Mul = dyn_cast<SCEVMulExpr>(Step)) {
|
|
if (Mul->getNumOperands() == 2) {
|
|
const SCEV *LHS = Mul->getOperand(0);
|
|
const SCEV *RHS = Mul->getOperand(1);
|
|
if (const auto *Const = dyn_cast<SCEVConstant>(LHS);
|
|
Const && isa<SCEVVScale>(RHS)) {
|
|
uint64_t V = Const->getAPInt().getLimitedValue();
|
|
if (llvm::isUInt<32>(V))
|
|
return V;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If not, see if the vscale_range of the parent function is a fixed value,
|
|
// which makes the step value to be replaced by a constant.
|
|
if (F.hasFnAttribute(Attribute::VScaleRange))
|
|
if (const auto *ConstStep = dyn_cast<SCEVConstant>(Step)) {
|
|
APInt V = ConstStep->getAPInt().abs();
|
|
ConstantRange CR = llvm::getVScaleRange(&F, 64);
|
|
if (const APInt *Fixed = CR.getSingleElement()) {
|
|
V = V.zextOrTrunc(Fixed->getBitWidth());
|
|
uint64_t VF = V.udiv(*Fixed).getLimitedValue();
|
|
if (VF && llvm::isUInt<32>(VF) &&
|
|
// Make sure step is divisible by vscale.
|
|
V.urem(*Fixed).isZero())
|
|
return VF;
|
|
}
|
|
}
|
|
|
|
return 0U;
|
|
}
|
|
|
|
bool EVLIndVarSimplifyImpl::run(Loop &L) {
|
|
if (!EnableEVLIndVarSimplify)
|
|
return false;
|
|
|
|
if (!getBooleanLoopAttribute(&L, "llvm.loop.isvectorized"))
|
|
return false;
|
|
const MDOperand *EVLMD =
|
|
findStringMetadataForLoop(&L, "llvm.loop.isvectorized.tailfoldingstyle")
|
|
.value_or(nullptr);
|
|
if (!EVLMD || !EVLMD->equalsStr("evl"))
|
|
return false;
|
|
|
|
BasicBlock *LatchBlock = L.getLoopLatch();
|
|
ICmpInst *OrigLatchCmp = L.getLatchCmpInst();
|
|
if (!LatchBlock || !OrigLatchCmp)
|
|
return false;
|
|
|
|
InductionDescriptor IVD;
|
|
PHINode *IndVar = L.getInductionVariable(SE);
|
|
if (!IndVar || !L.getInductionDescriptor(SE, IVD)) {
|
|
const char *Reason = (IndVar ? "induction descriptor is not available"
|
|
: "cannot recognize induction variable");
|
|
LLVM_DEBUG(dbgs() << "Cannot retrieve IV from loop " << L.getName()
|
|
<< " because" << Reason << "\n");
|
|
if (ORE) {
|
|
ORE->emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedIndVar",
|
|
L.getStartLoc(), L.getHeader())
|
|
<< "Cannot retrieve IV because " << ore::NV("Reason", Reason);
|
|
});
|
|
}
|
|
return false;
|
|
}
|
|
|
|
BasicBlock *InitBlock, *BackEdgeBlock;
|
|
if (!L.getIncomingAndBackEdge(InitBlock, BackEdgeBlock)) {
|
|
LLVM_DEBUG(dbgs() << "Expect unique incoming and backedge in "
|
|
<< L.getName() << "\n");
|
|
if (ORE) {
|
|
ORE->emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedLoopStructure",
|
|
L.getStartLoc(), L.getHeader())
|
|
<< "Does not have a unique incoming and backedge";
|
|
});
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Retrieve the loop bounds.
|
|
std::optional<Loop::LoopBounds> Bounds = L.getBounds(SE);
|
|
if (!Bounds) {
|
|
LLVM_DEBUG(dbgs() << "Could not obtain the bounds for loop " << L.getName()
|
|
<< "\n");
|
|
if (ORE) {
|
|
ORE->emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedLoopStructure",
|
|
L.getStartLoc(), L.getHeader())
|
|
<< "Could not obtain the loop bounds";
|
|
});
|
|
}
|
|
return false;
|
|
}
|
|
Value *CanonicalIVInit = &Bounds->getInitialIVValue();
|
|
Value *CanonicalIVFinal = &Bounds->getFinalIVValue();
|
|
|
|
const SCEV *StepV = IVD.getStep();
|
|
uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent());
|
|
if (!VF) {
|
|
LLVM_DEBUG(dbgs() << "Could not infer VF from IndVar step '" << *StepV
|
|
<< "'\n");
|
|
if (ORE) {
|
|
ORE->emit([&]() {
|
|
return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedIndVar",
|
|
L.getStartLoc(), L.getHeader())
|
|
<< "Could not infer VF from IndVar step "
|
|
<< ore::NV("Step", StepV);
|
|
});
|
|
}
|
|
return false;
|
|
}
|
|
LLVM_DEBUG(dbgs() << "Using VF=" << VF << " for loop " << L.getName()
|
|
<< "\n");
|
|
|
|
// Try to find the EVL-based induction variable.
|
|
using namespace PatternMatch;
|
|
BasicBlock *BB = IndVar->getParent();
|
|
|
|
Value *EVLIndVar = nullptr;
|
|
Value *RemTC = nullptr;
|
|
Value *TC = nullptr;
|
|
auto IntrinsicMatch = m_Intrinsic<Intrinsic::experimental_get_vector_length>(
|
|
m_Value(RemTC), m_SpecificInt(VF),
|
|
/*Scalable=*/m_SpecificInt(1));
|
|
for (PHINode &PN : BB->phis()) {
|
|
if (&PN == IndVar)
|
|
continue;
|
|
|
|
// Check 1: it has to contain both incoming (init) & backedge blocks
|
|
// from IndVar.
|
|
if (PN.getBasicBlockIndex(InitBlock) < 0 ||
|
|
PN.getBasicBlockIndex(BackEdgeBlock) < 0)
|
|
continue;
|
|
// Check 2: EVL index is always increasing, thus its inital value has to be
|
|
// equal to either the initial IV value (when the canonical IV is also
|
|
// increasing) or the last IV value (when canonical IV is decreasing).
|
|
Value *Init = PN.getIncomingValueForBlock(InitBlock);
|
|
using Direction = Loop::LoopBounds::Direction;
|
|
switch (Bounds->getDirection()) {
|
|
case Direction::Increasing:
|
|
if (Init != CanonicalIVInit)
|
|
continue;
|
|
break;
|
|
case Direction::Decreasing:
|
|
if (Init != CanonicalIVFinal)
|
|
continue;
|
|
break;
|
|
case Direction::Unknown:
|
|
// To be more permissive and see if either the initial or final IV value
|
|
// matches PN's init value.
|
|
if (Init != CanonicalIVInit && Init != CanonicalIVFinal)
|
|
continue;
|
|
break;
|
|
}
|
|
Value *RecValue = PN.getIncomingValueForBlock(BackEdgeBlock);
|
|
assert(RecValue && "expect recurrent IndVar value");
|
|
|
|
LLVM_DEBUG(dbgs() << "Found candidate PN of EVL-based IndVar: " << PN
|
|
<< "\n");
|
|
|
|
// Check 3: Pattern match to find the EVL-based index and total trip count
|
|
// (TC).
|
|
if (match(RecValue,
|
|
m_c_Add(m_ZExtOrSelf(IntrinsicMatch), m_Specific(&PN))) &&
|
|
match(RemTC, m_Sub(m_Value(TC), m_Specific(&PN)))) {
|
|
EVLIndVar = RecValue;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!EVLIndVar || !TC)
|
|
return false;
|
|
|
|
LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
|
|
if (ORE) {
|
|
ORE->emit([&]() {
|
|
DebugLoc DL;
|
|
BasicBlock *Region = nullptr;
|
|
if (auto *I = dyn_cast<Instruction>(EVLIndVar)) {
|
|
DL = I->getDebugLoc();
|
|
Region = I->getParent();
|
|
} else {
|
|
DL = L.getStartLoc();
|
|
Region = L.getHeader();
|
|
}
|
|
return OptimizationRemark(DEBUG_TYPE, "UseEVLIndVar", DL, Region)
|
|
<< "Using " << ore::NV("EVLIndVar", EVLIndVar)
|
|
<< " for EVL-based IndVar";
|
|
});
|
|
}
|
|
|
|
// Create an EVL-based comparison and replace the branch to use it as
|
|
// predicate.
|
|
|
|
// Loop::getLatchCmpInst check at the beginning of this function has ensured
|
|
// that latch block ends in a conditional branch.
|
|
auto *LatchBranch = cast<BranchInst>(LatchBlock->getTerminator());
|
|
assert(LatchBranch->isConditional() &&
|
|
"expect the loop latch to be ended with a conditional branch");
|
|
ICmpInst::Predicate Pred;
|
|
if (LatchBranch->getSuccessor(0) == L.getHeader())
|
|
Pred = ICmpInst::ICMP_NE;
|
|
else
|
|
Pred = ICmpInst::ICMP_EQ;
|
|
|
|
IRBuilder<> Builder(OrigLatchCmp);
|
|
auto *NewLatchCmp = Builder.CreateICmp(Pred, EVLIndVar, TC);
|
|
OrigLatchCmp->replaceAllUsesWith(NewLatchCmp);
|
|
|
|
// llvm::RecursivelyDeleteDeadPHINode only deletes cycles whose values are
|
|
// not used outside the cycles. However, in this case the now-RAUW-ed
|
|
// OrigLatchCmp will be considered a use outside the cycle while in reality
|
|
// it's practically dead. Thus we need to remove it before calling
|
|
// RecursivelyDeleteDeadPHINode.
|
|
(void)RecursivelyDeleteTriviallyDeadInstructions(OrigLatchCmp);
|
|
if (llvm::RecursivelyDeleteDeadPHINode(IndVar))
|
|
LLVM_DEBUG(dbgs() << "Removed original IndVar\n");
|
|
|
|
++NumEliminatedCanonicalIV;
|
|
|
|
return true;
|
|
}
|
|
|
|
PreservedAnalyses EVLIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &LAM,
|
|
LoopStandardAnalysisResults &AR,
|
|
LPMUpdater &U) {
|
|
Function &F = *L.getHeader()->getParent();
|
|
auto &FAMProxy = LAM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR);
|
|
OptimizationRemarkEmitter *ORE =
|
|
FAMProxy.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);
|
|
|
|
if (EVLIndVarSimplifyImpl(AR, ORE).run(L))
|
|
return PreservedAnalyses::allInSet<CFGAnalyses>();
|
|
return PreservedAnalyses::all();
|
|
}
|