288 lines
13 KiB
C++
288 lines
13 KiB
C++
//===- MemoryUtils.cpp ----------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "flang/Optimizer/Transforms/MemoryUtils.h"
|
|
#include "flang/Optimizer/Builder/FIRBuilder.h"
|
|
#include "flang/Optimizer/Builder/Todo.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
|
#include "mlir/IR/Dominance.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
|
|
namespace {
|
|
/// Helper class to detect if an alloca is inside an mlir::Block that can be
|
|
/// reached again before its deallocation points via block successors. This
|
|
/// analysis is only valid if the deallocation points are inside (or nested
|
|
/// inside) the same region as alloca because it does not consider region CFG
|
|
/// (for instance, the block inside a fir.do_loop is obviously inside a loop,
|
|
/// but is not a loop formed by blocks). The dominance of the alloca on its
|
|
/// deallocation points implies this pre-condition (although it is more
|
|
/// restrictive).
|
|
class BlockCycleDetector {
|
|
public:
|
|
bool allocaIsInCycle(fir::AllocaOp alloca,
|
|
llvm::ArrayRef<mlir::Operation *> deallocationPoints);
|
|
|
|
private:
|
|
// Cache for blocks owning alloca that have been analyzed. In many Fortran
|
|
// programs, allocas are usually made in the same blocks with no block cycles.
|
|
// So getting a fast "no" is beneficial.
|
|
llvm::DenseMap<mlir::Block *, /*isInCycle*/ bool> analyzed;
|
|
};
|
|
} // namespace
|
|
|
|
namespace {
|
|
class AllocaReplaceImpl {
|
|
public:
|
|
AllocaReplaceImpl(fir::AllocaRewriterCallBack allocaRewriter,
|
|
fir::DeallocCallBack deallocGenerator)
|
|
: allocaRewriter{allocaRewriter}, deallocGenerator{deallocGenerator} {}
|
|
bool replace(mlir::RewriterBase &, fir::AllocaOp);
|
|
|
|
private:
|
|
mlir::Region *findDeallocationPointsAndOwner(
|
|
fir::AllocaOp alloca,
|
|
llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
|
|
bool
|
|
allocDominatesDealloc(fir::AllocaOp alloca,
|
|
llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
|
|
return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
|
|
return this->dominanceInfo.properlyDominates(alloca.getOperation(),
|
|
deallocPoint);
|
|
});
|
|
}
|
|
void
|
|
genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
|
|
llvm::ArrayRef<mlir::Operation *> deallocationPoints,
|
|
mlir::Value replacement, mlir::Region &owningRegion);
|
|
|
|
private:
|
|
fir::AllocaRewriterCallBack allocaRewriter;
|
|
fir::DeallocCallBack deallocGenerator;
|
|
mlir::DominanceInfo dominanceInfo;
|
|
BlockCycleDetector blockCycleDetector;
|
|
};
|
|
} // namespace
|
|
|
|
static bool
|
|
allocaIsInCycleImpl(mlir::Block *allocaBlock,
|
|
llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
|
|
llvm::DenseSet<mlir::Block *> seen;
|
|
// Insert the deallocation point blocks as "seen" so that the block
|
|
// traversal will stop at them.
|
|
for (mlir::Operation *deallocPoint : deallocationPoints)
|
|
seen.insert(deallocPoint->getBlock());
|
|
if (seen.contains(allocaBlock))
|
|
return false;
|
|
// Traverse the block successor graph starting by the alloca block.
|
|
llvm::SmallVector<mlir::Block *> successors{allocaBlock};
|
|
while (!successors.empty())
|
|
for (mlir::Block *next : successors.pop_back_val()->getSuccessors()) {
|
|
if (next == allocaBlock)
|
|
return true;
|
|
if (auto pair = seen.insert(next); pair.second)
|
|
successors.push_back(next);
|
|
}
|
|
// The traversal did not reach the alloca block again.
|
|
return false;
|
|
}
|
|
bool BlockCycleDetector::allocaIsInCycle(
|
|
fir::AllocaOp alloca,
|
|
llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
|
|
mlir::Block *allocaBlock = alloca->getBlock();
|
|
auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
|
|
bool alreadyAnalyzed = !analyzedPair.second;
|
|
bool &isInCycle = analyzedPair.first->second;
|
|
// Fast exit if block was already analyzed and no cycle was found.
|
|
if (alreadyAnalyzed && !isInCycle)
|
|
return false;
|
|
// If the analysis was not done generically for this block, run it and
|
|
// save the result.
|
|
if (!alreadyAnalyzed)
|
|
isInCycle = allocaIsInCycleImpl(allocaBlock, /*deallocationPoints*/ {});
|
|
if (!isInCycle)
|
|
return false;
|
|
// If the generic analysis found a block loop, see if the deallocation
|
|
// point would be reached before reaching the block again. Do not
|
|
// cache that analysis that is specific to the deallocation points
|
|
// found for this alloca.
|
|
return allocaIsInCycleImpl(allocaBlock, deallocationPoints);
|
|
}
|
|
|
|
static bool terminatorYieldsMemory(mlir::Operation &terminator) {
|
|
return llvm::any_of(terminator.getResults(), [](mlir::OpResult res) {
|
|
return fir::conformsWithPassByRef(res.getType());
|
|
});
|
|
}
|
|
|
|
static bool isRegionTerminator(mlir::Operation &terminator) {
|
|
// Using ReturnLike trait is tempting but it is not set on
|
|
// all region terminator that matters (like omp::TerminatorOp that
|
|
// has no results).
|
|
// May be true for dead code. It is not a correctness issue and dead code can
|
|
// be eliminated by running region simplification before this utility is
|
|
// used.
|
|
// May also be true for unreachable like terminators (e.g., after an abort
|
|
// call related to Fortran STOP). This is also OK, the inserted deallocation
|
|
// will simply never be reached. It is easier for the rest of the code here
|
|
// to assume there is always at least one deallocation point, so keep
|
|
// unreachable terminators.
|
|
return !terminator.hasSuccessors();
|
|
}
|
|
|
|
mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
|
|
fir::AllocaOp alloca,
|
|
llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
|
|
// Step 1: Identify the operation and region owning the alloca.
|
|
mlir::Region *owningRegion = alloca.getOwnerRegion();
|
|
if (!owningRegion)
|
|
return nullptr;
|
|
mlir::Operation *owningOp = owningRegion->getParentOp();
|
|
assert(owningOp && "region expected to be owned");
|
|
// Step 2: Identify the exit points of the owning region, they are the default
|
|
// deallocation points. TODO: detect and use lifetime markers to get earlier
|
|
// deallocation points.
|
|
bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
|
|
for (mlir::Block &block : owningRegion->getBlocks())
|
|
if (mlir::Operation *terminator = block.getTerminator();
|
|
isRegionTerminator(*terminator)) {
|
|
// FIXME: OpenACC and OpenMP privatization recipe are stand alone
|
|
// operation meant to be later "inlined", the value they return may
|
|
// be the address of a local alloca. It would be incorrect to insert
|
|
// deallocation before the terminator (this would introduce use after
|
|
// free once the recipe is inlined.
|
|
// This probably require redesign or special handling on the OpenACC/MP
|
|
// side.
|
|
if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
|
|
return nullptr;
|
|
deallocationPoints.push_back(terminator);
|
|
}
|
|
// If no block terminators without successors have been found, this is
|
|
// an odd region we cannot reason about (never seen yet in FIR and
|
|
// mainstream dialects, but MLIR does not really prevent it).
|
|
if (deallocationPoints.empty())
|
|
return nullptr;
|
|
|
|
// Step 3: detect block based loops between the allocation and deallocation
|
|
// points, and add a deallocation point on the back edge to avoid memory
|
|
// leaks.
|
|
// The detection avoids doing region CFG analysis by assuming that there may
|
|
// be cycles if deallocation points are not dominated by the alloca.
|
|
// This leaves the cases where the deallocation points are in the same region
|
|
// as the alloca (or nested inside it). In which cases there may be a back
|
|
// edge between the alloca and the deallocation point via block successors. An
|
|
// analysis is run to detect those cases.
|
|
// When a loop is detected, the easiest solution to deallocate on the back
|
|
// edge is to store the allocated memory address in a variable (that dominates
|
|
// the loops) and to deallocate the address in that variable if it is set
|
|
// before executing the allocation. This strategy still leads to correct
|
|
// execution in the "false positive" cases.
|
|
// Hence, the alloca is added as a deallocation point when there is no
|
|
// dominance. Note that bringing lifetime markers above will reduce the
|
|
// false positives.
|
|
if (!allocDominatesDealloc(alloca, deallocationPoints) ||
|
|
blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
|
|
deallocationPoints.push_back(alloca.getOperation());
|
|
return owningRegion;
|
|
}
|
|
|
|
void AllocaReplaceImpl::genIndirectDeallocation(
|
|
mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
|
|
llvm::ArrayRef<mlir::Operation *> deallocationPoints,
|
|
mlir::Value replacement, mlir::Region &owningRegion) {
|
|
mlir::Location loc = alloca.getLoc();
|
|
auto replacementInsertPoint = rewriter.saveInsertionPoint();
|
|
// Create C pointer variable in the entry block to store the alloc
|
|
// and access it indirectly in the entry points that do not dominate.
|
|
rewriter.setInsertionPointToStart(&owningRegion.front());
|
|
mlir::Type heapType = fir::HeapType::get(alloca.getInType());
|
|
mlir::Value ptrVar = fir::AllocaOp::create(rewriter, loc, heapType);
|
|
mlir::Value nullPtr = fir::ZeroOp::create(rewriter, loc, heapType);
|
|
fir::StoreOp::create(rewriter, loc, nullPtr, ptrVar);
|
|
// TODO: introducing a pointer compare op in FIR would help
|
|
// generating less IR here.
|
|
mlir::Type intPtrTy = fir::getIntPtrType(rewriter);
|
|
mlir::Value c0 = mlir::arith::ConstantOp::create(
|
|
rewriter, loc, intPtrTy, rewriter.getIntegerAttr(intPtrTy, 0));
|
|
|
|
// Store new storage address right after its creation.
|
|
rewriter.restoreInsertionPoint(replacementInsertPoint);
|
|
mlir::Value castReplacement =
|
|
fir::factory::createConvert(rewriter, loc, heapType, replacement);
|
|
fir::StoreOp::create(rewriter, loc, castReplacement, ptrVar);
|
|
|
|
// Generate conditional deallocation at every deallocation point.
|
|
auto genConditionalDealloc = [&](mlir::Location loc) {
|
|
mlir::Value ptrVal = fir::LoadOp::create(rewriter, loc, ptrVar);
|
|
mlir::Value ptrToInt =
|
|
fir::ConvertOp::create(rewriter, loc, intPtrTy, ptrVal);
|
|
mlir::Value isAllocated = mlir::arith::CmpIOp::create(
|
|
rewriter, loc, mlir::arith::CmpIPredicate::ne, ptrToInt, c0);
|
|
auto ifOp = fir::IfOp::create(rewriter, loc, mlir::TypeRange{}, isAllocated,
|
|
/*withElseRegion=*/false);
|
|
rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
|
|
mlir::Value cast = fir::factory::createConvert(
|
|
rewriter, loc, replacement.getType(), ptrVal);
|
|
deallocGenerator(loc, rewriter, cast);
|
|
// Currently there is no need to reset the pointer var because two
|
|
// deallocation points can never be reached without going through the
|
|
// alloca.
|
|
rewriter.setInsertionPointAfter(ifOp);
|
|
};
|
|
for (mlir::Operation *deallocPoint : deallocationPoints) {
|
|
rewriter.setInsertionPoint(deallocPoint);
|
|
genConditionalDealloc(deallocPoint->getLoc());
|
|
}
|
|
}
|
|
|
|
bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
|
|
fir::AllocaOp alloca) {
|
|
llvm::SmallVector<mlir::Operation *> deallocationPoints;
|
|
mlir::Region *owningRegion =
|
|
findDeallocationPointsAndOwner(alloca, deallocationPoints);
|
|
if (!owningRegion)
|
|
return false;
|
|
rewriter.setInsertionPointAfter(alloca.getOperation());
|
|
bool deallocPointsDominateAlloc =
|
|
allocDominatesDealloc(alloca, deallocationPoints);
|
|
if (mlir::Value replacement =
|
|
allocaRewriter(rewriter, alloca, deallocPointsDominateAlloc)) {
|
|
mlir::Value castReplacement = fir::factory::createConvert(
|
|
rewriter, alloca.getLoc(), alloca.getType(), replacement);
|
|
if (deallocPointsDominateAlloc)
|
|
for (mlir::Operation *deallocPoint : deallocationPoints) {
|
|
rewriter.setInsertionPoint(deallocPoint);
|
|
deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
|
|
}
|
|
else
|
|
genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
|
|
*owningRegion);
|
|
rewriter.replaceOp(alloca, castReplacement);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool fir::replaceAllocas(mlir::RewriterBase &rewriter,
|
|
mlir::Operation *parentOp,
|
|
MustRewriteCallBack mustReplace,
|
|
AllocaRewriterCallBack allocaRewriter,
|
|
DeallocCallBack deallocGenerator) {
|
|
// If the parent operation is not an alloca owner, the code below would risk
|
|
// modifying IR outside of parentOp.
|
|
if (!fir::AllocaOp::ownsNestedAlloca(parentOp))
|
|
return false;
|
|
auto insertPoint = rewriter.saveInsertionPoint();
|
|
bool replacedAllRequestedAlloca = true;
|
|
AllocaReplaceImpl impl(allocaRewriter, deallocGenerator);
|
|
parentOp->walk([&](fir::AllocaOp alloca) {
|
|
if (mustReplace(alloca))
|
|
replacedAllRequestedAlloca &= impl.replace(rewriter, alloca);
|
|
});
|
|
rewriter.restoreInsertionPoint(insertPoint);
|
|
return replacedAllRequestedAlloca;
|
|
}
|