
*) Factors slice union computation out of LoopFusion into Analysis/Utils (where other iteration slice utilities exist). *) Generalizes slice union computation to take the union of slices computed on all loads/stores pairs between source and destination loop nests. *) Fixes a bug in FlatAffineConstraints::addSliceBounds where redundant constraints were added. *) Takes care of a TODO to expose FlatAffineConstraints::mergeAndAlignIds as a public method. -- PiperOrigin-RevId: 250561529
246 lines
9.4 KiB
C++
246 lines
9.4 KiB
C++
//===- LoopFusionUtils.cpp ---- Utilities for loop fusion ----------===//
|
|
//
|
|
// Copyright 2019 The MLIR Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
// =============================================================================
|
|
//
|
|
// This file implements loop fusion transformation utility functions.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Transforms/LoopFusionUtils.h"
|
|
|
|
#include "mlir/AffineOps/AffineOps.h"
|
|
#include "mlir/Analysis/AffineAnalysis.h"
|
|
#include "mlir/Analysis/AffineStructures.h"
|
|
#include "mlir/Analysis/Utils.h"
|
|
#include "mlir/IR/AffineExpr.h"
|
|
#include "mlir/IR/AffineMap.h"
|
|
#include "mlir/IR/BlockAndValueMapping.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/StandardOps/Ops.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#define DEBUG_TYPE "loop-fusion-utils"
|
|
|
|
using namespace mlir;
|
|
|
|
// Gathers all load and store memref accesses in 'opA' into 'values', where
|
|
// 'values[memref] == true' for each store operation.
|
|
static void getLoadAndStoreMemRefAccesses(Operation *opA,
|
|
DenseMap<Value *, bool> &values) {
|
|
opA->walk([&](Operation *op) {
|
|
if (auto loadOp = dyn_cast<LoadOp>(op)) {
|
|
if (values.count(loadOp.getMemRef()) == 0)
|
|
values[loadOp.getMemRef()] = false;
|
|
} else if (auto storeOp = dyn_cast<StoreOp>(op)) {
|
|
values[storeOp.getMemRef()] = true;
|
|
}
|
|
});
|
|
}
|
|
|
|
// Returns true if 'op' is a load or store operation which access an memref
|
|
// accessed 'values' and at least one of the access is a store operation.
|
|
// Returns false otherwise.
|
|
static bool isDependentLoadOrStoreOp(Operation *op,
|
|
DenseMap<Value *, bool> &values) {
|
|
if (auto loadOp = dyn_cast<LoadOp>(op)) {
|
|
return values.count(loadOp.getMemRef()) > 0 &&
|
|
values[loadOp.getMemRef()] == true;
|
|
} else if (auto storeOp = dyn_cast<StoreOp>(op)) {
|
|
return values.count(storeOp.getMemRef()) > 0;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Returns the first operation in range ('opA', 'opB') which has a data
|
|
// dependence on 'opA'. Returns 'nullptr' of no dependence exists.
|
|
static Operation *getFirstDependentOpInRange(Operation *opA, Operation *opB) {
|
|
// Record memref values from all loads/store in loop nest rooted at 'opA'.
|
|
// Map from memref value to bool which is true if store, false otherwise.
|
|
DenseMap<Value *, bool> values;
|
|
getLoadAndStoreMemRefAccesses(opA, values);
|
|
|
|
// For each 'opX' in block in range ('opA', 'opB'), check if there is a data
|
|
// dependence from 'opA' to 'opX' ('opA' and 'opX' access the same memref
|
|
// and at least one of the accesses is a store).
|
|
Operation *firstDepOp = nullptr;
|
|
for (Block::iterator it = std::next(Block::iterator(opA));
|
|
it != Block::iterator(opB); ++it) {
|
|
Operation *opX = &(*it);
|
|
opX->walk([&](Operation *op) {
|
|
if (!firstDepOp && isDependentLoadOrStoreOp(op, values))
|
|
firstDepOp = opX;
|
|
});
|
|
if (firstDepOp)
|
|
break;
|
|
}
|
|
return firstDepOp;
|
|
}
|
|
|
|
// Returns the last operation 'opX' in range ('opA', 'opB'), for which there
|
|
// exists a data dependence from 'opX' to 'opB'.
|
|
// Returns 'nullptr' of no dependence exists.
|
|
static Operation *getLastDependentOpInRange(Operation *opA, Operation *opB) {
|
|
// Record memref values from all loads/store in loop nest rooted at 'opB'.
|
|
// Map from memref value to bool which is true if store, false otherwise.
|
|
DenseMap<Value *, bool> values;
|
|
getLoadAndStoreMemRefAccesses(opB, values);
|
|
|
|
// For each 'opX' in block in range ('opA', 'opB') in reverse order,
|
|
// check if there is a data dependence from 'opX' to 'opB':
|
|
// *) 'opX' and 'opB' access the same memref and at least one of the accesses
|
|
// is a store.
|
|
// *) 'opX' produces an SSA Value which is used by 'opB'.
|
|
Operation *lastDepOp = nullptr;
|
|
for (Block::reverse_iterator it = std::next(Block::reverse_iterator(opB));
|
|
it != Block::reverse_iterator(opA); ++it) {
|
|
Operation *opX = &(*it);
|
|
opX->walk([&](Operation *op) {
|
|
if (lastDepOp)
|
|
return;
|
|
if (isa<LoadOp>(op) || isa<StoreOp>(op)) {
|
|
if (isDependentLoadOrStoreOp(op, values))
|
|
lastDepOp = opX;
|
|
return;
|
|
}
|
|
for (auto *value : op->getResults()) {
|
|
for (auto *user : value->getUsers()) {
|
|
SmallVector<AffineForOp, 4> loops;
|
|
// Check if any loop in loop nest surrounding 'user' is 'opB'.
|
|
getLoopIVs(*user, &loops);
|
|
if (llvm::is_contained(loops, cast<AffineForOp>(opB))) {
|
|
lastDepOp = opX;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
if (lastDepOp)
|
|
break;
|
|
}
|
|
return lastDepOp;
|
|
}
|
|
|
|
// Computes and returns an insertion point operation, before which the
|
|
// the fused <srcForOp, dstForOp> loop nest can be inserted while preserving
|
|
// dependences. Returns nullptr if no such insertion point is found.
|
|
static Operation *getFusedLoopNestInsertionPoint(AffineForOp srcForOp,
|
|
AffineForOp dstForOp) {
|
|
bool isSrcForOpBeforeDstForOp =
|
|
srcForOp.getOperation()->isBeforeInBlock(dstForOp.getOperation());
|
|
auto forOpA = isSrcForOpBeforeDstForOp ? srcForOp : dstForOp;
|
|
auto forOpB = isSrcForOpBeforeDstForOp ? dstForOp : srcForOp;
|
|
|
|
auto *firstDepOpA =
|
|
getFirstDependentOpInRange(forOpA.getOperation(), forOpB.getOperation());
|
|
auto *lastDepOpB =
|
|
getLastDependentOpInRange(forOpA.getOperation(), forOpB.getOperation());
|
|
// Block:
|
|
// ...
|
|
// |-- opA
|
|
// | ...
|
|
// | lastDepOpB --|
|
|
// | ... |
|
|
// |-> firstDepOpA |
|
|
// ... |
|
|
// opB <---------
|
|
//
|
|
// Valid insertion point range: (lastDepOpB, firstDepOpA)
|
|
//
|
|
if (firstDepOpA != nullptr) {
|
|
if (lastDepOpB != nullptr) {
|
|
if (firstDepOpA->isBeforeInBlock(lastDepOpB) || firstDepOpA == lastDepOpB)
|
|
// No valid insertion point exists which preserves dependences.
|
|
return nullptr;
|
|
}
|
|
// Return insertion point in valid range closest to 'opB'.
|
|
// TODO(andydavis) Consider other insertion points in valid range.
|
|
return firstDepOpA;
|
|
}
|
|
// No dependences from 'opA' to operation in range ('opA', 'opB'), return
|
|
// 'opB' insertion point.
|
|
return forOpB.getOperation();
|
|
}
|
|
|
|
// Gathers all load and store ops in loop nest rooted at 'forOp' into
|
|
// 'loadAndStoreOps'.
|
|
static bool
|
|
gatherLoadsAndStores(AffineForOp forOp,
|
|
SmallVectorImpl<Operation *> &loadAndStoreOps) {
|
|
bool hasIfOp = false;
|
|
forOp.getOperation()->walk([&](Operation *op) {
|
|
if (isa<LoadOp>(op) || isa<StoreOp>(op))
|
|
loadAndStoreOps.push_back(op);
|
|
else if (isa<AffineIfOp>(op))
|
|
hasIfOp = true;
|
|
});
|
|
return !hasIfOp;
|
|
}
|
|
|
|
// TODO(andydavis) Add support for the following features in subsequent CLs:
|
|
// *) Compute dependences of unfused src/dst loops.
|
|
// *) Compute dependences of src/dst loop as if they were fused.
|
|
// *) Check for fusion preventing dependences (e.g. a dependence which changes
|
|
// from loop-independent to backward loop-carried after fusion).
|
|
FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
|
|
unsigned dstLoopDepth,
|
|
ComputationSliceState *srcSlice) {
|
|
// Return 'failure' if 'dstLoopDepth == 0'.
|
|
if (dstLoopDepth == 0) {
|
|
LLVM_DEBUG(llvm::dbgs() << "Cannot fuse loop nests at depth 0\n.");
|
|
return FusionResult::FailPrecondition;
|
|
}
|
|
// Return 'failure' if 'srcForOp' and 'dstForOp' are not in the same block.
|
|
auto *block = srcForOp.getOperation()->getBlock();
|
|
if (block != dstForOp.getOperation()->getBlock()) {
|
|
LLVM_DEBUG(llvm::dbgs() << "Cannot fuse loop nests in different blocks\n.");
|
|
return FusionResult::FailPrecondition;
|
|
}
|
|
|
|
// Return 'failure' if no valid insertion point for fused loop nest in 'block'
|
|
// exists which would preserve dependences.
|
|
if (!getFusedLoopNestInsertionPoint(srcForOp, dstForOp)) {
|
|
LLVM_DEBUG(llvm::dbgs() << "Fusion would violate dependences in block\n.");
|
|
return FusionResult::FailBlockDependence;
|
|
}
|
|
|
|
// Gather all load and store ops in 'srcForOp'.
|
|
SmallVector<Operation *, 4> srcLoadAndStoreOps;
|
|
if (!gatherLoadsAndStores(srcForOp, srcLoadAndStoreOps)) {
|
|
LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n.");
|
|
return FusionResult::FailPrecondition;
|
|
}
|
|
|
|
// Gather all load and store ops in 'dstForOp'.
|
|
SmallVector<Operation *, 4> dstLoadAndStoreOps;
|
|
if (!gatherLoadsAndStores(dstForOp, dstLoadAndStoreOps)) {
|
|
LLVM_DEBUG(llvm::dbgs() << "Fusing loops with affine.if unsupported.\n.");
|
|
return FusionResult::FailPrecondition;
|
|
}
|
|
|
|
// Compute union of computation slices computed from all pairs in
|
|
// {'srcLoadAndStoreOps', 'dstLoadAndStoreOps'}.
|
|
if (failed(mlir::computeSliceUnion(srcLoadAndStoreOps, dstLoadAndStoreOps,
|
|
dstLoopDepth, srcSlice))) {
|
|
LLVM_DEBUG(llvm::dbgs() << "computeSliceUnion failed\n");
|
|
return FusionResult::FailPrecondition;
|
|
}
|
|
|
|
return FusionResult::Success;
|
|
}
|