llvm-project/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterfaceTransformOps.cpp
MaheshRavishankar fa19a57b87
[mlir][SCF] Allow using a custom operation to generate loops with mlir::tileUsingSCF. (#159660)
This change adds an option to use a custom operation to generate the
inter-tile loops during tiling. When the loop type is set to
scf::SCFTilingOptions::LoopType::CustomOp, the method
mlir::tileUsingSCF provides two callback functions

First one to generate the header of the loop.
Second one to generate the terminator of the loop.
These methods receive the information needed to generate the
loops/terminator and expect to return information needed to generate
the code for the intra-tile computation. See comments for more
details.

Presently this adds support only for tiling. Subsequent commits
will update this to add support for fusion as well.

The PR is split into two commits.

The first commit is an NFC that just refactors the code (and cleans up
some naming) to make it easier to add the support for custom loop
operations.
The second commit adds the support for using a custom loop operation, as
well as a test to exercise this path.

Note that this is a duplicate of
https://github.com/llvm/llvm-project/pull/159506 that was accidentally
committed and was reverted in
https://github.com/llvm/llvm-project/pull/159598 to wait for reviews.

Signed-off-by: MaheshRavishankar
[mahesh.ravishankar@gmail.com](mailto:mahesh.ravishankar@gmail.com)

---------

Signed-off-by: MaheshRavishankar <mahesh.ravishankar@gmail.com>
2025-09-22 15:20:02 -07:00

657 lines
26 KiB
C++

//===- TestTilingInterfaceTransformOps.cpp - Test `TilingInterface` ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines transform dialect operations used for testing
// TilingInterface
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Index/IR/IndexDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/Dialect/Transform/IR/TransformAttrs.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/Interfaces/TilingInterface.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "test-tiling-interface"
#define GET_OP_CLASSES
#include "TestTilingInterfaceTransformOps.h.inc"
using namespace mlir;
using namespace mlir::transform;
//===----------------------------------------------------------------------===//
// TestFuseAndYieldOp
//===----------------------------------------------------------------------===//
/// Returns `op` together with every transitive producer of its operands that
/// implements `TilingInterface`. Producers that do not implement the interface
/// are neither recorded nor traversed.
static llvm::SmallDenseSet<Operation *> collectTiledAndFusedOps(Operation *op) {
  llvm::SmallDenseSet<Operation *> visited;
  SmallVector<Operation *> pending;
  pending.push_back(op);
  visited.insert(op);
  while (!pending.empty()) {
    Operation *consumer = pending.pop_back_val();
    for (OpOperand &use : consumer->getOpOperands()) {
      Operation *definingOp = use.get().getDefiningOp();
      // Skip operands without a defining op and producers that are not
      // tilable.
      if (!definingOp || !isa<TilingInterface>(definingOp))
        continue;
      // `insert` reports whether the op is new, so each producer is queued
      // for traversal at most once.
      if (visited.insert(definingOp).second)
        pending.push_back(definingOp);
    }
  }
  return visited;
}
/// Tiles and fuses every op in `payloadOps` and reports the results back to
/// the transform op.
///
/// For each payload op, producers reachable through `TilingInterface` ops are
/// collected; those that have a user properly dominated by the payload op are
/// requested to be yielded from the generated loops. Result 0 of
/// `transformOp` is set to the tiled ops and result `i + 1` to the `i`-th
/// generated loop of every payload op. Emits an error on `transformOp` if a
/// payload op does not implement `TilingInterface`.
template <typename Range>
static LogicalResult
applyTileAndFuseToAll(RewriterBase &rewriter, Operation *transformOp,
                      Range &&payloadOps, unsigned numLoops,
                      scf::SCFTilingOptions tilingOptions,
                      TransformResults &transformResults) {
  SmallVector<Operation *> tiledOps;
  SmallVector<SmallVector<Operation *>> loopOps(numLoops);

  for (Operation *target : payloadOps) {
    auto tilingInterfaceOp = dyn_cast<TilingInterface>(target);
    if (!tilingInterfaceOp)
      return transformOp->emitError("only TilingInterface ops are supported");

    DominanceInfo dominanceInfo(tilingInterfaceOp);

    // A fused producer needs its replacement yielded when one of its users is
    // properly dominated by the op being tiled.
    llvm::SmallDenseSet<Operation *> fusionCandidates =
        collectTiledAndFusedOps(tilingInterfaceOp);
    auto isDominatedByRoot = [&](Operation *user) {
      return dominanceInfo.properlyDominates(tilingInterfaceOp, user);
    };
    llvm::DenseSet<Operation *> yieldReplacementsFor;
    for (Operation *candidate : fusionCandidates)
      if (llvm::any_of(candidate->getUsers(), isDominatedByRoot))
        yieldReplacementsFor.insert(candidate);

    scf::SCFTileAndFuseOptions::ControlFnTy controlFn =
        [&](tensor::ExtractSliceOp candidateSliceOp, OpResult originalProducer,
            bool isDestinationOperand)
        -> std::optional<scf::SCFTileAndFuseOptions::ControlFnResult> {
      return scf::SCFTileAndFuseOptions::ControlFnResult{
          yieldReplacementsFor.contains(originalProducer.getOwner())};
    };

    scf::SCFTileAndFuseOptions tileAndFuseOptions;
    tileAndFuseOptions.setTilingOptions(tilingOptions);
    tileAndFuseOptions.setFusionControlFn(controlFn);

    rewriter.setInsertionPoint(target);
    FailureOr<scf::SCFTileAndFuseResult> tiledResults =
        scf::tileConsumerAndFuseProducersUsingSCF(rewriter, tilingInterfaceOp,
                                                  tileAndFuseOptions);
    if (failed(tiledResults))
      return failure();

    // Swap in the tiled and fused values for the original results.
    SmallVector<Operation *> opsToReplace{target};
    llvm::append_range(opsToReplace, tiledResults->fusedProducers);
    for (Operation *original : opsToReplace) {
      for (OpResult result : original->getResults()) {
        auto replacement = tiledResults->replacements.lookup(result);
        if (!replacement)
          continue;
        Operation *replacementOp = replacement.getDefiningOp();
        // Only rewrite uses that come after the replacement and live in the
        // same parent op as the replacement.
        rewriter.replaceUsesWithIf(result, replacement, [&](OpOperand &use) {
          Operation *user = use.getOwner();
          return dominanceInfo.properlyDominates(replacementOp, user) &&
                 user->getParentOp() == replacementOp->getParentOp();
        });
      }
      if (original->use_empty())
        rewriter.eraseOp(original);
    }

    // Collect the handles to hand back to the transform op.
    tiledOps.push_back(tiledResults->tiledAndFusedOps.front());
    assert(tiledResults->loops.size() == numLoops &&
           "Mismatched number of loops, tile and fuse transform should have "
           "failed");
    for (unsigned int loopIdx = 0; loopIdx < numLoops; ++loopIdx)
      loopOps[loopIdx].push_back(tiledResults->loops[loopIdx]);
  }

  transformResults.set(transformOp->getOpResult(0), tiledOps);
  for (unsigned int loopIdx = 0; loopIdx < numLoops; ++loopIdx)
    transformResults.set(transformOp->getOpResult(loopIdx + 1),
                         loopOps[loopIdx]);
  return success();
}
/// Builds tiling options from the op's `tile_sizes` / `tile_interchange`
/// attributes (optionally selecting `scf.forall` loops) and applies tile and
/// fuse to every payload op of the target handle. The number of loops expected
/// back is the number of non-zero tile sizes.
DiagnosedSilenceableFailure
transform::TestFuseAndYieldOp::apply(TransformRewriter &rewriter,
                                     TransformResults &transformResults,
                                     TransformState &state) {
  SmallVector<int64_t> tileSizes =
      extractFromIntegerArrayAttr<int64_t>(getTileSizes());
  SmallVector<int64_t> tileInterchange =
      extractFromIntegerArrayAttr<int64_t>(getTileInterchange());
  SmallVector<OpFoldResult> tileSizesOfr =
      getAsIndexOpFoldResult(rewriter.getContext(), tileSizes);

  scf::SCFTilingOptions tilingOptions;
  tilingOptions.setTileSizes(tileSizesOfr);
  tilingOptions.setInterchange(tileInterchange);
  if (getUseForall())
    tilingOptions.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);

  // A zero tile size means the dimension is not tiled and yields no loop.
  unsigned numTiledLoops =
      llvm::count_if(tileSizes, [](int64_t size) { return size != 0; });

  LogicalResult status = applyTileAndFuseToAll(
      rewriter, getOperation(), state.getPayloadOps(getTarget()),
      numTiledLoops, tilingOptions, transformResults);
  if (failed(status))
    return DiagnosedSilenceableFailure::definiteFailure();
  return DiagnosedSilenceableFailure::success();
}
//===----------------------------------------------------------------------===//
// TestFuseConsumerOp
//===----------------------------------------------------------------------===//
/// Fuses consumers of `slices` into `loops`, `numConsumerToFuse` times in a
/// row, and records the handles on `transformOp`: result 0 receives the owners
/// of the original consumer operands, result 1 the owners of the tiled and
/// fused consumer operands. Emits an error on the first slice if any fusion
/// attempt fails.
static LogicalResult applyFuseConsumer(
    RewriterBase &rewriter, Operation *transformOp,
    ArrayRef<Operation *> slices, MutableArrayRef<LoopLikeOpInterface> loops,
    uint32_t numConsumerToFuse, TransformResults &transformResults) {
  SmallVector<Operation *> originalConsumerOps;
  SmallVector<Operation *> fusedConsumerOps;

  rewriter.setInsertionPoint(slices.front());
  for (uint32_t round = 0; round < numConsumerToFuse; ++round) {
    FailureOr<scf::SCFFuseConsumerOfSliceResult> fused =
        scf::tileAndFuseConsumerOfSlices(rewriter, slices, loops);
    if (failed(fused))
      return slices.front()->emitOpError("failed to fuse consumer of slice");

    // Collect the handles to hand back to the transform op.
    for (OpOperand *operand : fused->origConsumerOperands)
      originalConsumerOps.push_back(operand->getOwner());
    for (OpOperand *operand : fused->tiledAndFusedConsumerOperands)
      fusedConsumerOps.push_back(operand->getOwner());
  }

  transformResults.set(transformOp->getOpResult(0), originalConsumerOps);
  transformResults.set(transformOp->getOpResult(1), fusedConsumerOps);
  return success();
}
/// Resolves the slice and loop handles to payload ops and fuses the consumers
/// of the slices into the loops.
///
/// Only the first payload op of each handle is used. Loop handles are visited
/// in reverse operand order. A definite failure with a diagnostic is produced
/// when there are no target handles, when a handle has no payload op, or when
/// a loop payload op does not implement `LoopLikeOpInterface`.
DiagnosedSilenceableFailure
transform::TestFuseConsumerOp::apply(TransformRewriter &rewriter,
                                     TransformResults &transformResults,
                                     TransformState &state) {
  SmallVector<Operation *> slices;
  for (Value sliceHandle : getTargets()) {
    auto payload = state.getPayloadOps(sliceHandle);
    // Dereferencing `begin()` of an empty payload range is invalid.
    if (payload.begin() == payload.end()) {
      emitOpError("expected every target handle to be associated with a "
                  "payload operation");
      return DiagnosedSilenceableFailure::definiteFailure();
    }
    slices.push_back(*payload.begin());
  }
  // The fusion helper unconditionally reads the first slice.
  if (slices.empty()) {
    emitOpError("expected at least one target slice");
    return DiagnosedSilenceableFailure::definiteFailure();
  }

  SmallVector<LoopLikeOpInterface> loops;
  for (Value loopHandle : llvm::reverse(getLoops())) {
    auto payload = state.getPayloadOps(loopHandle);
    if (payload.begin() == payload.end()) {
      emitOpError("expected every loop handle to be associated with a "
                  "payload operation");
      return DiagnosedSilenceableFailure::definiteFailure();
    }
    auto loopLikeOp = dyn_cast<LoopLikeOpInterface>(*payload.begin());
    if (!loopLikeOp) {
      // Report why the transform fails instead of failing silently.
      emitOpError("expected loop handles to be associated with operations "
                  "implementing `LoopLikeOpInterface`");
      return DiagnosedSilenceableFailure::definiteFailure();
    }
    loops.push_back(loopLikeOp);
  }

  LogicalResult result =
      applyFuseConsumer(rewriter, getOperation(), slices, loops,
                        getNumConsumerToFuse(), transformResults);
  return failed(result) ? DiagnosedSilenceableFailure::definiteFailure()
                        : DiagnosedSilenceableFailure::success();
}
/// Declares the transform effects of this op: the `targets` and `loops`
/// handles are consumed, handles are produced for every result, and the
/// payload IR is modified.
void transform::TestFuseConsumerOp::getEffects(
    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  consumesHandle(getTargetsMutable(), effects);
  consumesHandle(getLoopsMutable(), effects);
  Operation *thisOp = getOperation();
  producesHandle(thisOp->getOpResults(), effects);
  modifiesPayload(effects);
}
//===----------------------------------------------------------------------===//
// TestTileUsingForallOp
//===----------------------------------------------------------------------===//
/// Tiles every op in `payloadOps` with `scf::tileUsingSCF` using `scf.forall`
/// loops, replaces each original op with the tiling replacements, and reports
/// the handles back: result 0 of `transformOp` receives the tiled ops and
/// result `i + 1` receives the `i`-th collected loop. Emits an error on
/// `transformOp` if a payload op does not implement `TilingInterface`.
template <typename Range>
static LogicalResult
applyTileToAll(RewriterBase &rewriter, Operation *transformOp,
               Range &&payloadOps, ArrayRef<OpFoldResult> tileSizes,
               ArrayRef<int64_t> interchange, std::optional<ArrayAttr> mapping,
               TransformResults &transformResults) {
  // The options do not depend on the payload op, so they are built once.
  scf::SCFTilingOptions tilingOptions;
  tilingOptions.setTileSizes(tileSizes);
  tilingOptions.setInterchange(interchange);
  if (mapping)
    tilingOptions.setMapping(mapping->getValue());
  tilingOptions.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);

  SmallVector<Operation *> tiledOps;
  SmallVector<Operation *> loopOps;
  for (Operation *target : payloadOps) {
    auto tilingInterfaceOp = dyn_cast<TilingInterface>(target);
    if (!tilingInterfaceOp)
      return transformOp->emitError("only TilingInterface ops are supported");

    rewriter.setInsertionPoint(target);
    FailureOr<scf::SCFTilingResult> tiled =
        scf::tileUsingSCF(rewriter, tilingInterfaceOp, tilingOptions);
    if (failed(tiled))
      return failure();

    // Swap the original op for the values produced by the tiled loops.
    rewriter.replaceOp(tilingInterfaceOp, tiled->replacements);

    // Collect the handles to hand back to the transform op.
    tiledOps.push_back(tiled->tiledOps.front());
    for (Operation *loop : tiled->loops)
      loopOps.push_back(loop);
  }

  transformResults.set(transformOp->getOpResult(0), tiledOps);
  for (unsigned loopIdx = 0, numLoops = loopOps.size(); loopIdx < numLoops;
       ++loopIdx)
    transformResults.set(transformOp->getOpResult(loopIdx + 1),
                         {loopOps[loopIdx]});
  return success();
}
/// Reads the `tile_sizes` and `interchange` attributes, converts the tile
/// sizes to index attributes, and tiles every payload op of the target handle
/// using `scf.forall` loops with the optional mapping.
DiagnosedSilenceableFailure
transform::TestTileUsingForallOp::apply(TransformRewriter &rewriter,
                                        TransformResults &transformResults,
                                        TransformState &state) {
  SmallVector<int64_t> staticTileSizes =
      extractFromIntegerArrayAttr<int64_t>(getTileSizes());
  SmallVector<OpFoldResult> tileSizesOfr =
      getAsIndexOpFoldResult(rewriter.getContext(), staticTileSizes);
  SmallVector<int64_t> interchange =
      extractFromIntegerArrayAttr<int64_t>(getInterchange());

  LogicalResult status =
      applyTileToAll(rewriter, getOperation(), state.getPayloadOps(getTarget()),
                     tileSizesOfr, interchange, getMapping(), transformResults);
  if (failed(status))
    return DiagnosedSilenceableFailure::definiteFailure();
  return DiagnosedSilenceableFailure::success();
}
/// Declares the transform effects of this op: the target handle is consumed,
/// handles are produced for every result, and the payload IR is modified.
void transform::TestTileUsingForallOp::getEffects(
    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  consumesHandle(getTargetMutable(), effects);
  Operation *thisOp = getOperation();
  producesHandle(thisOp->getOpResults(), effects);
  modifiesPayload(effects);
}
//===----------------------------------------------------------------------===//
// TestFuseUsingForallOp
//===----------------------------------------------------------------------===//
/// Runs `applyFn` on every op in `payloadOps`, replaces the results of the
/// original op and of the fused producers with the values produced by tiling,
/// and reports the handles back: result 0 of `transformOp` receives the tiled
/// ops and result 1 receives the single generated loop (the one from the last
/// payload op processed). Emits an error on `transformOp` if a payload op does
/// not implement `TilingInterface`.
template <typename Range>
static LogicalResult applyTilingToAll(
    RewriterBase &rewriter, Operation *transformOp, Range &&payloadOps,
    unsigned numLoops, TransformResults &transformResults,
    function_ref<FailureOr<scf::SCFTileAndFuseResult>(TilingInterface)>
        applyFn) {
  SmallVector<Operation *> tiledOps;
  SmallVector<SmallVector<Operation *>> loopOps(1);

  for (Operation *target : payloadOps) {
    auto tilingInterfaceOp = dyn_cast<TilingInterface>(target);
    if (!tilingInterfaceOp)
      return transformOp->emitError("only TilingInterface ops are supported");

    rewriter.setInsertionPoint(target);
    FailureOr<scf::SCFTileAndFuseResult> tiled = applyFn(tilingInterfaceOp);
    if (failed(tiled))
      return failure();

    // Swap in the tiled and fused values for the original results.
    SmallVector<Operation *> opsToReplace{target};
    llvm::append_range(opsToReplace, tiled->fusedProducers);
    for (Operation *original : opsToReplace) {
      for (OpResult result : original->getResults()) {
        auto replacement = tiled->replacements.lookup(result);
        if (replacement)
          rewriter.replaceAllUsesWith(result, replacement);
      }
      if (original->use_empty())
        rewriter.eraseOp(original);
    }

    // Collect the handles to hand back to the transform op.
    tiledOps.push_back(tiled->tiledAndFusedOps.front());
    assert(tiled->loops.size() == 1 &&
           cast<scf::ForallOp>(tiled->loops[0]).getRank() == numLoops &&
           "Mismatched number of loops, tile and fuse transform should have "
           "failed");
    loopOps[0] = {tiled->loops[0]};
  }

  transformResults.set(transformOp->getOpResult(0), tiledOps);
  if (!loopOps.empty())
    transformResults.set(transformOp->getOpResult(1), loopOps[0]);
  return success();
}
/// Builds tile-and-fuse options from the `tile_sizes` and `interchange`
/// attributes, selecting `scf.forall` loops, and applies tile and fuse to
/// every payload op of the root handle. The expected loop rank is the number
/// of non-zero tile sizes.
DiagnosedSilenceableFailure
transform::TestFuseUsingForallOp::apply(TransformRewriter &rewriter,
                                        TransformResults &transformResults,
                                        TransformState &state) {
  SmallVector<int64_t> tileSizes =
      extractFromIntegerArrayAttr<int64_t>(getTileSizes());
  SmallVector<int64_t> tileInterchange =
      extractFromIntegerArrayAttr<int64_t>(getInterchange());
  SmallVector<OpFoldResult> tileSizesOfr =
      getAsIndexOpFoldResult(rewriter.getContext(), tileSizes);

  scf::SCFTilingOptions tilingOptions;
  tilingOptions.interchangeVector = tileInterchange;
  tilingOptions.setTileSizes(tileSizesOfr);
  tilingOptions.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);

  scf::SCFTileAndFuseOptions tileAndFuseOptions;
  tileAndFuseOptions.tilingOptions = tilingOptions;

  // Callback handed to the driver; it performs the actual tile and fuse.
  auto tileAndFuse = [&](TilingInterface tilingInterfaceOp)
      -> FailureOr<scf::SCFTileAndFuseResult> {
    return scf::tileConsumerAndFuseProducersUsingSCF(
        rewriter, tilingInterfaceOp, tileAndFuseOptions);
  };

  // A zero tile size means the dimension is not tiled.
  unsigned numTiledLoops =
      llvm::count_if(tileSizes, [](int64_t size) { return size != 0; });

  LogicalResult status = applyTilingToAll(
      rewriter, getOperation(), state.getPayloadOps(getRootOp()),
      numTiledLoops, transformResults, tileAndFuse);
  if (failed(status))
    return DiagnosedSilenceableFailure::definiteFailure();
  return DiagnosedSilenceableFailure::success();
}
/// Declares the transform effects of this op: the root handle is consumed,
/// handles are produced for every result, and the payload IR is modified.
void transform::TestFuseUsingForallOp::getEffects(
    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
  consumesHandle(getRootOpMutable(), effects);
  Operation *thisOp = getOperation();
  producesHandle(thisOp->getOpResults(), effects);
  modifiesPayload(effects);
}
//===----------------------------------------------------------------------===//
// TestTileAndFuseOuterParallelPartialReduction
//===----------------------------------------------------------------------===//
/// Tiles the reduction dimensions of the root op with the
/// `PartialReductionOuterParallel` strategy using `scf.forall` loops, and
/// fuses producers into the generated loop.
///
/// The reduction dimensions come from the `reduction_dims` attribute; when it
/// is empty, every loop whose iterator type is `reduction` is used. One tile
/// size is consumed, in loop order, for every non-parallel iterator of the
/// first payload op; parallel iterators get a tile size of zero. A definite
/// failure with a diagnostic is produced when the root handle has no payload
/// op, the first payload op does not implement `TilingInterface`, no
/// reduction dimension is available, or the tile sizes do not match.
DiagnosedSilenceableFailure
transform::TestTileAndFuseOuterParallelPartialReductionOp::apply(
    TransformRewriter &rewriter, TransformResults &transformResults,
    TransformState &state) {
  auto rootPayload = state.getPayloadOps(getRootOp());
  // Dereferencing `begin()` of an empty payload range is invalid.
  if (rootPayload.begin() == rootPayload.end()) {
    emitOpError("expected root handle to be associated with a payload "
                "operation");
    return DiagnosedSilenceableFailure::definiteFailure();
  }
  auto target = dyn_cast<TilingInterface>(*rootPayload.begin());
  if (!target) {
    emitOpError("expected root operation to implement `TilingInterface`");
    return DiagnosedSilenceableFailure::definiteFailure();
  }

  SmallVector<unsigned> reductionDims =
      extractFromIntegerArrayAttr<unsigned>(getReductionDims());
  if (reductionDims.empty()) {
    // Default to every reduction loop of the target.
    for (auto [index, iterator] :
         llvm::enumerate(target.getLoopIteratorTypes()))
      if (iterator == utils::IteratorType::reduction)
        reductionDims.push_back(index);
  }
  if (reductionDims.empty()) {
    emitOpError(
        "no reduction dimension specified or found in the target operation");
    return DiagnosedSilenceableFailure::definiteFailure();
  }

  SmallVector<int64_t> reductionTileSizes =
      extractFromIntegerArrayAttr<int64_t>(getTileSizes());
  if (reductionTileSizes.size() != reductionDims.size()) {
    emitOpError(
        "missing tile sizes for reduction dimensions that are to be tiled");
    return DiagnosedSilenceableFailure::definiteFailure();
  }

  // Adjust tile sizes so that it corresponds to the reduction iterator types.
  SmallVector<OpFoldResult> tileSizes;
  size_t nextReductionTileSize = 0;
  OpFoldResult zero = rewriter.getIndexAttr(0);
  for (auto iterator : target.getLoopIteratorTypes()) {
    if (iterator == utils::IteratorType::parallel) {
      tileSizes.push_back(zero);
      continue;
    }
    // The target may have more non-parallel iterators than tile sizes were
    // supplied (e.g. `reduction_dims` names only some of them); reject that
    // instead of reading past the end of `reductionTileSizes`.
    if (nextReductionTileSize >= reductionTileSizes.size()) {
      emitOpError("expected one tile size for each reduction iterator of the "
                  "target operation");
      return DiagnosedSilenceableFailure::definiteFailure();
    }
    tileSizes.push_back(
        rewriter.getIndexAttr(reductionTileSizes[nextReductionTileSize++]));
  }

  scf::SCFTilingOptions tilingOptions;
  tilingOptions.setTileSizes(tileSizes)
      .setLoopType(scf::SCFTilingOptions::LoopType::ForallOp)
      .setReductionTilingStrategy(
          ReductionTilingStrategy::PartialReductionOuterParallel)
      .setReductionDims(reductionDims);
  if (auto mapping = getMapping()) {
    tilingOptions.setMapping(mapping.value());
  }

  LogicalResult result = applyTileAndFuseToAll(
      rewriter, getOperation(), state.getPayloadOps(getRootOp()),
      /*numLoops =*/1, tilingOptions, transformResults);
  return failed(result) ? DiagnosedSilenceableFailure::definiteFailure()
                        : DiagnosedSilenceableFailure::success();
}
//===----------------------------------------------------------------------===//
// TestTileUsingCustomLoopOp
//===----------------------------------------------------------------------===//
/// Tiles the first payload op of the root handle with `scf::tileUsingSCF`
/// using `LoopType::CustomOp` and two test callbacks:
///  - the loop header callback builds a single `scf.for` over the linearized
///    iteration space (the product of the per-dimension iteration counts) and
///    delinearizes its induction variable to compute per-dimension tile
///    offsets and sizes;
///  - the terminator callback inserts every tiled result into its destination
///    with `tensor.insert_slice` and yields the inserted values.
/// Result 0 of the transform op is set to the tiled ops and result 1 to the
/// generated loops.
DiagnosedSilenceableFailure transform::TestTileUsingCustomLoopOp::apply(
TransformRewriter &transformRewriter, TransformResults &transformResults,
TransformState &state) {
// Only the first payload op associated with the root handle is tiled.
auto target =
dyn_cast<TilingInterface>(*state.getPayloadOps(getRootOp()).begin());
if (!target) {
emitOpError("expected root operation to implement `TilingInterface`");
return DiagnosedSilenceableFailure::definiteFailure();
}
// Shared constant 1: seed of the iteration-count product in the header
// callback and the unit stride of the inserted slices in the terminator
// callback.
OpFoldResult oneOfr = transformRewriter.getIndexAttr(1);
// Callback that creates the loop header and returns the loop together with
// the tile offsets, tile sizes and destination tensors to use inside it.
scf::SCFTilingOptions::GenerateLoopHeaderFn loopHeaderFn =
[&](RewriterBase &rewriter, Location loc, ArrayRef<Range> loopRanges,
ArrayRef<OpFoldResult> givenTileSizes,
ValueRange outerDestinationTensors)
-> FailureOr<scf::SCFTilingOptions::CustomLoopHeaderInfo> {
// Check that the strides are all 1 (to make it easier in the test).
if (llvm::any_of(loopRanges, [](Range r) {
return !isConstantIntValue(r.stride, 1);
})) {
return emitOpError("unable to handle loop ranges with strides != 1");
}
// Check number of tile sizes is equal to loop dimensions.
if (loopRanges.size() != givenTileSizes.size()) {
return emitOpError("expected number of tile sizes to be same as the "
"number of loops in the operation");
}
// For testing disallow any of the tile sizes being 0.
if (llvm::any_of(givenTileSizes, isZeroInteger)) {
return emitOpError("unhandled case of zero tile size");
}
// For testing, only handle tensor tiling.
if (outerDestinationTensors.empty()) {
return emitOpError("expected destination tensors");
}
// Compute the number of iterations for each of the loops.
AffineExpr s0, s1, s2;
bindSymbols(rewriter.getContext(), s0, s1, s2);
AffineExpr numItersExpr = (s1 - s0).ceilDiv(s2); // ceildiv(ub - lb, tileSize)
SmallVector<OpFoldResult> allNumIters;
allNumIters.reserve(loopRanges.size());
for (auto [loopRange, tileSize] :
llvm::zip_equal(loopRanges, givenTileSizes)) {
// Operands bind as s0 <- range offset, s1 <- range size, s2 <- tile size.
OpFoldResult numIters = affine::makeComposedFoldedAffineApply(
rewriter, loc, numItersExpr,
{loopRange.offset, loopRange.size, tileSize});
allNumIters.push_back(numIters);
}
if (allNumIters.empty()) {
return emitOpError("invalid empty tile sizes and loop ranges");
}
// Multiply all per-dimension iteration counts, starting from 1, to get the
// trip count of the single linearized loop.
AffineExpr mulExpr = s0 * s1;
OpFoldResult cumulative = oneOfr;
for (auto numIters : allNumIters) {
cumulative = affine::makeComposedFoldedAffineApply(
rewriter, loc, mulExpr, {cumulative, numIters});
}
// Bounds of the linearized loop: [0, product of iteration counts), step 1.
Value zeroVal = arith::ConstantIndexOp::create(rewriter, loc, 0);
Value oneVal = arith::ConstantIndexOp::create(rewriter, loc, 1);
Value ub = getValueOrCreateConstantIndexOp(rewriter, loc, cumulative);
// Filled in by the loop body builder below and returned to the caller.
SmallVector<OpFoldResult> offsets;
SmallVector<OpFoldResult> sizes;
SmallVector<Value> innerDestinationTensors;
offsets.reserve(loopRanges.size());
sizes.reserve(loopRanges.size());
AffineExpr d0;
bindDims(rewriter.getContext(), d0);
AffineExpr offsetExpr = s0 + d0 * s1; // lb + iv * tileSize
AffineMap minMap =
AffineMap::get(1, 2, {s0 - d0, s1},
rewriter.getContext()); // min(ub - offset, tileSize)
// The destination tensors are threaded through the loop as its init values.
auto forOp = scf::ForOp::create(
rewriter, loc, zeroVal, ub, oneVal, outerDestinationTensors,
[&](OpBuilder &b, Location bodyLoc, Value linearizedIv,
ValueRange destinations) {
// Recover one normalized induction value per tiled dimension.
auto delinearizeOp = affine::AffineDelinearizeIndexOp::create(
b, bodyLoc, linearizedIv, allNumIters);
for (auto [normalizedIv, range, tileSize] : llvm::zip_equal(
delinearizeOp.getResults(), loopRanges, givenTileSizes)) {
OpFoldResult normalizedIvOfr = getAsOpFoldResult(normalizedIv);
OpFoldResult offset = affine::makeComposedFoldedAffineApply(
b, bodyLoc, offsetExpr,
{normalizedIvOfr, range.offset, tileSize});
offsets.push_back(offset);
OpFoldResult size = affine::makeComposedFoldedAffineMin(
b, bodyLoc, minMap, {offset, range.size, tileSize});
sizes.push_back(size);
}
innerDestinationTensors = llvm::to_vector(destinations);
});
// Leave the insertion point at the end of the loop body before returning.
rewriter.setInsertionPointToEnd(forOp.getBody());
return scf::SCFTilingOptions::CustomLoopHeaderInfo{
{cast<LoopLikeOpInterface>(forOp.getOperation())},
offsets,
sizes,
innerDestinationTensors};
};
// Callback that terminates the loop body: insert every tiled result into its
// destination tensor and yield the updated tensors.
scf::SCFTilingOptions::GenerateLoopTerminatorFn terminatorFn =
[&](RewriterBase &rewriter, Location loc, ValueRange tiledResults,
ArrayRef<SmallVector<OpFoldResult>> resultOffsets,
ArrayRef<SmallVector<OpFoldResult>> resultSizes,
ValueRange destinationTensors) -> LogicalResult {
SmallVector<Value> yieldValues;
yieldValues.reserve(destinationTensors.size());
for (auto [tiledResult, offsets, sizes, destination] : llvm::zip_equal(
tiledResults, resultOffsets, resultSizes, destinationTensors)) {
// All slices are inserted with unit strides.
SmallVector<OpFoldResult> strides(offsets.size(), oneOfr);
Value insertedVal = tensor::InsertSliceOp::create(
rewriter, loc, tiledResult, destination, offsets, sizes, strides);
yieldValues.push_back(insertedVal);
}
scf::YieldOp::create(rewriter, loc, yieldValues);
return success();
};
// Assemble the tiling options from the static `tile_sizes` attribute and the
// two callbacks above.
scf::SCFTilingOptions tilingOptions;
SmallVector<int64_t> staticTileSizes =
extractFromIntegerArrayAttr<int64_t>(getTileSizes());
SmallVector<OpFoldResult> tileSizes =
getAsIndexOpFoldResult(transformRewriter.getContext(), staticTileSizes);
tilingOptions.setTileSizes(tileSizes)
.setLoopType(scf::SCFTilingOptions::LoopType::CustomOp)
.setCustomLoopGenerationFns(loopHeaderFn, terminatorFn);
// Restore the rewriter's insertion point when leaving this function.
OpBuilder::InsertionGuard g(transformRewriter);
transformRewriter.setInsertionPoint(target);
FailureOr<scf::SCFTilingResult> tiledResults =
scf::tileUsingSCF(transformRewriter, target, tilingOptions);
if (failed(tiledResults)) {
return DiagnosedSilenceableFailure::definiteFailure();
}
// Replace the original op and report the tiled ops and loops as results.
transformRewriter.replaceOp(target, tiledResults->replacements);
transformResults.set(getOperation()->getResult(0), tiledResults->tiledOps);
transformResults.set(getOperation()->getResult(1), tiledResults->loops);
return DiagnosedSilenceableFailure::success();
}
#define GET_OP_CLASSES
#include "TestTilingInterfaceTransformOps.cpp.inc"
namespace {
/// Transform dialect extension that registers the test transform ops defined
/// in this file and declares the dialects they depend on.
class TestTilingInterfaceDialectExtension
: public transform::TransformDialectExtension<
TestTilingInterfaceDialectExtension> {
public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
TestTilingInterfaceDialectExtension)
using Base::Base;
/// Declares the affine, index, scf and tensor dialects as dependent dialects
/// and registers every op listed in the generated op list.
void init() {
declareDependentDialect<affine::AffineDialect>();
declareDependentDialect<index::IndexDialect>();
declareDependentDialect<scf::SCFDialect>();
declareDependentDialect<tensor::TensorDialect>();
// With GET_OP_LIST defined, the generated include expands to the list of op
// classes that forms the template argument list below.
registerTransformOps<
#define GET_OP_LIST
#include "TestTilingInterfaceTransformOps.cpp.inc"
>();
}
};
} // namespace
namespace test {
/// Adds the `TilingInterface` test transform dialect extension to `registry`.
void registerTestTilingInterfaceTransformDialectExtension(
    DialectRegistry &registry) {
  registry.addExtensions<TestTilingInterfaceDialectExtension>();
}
} // namespace test