llvm-project/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
Tres Popp 68f58812e3 [mlir] Move casting calls from methods to function calls
The MLIR classes Type/Attribute/Operation/Op/Value support
cast/dyn_cast/isa/dyn_cast_or_null functionality through llvm's doCast
mechanism, in addition to defining methods with the same names.
This change migrates the remaining uses of those methods to the
corresponding free function calls, which were decided on as the more
consistent style.
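
For illustration, a typical rewrite looks as follows (`value` here being
a hypothetical mlir::Value):

```
// Deprecated method form:
if (auto memrefType = value.getType().dyn_cast<MemRefType>())
  ...

// Preferred free function form:
if (auto memrefType = dyn_cast<MemRefType>(value.getType()))
  ...
```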

Note that some classes, such as AffineExpr, still only define these as
methods; adding support for the functional cast/isa calls to them is not
part of this change.

Context:
- https://mlir.llvm.org/deprecation/ at "Use the free function variants
  for dyn_cast/cast/isa/…"
- Original discussion at https://discourse.llvm.org/t/preferred-casting-style-going-forward/68443

Implementation:
This patch updates all remaining uses of the deprecated functionality in
mlir/. This was done with clang-tidy as described below, plus manual
modifications to GPUBase.td and OpenMPOpsInterfaces.td.

The steps are numbered here rather than written as `#` comments in the
script, because git strips comment lines from commit messages:
0. Fetch the change from the following branch, which adds an extra check
   to clang-tidy:
   main...tpopp:llvm-project:tidy-cast-check
1. Build clang-tidy
2. Run clang-tidy over the entire codebase with all checks disabled and
   only the new check enabled. Run it on header files as well.
3. Delete the .inc files that were also modified, so the next build
   regenerates them in a clean state.

```
ninja -C $BUILD_DIR clang-tidy

run-clang-tidy -clang-tidy-binary=$BUILD_DIR/bin/clang-tidy -checks='-*,misc-cast-functions'\
               -header-filter=mlir/ mlir/* -fix

rm -rf $BUILD_DIR/tools/mlir/**/*.inc
```

Differential Revision: https://reviews.llvm.org/D151542
2023-05-26 10:29:55 +02:00

//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/IR/DstBufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Operation.h"

using namespace mlir;
using namespace mlir::bufferization;
using namespace mlir::tensor;

namespace mlir {
namespace tensor {
namespace {
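
/// Bufferization of tensor.cast. Replace with memref.cast, or elide the cast
/// entirely if source and result buffer types already match.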
struct CastOpInterface
    : public BufferizableOpInterface::ExternalModel<CastOpInterface,
                                                    tensor::CastOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
    return {{op->getResult(0), BufferRelation::Equivalent}};
  }

  FailureOr<BaseMemRefType>
  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
                const DenseMap<Value, BaseMemRefType> &fixedTypes) const {
    auto castOp = cast<tensor::CastOp>(op);
    auto maybeSrcBufferType =
        bufferization::getBufferType(castOp.getSource(), options, fixedTypes);
    if (failed(maybeSrcBufferType))
      return failure();
    Attribute memorySpace = maybeSrcBufferType->getMemorySpace();

    // Note: `getMemRefTypeWithFullyDynamicLayout` returns an unranked memref
    // type in case the input is an unranked tensor type.

    // Case 1: Casting an unranked tensor
    if (isa<UnrankedTensorType>(castOp.getSource().getType())) {
      // When casting to a ranked tensor, we cannot infer any static offset or
      // strides from the source. Assume fully dynamic.
      return getMemRefTypeWithFullyDynamicLayout(castOp.getType(), memorySpace);
    }

    // Case 2: Casting to an unranked tensor type
    if (isa<UnrankedTensorType>(castOp.getType())) {
      return getMemRefTypeWithFullyDynamicLayout(castOp.getType(), memorySpace);
    }

    // Case 3: Ranked tensor -> ranked tensor. The offsets and strides do not
    // change.
    auto rankedResultType = cast<RankedTensorType>(castOp.getType());
    return MemRefType::get(
        rankedResultType.getShape(), rankedResultType.getElementType(),
        llvm::cast<MemRefType>(*maybeSrcBufferType).getLayout(), memorySpace);
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto castOp = cast<tensor::CastOp>(op);

    // The result buffer still has the old (pre-cast) type.
    FailureOr<Value> resultBuffer =
        getBuffer(rewriter, castOp.getSource(), options);
    if (failed(resultBuffer))
      return failure();

    // Compute the new type.
    auto resultMemRefType =
        bufferization::getBufferType(castOp.getResult(), options);
    if (failed(resultMemRefType))
      return failure();
    if (resultBuffer->getType() == *resultMemRefType) {
      // This cast is a no-op.
      replaceOpWithBufferizedValues(rewriter, op, *resultBuffer);
      return success();
    }

    // Replace the op with a memref.cast.
    assert(memref::CastOp::areCastCompatible(resultBuffer->getType(),
                                             *resultMemRefType) &&
           "CallOp::bufferize: cast incompatible");
    replaceOpWithNewBufferizedOp<memref::CastOp>(
        rewriter, op, *resultMemRefType, *resultBuffer);

    return success();
  }
};

/// Bufferization of tensor.collapse_shape. Replace with memref.collapse_shape.
struct CollapseShapeOpInterface
    : public BufferizableOpInterface::ExternalModel<CollapseShapeOpInterface,
                                                    tensor::CollapseShapeOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
    // TODO: CollapseShapeOp may allocate at runtime.
    return {{op->getOpResult(0), BufferRelation::Equivalent}};
  }

  FailureOr<BaseMemRefType>
  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
                const DenseMap<Value, BaseMemRefType> &fixedTypes) const {
    auto collapseShapeOp = cast<tensor::CollapseShapeOp>(op);
    auto maybeSrcBufferType = bufferization::getBufferType(
        collapseShapeOp.getSrc(), options, fixedTypes);
    if (failed(maybeSrcBufferType))
      return failure();
    auto srcBufferType = llvm::cast<MemRefType>(*maybeSrcBufferType);
    bool canBeCollapsed = memref::CollapseShapeOp::isGuaranteedCollapsible(
        srcBufferType, collapseShapeOp.getReassociationIndices());

    if (!canBeCollapsed) {
      // If dims cannot be collapsed, this op bufferizes to a new allocation.
      RankedTensorType tensorResultType = collapseShapeOp.getResultType();
      return bufferization::getMemRefTypeWithStaticIdentityLayout(
          tensorResultType, srcBufferType.getMemorySpace());
    }

    return memref::CollapseShapeOp::computeCollapsedType(
        srcBufferType, collapseShapeOp.getReassociationIndices());
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto collapseShapeOp = cast<tensor::CollapseShapeOp>(op);
    RankedTensorType tensorResultType = collapseShapeOp.getResultType();
    FailureOr<Value> maybeBuffer =
        getBuffer(rewriter, collapseShapeOp.getSrc(), options);
    if (failed(maybeBuffer))
      return failure();
    Value buffer = *maybeBuffer;
    auto bufferType = cast<MemRefType>(buffer.getType());

    if (tensorResultType.getRank() == 0) {
      // 0-d collapses must go through a different op builder.
      MemRefType resultType;

      if (bufferType.getLayout().isIdentity()) {
        // Standard layout: result type has no offset.
        MemRefLayoutAttrInterface layout;
        resultType = MemRefType::get({}, tensorResultType.getElementType(),
                                     layout, bufferType.getMemorySpace());
      } else {
        // Source memref has a layout map: result type has the same offset as
        // the source type.
        SmallVector<int64_t> strides;
        int64_t offset;
        if (failed(getStridesAndOffset(bufferType, strides, offset)))
          return failure();
        resultType = MemRefType::get(
            {}, tensorResultType.getElementType(),
            StridedLayoutAttr::get(op->getContext(), offset, {}),
            bufferType.getMemorySpace());
      }

      replaceOpWithNewBufferizedOp<memref::CollapseShapeOp>(
          rewriter, op, resultType, buffer, collapseShapeOp.getReassociation());
      return success();
    }

    // If the dims are not collapsible (due to an incompatible source layout
    // map), force an out-of-place bufferization, i.e., a buffer copy. This
    // newly allocated buffer will have no layout map and thus be collapsible.
    bool canBeCollapsed = memref::CollapseShapeOp::isGuaranteedCollapsible(
        bufferType, collapseShapeOp.getReassociationIndices());
    if (!canBeCollapsed) {
      // TODO: Create alloc_tensor ops during TensorCopyInsertion.
      AnalysisState analysisState(options);
      FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
          rewriter, op->getLoc(), collapseShapeOp.getSrc(),
          analysisState.isTensorYielded(collapseShapeOp.getResult()), options);
      if (failed(tensorAlloc))
        return failure();
      auto memrefType =
          MemRefType::get(collapseShapeOp.getSrcType().getShape(),
                          collapseShapeOp.getSrcType().getElementType(),
                          AffineMap(), bufferType.getMemorySpace());
      buffer = rewriter.create<bufferization::ToMemrefOp>(
          op->getLoc(), memrefType, *tensorAlloc);
    }

    // Result type is inferred by the builder.
    replaceOpWithNewBufferizedOp<memref::CollapseShapeOp>(
        rewriter, op, buffer, collapseShapeOp.getReassociationIndices());
    return success();
  }
};

/// Bufferization of tensor.dim. Replace with memref.dim.
struct DimOpInterface
    : public BufferizableOpInterface::ExternalModel<DimOpInterface,
                                                    tensor::DimOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    // The op reads the tensor's metadata but not its contents.
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
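    // tensor.dim returns an index, not a tensor, so there are no aliasing
    // tensor results.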
    return {};
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto dimOp = cast<tensor::DimOp>(op);
    FailureOr<Value> v = getBuffer(rewriter, dimOp.getSource(), options);
    if (failed(v))
      return failure();
    replaceOpWithNewBufferizedOp<memref::DimOp>(rewriter, op, *v,
                                                dimOp.getIndex());
    return success();
  }
};

/// Bufferization of tensor.empty. This op does not bufferize, but we need an
/// interface implementation, so that the result of this op is considered
/// "writable" (default impl. of `isWritable`). Results of ops that do not
/// implement `BufferizableOpInterface` are not writable.
struct EmptyOpInterface
    : public BufferizableOpInterface::ExternalModel<EmptyOpInterface,
                                                    tensor::EmptyOp> {
  bool resultBufferizesToMemoryWrite(Operation *op, OpResult opResult,
                                     const AnalysisState &state) const {
    // The returned tensor does not have specified contents.
    return false;
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    // tensor.empty ops are used to indicate the shape of a tensor. They have
    // no defined contents and cannot be bufferized. However, they can be
    // converted to bufferization.alloc_tensor ops, which then bufferize to an
    // allocation (--empty-tensor-to-alloc-tensor).
    return op->emitOpError("cannot be bufferized, but can be converted to "
                           "bufferization.alloc_tensor");
  }
};

/// Bufferization of tensor.expand_shape. Replace with memref.expand_shape.
struct ExpandShapeOpInterface
    : public BufferizableOpInterface::ExternalModel<ExpandShapeOpInterface,
                                                    tensor::ExpandShapeOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
    return {{op->getOpResult(0), BufferRelation::Equivalent}};
  }

  FailureOr<BaseMemRefType>
  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
                const DenseMap<Value, BaseMemRefType> &fixedTypes) const {
    auto expandShapeOp = cast<tensor::ExpandShapeOp>(op);
    auto maybeSrcBufferType = bufferization::getBufferType(
        expandShapeOp.getSrc(), options, fixedTypes);
    if (failed(maybeSrcBufferType))
      return failure();
    auto srcBufferType = llvm::cast<MemRefType>(*maybeSrcBufferType);
    auto maybeResultType = memref::ExpandShapeOp::computeExpandedType(
        srcBufferType, expandShapeOp.getResultType().getShape(),
        expandShapeOp.getReassociationIndices());
    if (failed(maybeResultType))
      return failure();
    return *maybeResultType;
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto expandShapeOp = cast<tensor::ExpandShapeOp>(op);
    auto tensorResultType = expandShapeOp.getResultType();
    FailureOr<Value> buffer =
        getBuffer(rewriter, expandShapeOp.getSrc(), options);
    if (failed(buffer))
      return failure();

    // Memref result type is inferred by the builder based on reassociation
    // indices and result shape.
    replaceOpWithNewBufferizedOp<memref::ExpandShapeOp>(
        rewriter, op, tensorResultType.getShape(), *buffer,
        expandShapeOp.getReassociationIndices());
    return success();
  }
};

/// Bufferization of tensor.extract_slice. Replace with memref.subview.
struct ExtractSliceOpInterface
    : public BufferizableOpInterface::ExternalModel<ExtractSliceOpInterface,
                                                    tensor::ExtractSliceOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
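    // The result aliases the source buffer but is only a subview of it, so
    // the relation between the two buffers is unknown.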
    return {{op->getOpResult(0), BufferRelation::Unknown}};
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
    SmallVector<OpFoldResult> mixedOffsets = extractSliceOp.getMixedOffsets();
    SmallVector<OpFoldResult> mixedSizes = extractSliceOp.getMixedSizes();
    SmallVector<OpFoldResult> mixedStrides = extractSliceOp.getMixedStrides();
    Location loc = extractSliceOp.getLoc();

    // Get source buffer.
    FailureOr<Value> srcMemref =
        getBuffer(rewriter, extractSliceOp.getSource(), options);
    if (failed(srcMemref))
      return failure();

    // Take a subview of the source buffer.
    auto resultMemrefType =
        bufferization::getBufferType(extractSliceOp.getResult(), options);
    if (failed(resultMemrefType))
      return failure();
    Value subView = rewriter.create<memref::SubViewOp>(
        loc, llvm::cast<MemRefType>(*resultMemrefType), *srcMemref,
        mixedOffsets, mixedSizes, mixedStrides);

    replaceOpWithBufferizedValues(rewriter, op, subView);
    return success();
  }

  FailureOr<BaseMemRefType>
  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
                const DenseMap<Value, BaseMemRefType> &fixedTypes) const {
    auto extractSliceOp = cast<tensor::ExtractSliceOp>(op);
    assert(value == extractSliceOp.getResult() && "invalid value");
    auto srcMemrefType = bufferization::getBufferType(
        extractSliceOp.getSource(), options, fixedTypes);
    if (failed(srcMemrefType))
      return failure();
    SmallVector<OpFoldResult> mixedOffsets = extractSliceOp.getMixedOffsets();
    SmallVector<OpFoldResult> mixedSizes = extractSliceOp.getMixedSizes();
    SmallVector<OpFoldResult> mixedStrides = extractSliceOp.getMixedStrides();
    return cast<BaseMemRefType>(memref::SubViewOp::inferRankReducedResultType(
        extractSliceOp.getType().getShape(),
        llvm::cast<MemRefType>(*srcMemrefType), mixedOffsets, mixedSizes,
        mixedStrides));
  }
};

/// Bufferization of tensor.extract. Replace with memref.load.
struct ExtractOpInterface
    : public BufferizableOpInterface::ExternalModel<ExtractOpInterface,
                                                    tensor::ExtractOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return true;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
    return {};
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto extractOp = cast<tensor::ExtractOp>(op);
    FailureOr<Value> srcMemref =
        getBuffer(rewriter, extractOp.getTensor(), options);
    if (failed(srcMemref))
      return failure();
    replaceOpWithNewBufferizedOp<memref::LoadOp>(rewriter, op, *srcMemref,
                                                 extractOp.getIndices());
    return success();
  }
};

// Implements backtracking to traverse indices of the output buffer while
// iterating over op.elements().
static void createStores(RewriterBase &rewriter, Location loc, int dim,
                         Value buffer, ArrayRef<int64_t> shape,
                         ArrayRef<Value> constants,
                         OperandRange::iterator &elementIt,
                         SmallVectorImpl<Value> &indices) {
  if (dim == static_cast<int>(shape.size()) - 1) {
    for (int i = 0; i < shape.back(); ++i) {
      indices.back() = constants[i];
      rewriter.create<memref::StoreOp>(loc, *elementIt, buffer, indices);
      ++elementIt;
    }
    return;
  }
  for (int i = 0; i < shape[dim]; ++i) {
    indices[dim] = constants[i];
    createStores(rewriter, loc, dim + 1, buffer, shape, constants, elementIt,
                 indices);
  }
}

/// Bufferization of tensor.from_elements.
struct FromElementsOpInterface
    : public BufferizableOpInterface::ExternalModel<FromElementsOpInterface,
                                                    tensor::FromElementsOp> {
  bool bufferizesToAllocation(Operation *op, OpResult opResult) const {
    return true;
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto fromElementsOp = cast<tensor::FromElementsOp>(op);

    // Should the buffer be deallocated?
    bool dealloc = shouldDeallocateOpResult(
        cast<OpResult>(fromElementsOp.getResult()), options);

    // TODO: Implement memory space for this op.
    if (options.defaultMemorySpace != Attribute())
      return op->emitError("memory space not implemented yet");

    // Allocate a buffer for the result.
    Location loc = op->getLoc();
    auto tensorType = cast<RankedTensorType>(fromElementsOp.getType());
    auto shape = tensorType.getShape();
    // TODO: Create alloc_tensor ops during TensorCopyInsertion.
    FailureOr<Value> tensorAlloc =
        allocateTensorForShapedValue(rewriter, loc, fromElementsOp.getResult(),
                                     /*escape=*/!dealloc, options,
                                     /*copy=*/false);
    if (failed(tensorAlloc))
      return failure();
    auto memrefType =
        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
    Value buffer = rewriter.create<bufferization::ToMemrefOp>(
        op->getLoc(), memrefType, *tensorAlloc);

    // Case: tensor<0xelem_type>.
    if (fromElementsOp.getElements().empty()) {
      replaceOpWithBufferizedValues(rewriter, op, buffer);
      return success();
    }

    // Case: tensor<elem_type>.
    if (shape.empty()) {
      rewriter.create<memref::StoreOp>(
          loc, fromElementsOp.getElements().front(), buffer);
      replaceOpWithBufferizedValues(rewriter, op, buffer);
      return success();
    }

    // Create constants for the range of possible indices [0, max{shape_i}).
    auto maxDim = *std::max_element(shape.begin(), shape.end());
    SmallVector<Value, 2> constants;
    constants.reserve(maxDim);
    for (int i = 0; i < maxDim; ++i)
      constants.push_back(rewriter.create<arith::ConstantIndexOp>(loc, i));

    // Traverse all `elements` and create `memref.store` ops.
    auto elementIt = fromElementsOp.getElements().begin();
    SmallVector<Value, 2> indices(tensorType.getRank(), constants[0]);
    createStores(rewriter, loc, /*dim=*/0, buffer, shape, constants, elementIt,
                 indices);

    replaceOpWithBufferizedValues(rewriter, op, buffer);
    return success();
  }
};

/// Lower the body of a tensor.generate like op (one index-typed bbArg per dim).
/// Such ops are lowered to linalg.map with the given tensor as a destination.
///
/// Example:
/// ```
/// %r = tensor.generate %x, %y {
///   ^bb0(%arg0: index, %arg1: index):
///     %0 = "some_op"(%arg0, %arg1) : (index, index) -> (index)
///     tensor.yield %0 : index
/// } : tensor<?x?xindex>
/// ```
///
/// Is lowered to:
/// ```
/// linalg.map ins() outs(%dest) {
///   %d0 = linalg.index 0 : index
///   %d1 = linalg.index 1 : index
///   %0 = "some_op"(%d0, %d1) : (index, index) -> (index)
///   linalg.yield %0 : index
/// }
/// ```
static Value lowerGenerateLikeOpBody(RewriterBase &rewriter, Location loc,
                                     Value tensorDestination,
                                     ValueRange dynamicSizes,
                                     Region &generateBody) {
  assert(generateBody.hasOneBlock() && "expected body with single block");
  auto tensorType = cast<RankedTensorType>(tensorDestination.getType());
  assert(generateBody.getNumArguments() == tensorType.getRank() &&
         "rank mismatch");

  // Create linalg::MapOp.
  OpBuilder::InsertionGuard g(rewriter);
  auto linalgOp =
      rewriter.create<linalg::MapOp>(loc, tensorType, /*inputs=*/ValueRange(),
                                     /*init=*/tensorDestination);
  Block &linalgBody = linalgOp.getMapper().emplaceBlock();

  // Create linalg::IndexOps.
  rewriter.setInsertionPointToStart(&linalgBody);
  SmallVector<Value> indices;
  for (int64_t dim = 0; dim < tensorType.getRank(); ++dim)
    indices.push_back(rewriter.create<linalg::IndexOp>(loc, dim));

  // Move over body.
  rewriter.mergeBlocks(&generateBody.front(), &linalgBody, indices);
  auto yieldOp = cast<tensor::YieldOp>(linalgBody.getTerminator());
  rewriter.replaceOpWithNewOp<linalg::YieldOp>(yieldOp, yieldOp.getValue());

  return linalgOp.getResult()[0];
}

/// Bufferization of tensor.generate.
struct GenerateOpInterface
    : public BufferizableOpInterface::ExternalModel<GenerateOpInterface,
                                                    tensor::GenerateOp> {
  bool bufferizesToAllocation(Operation *op, OpResult opResult) const {
    return true;
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto generateOp = cast<tensor::GenerateOp>(op);

    // Should the buffer be deallocated?
    bool dealloc = shouldDeallocateOpResult(
        cast<OpResult>(generateOp.getResult()), options);

    // TODO: Implement memory space for this op.
    if (options.defaultMemorySpace != Attribute())
      return op->emitError("memory space not implemented yet");

    // Allocate memory.
    Location loc = op->getLoc();
    FailureOr<Value> tensorAlloc =
        allocateTensorForShapedValue(rewriter, loc, generateOp.getResult(),
                                     /*escape=*/!dealloc, options,
                                     /*copy=*/false);
    if (failed(tensorAlloc))
      return failure();

    Value result = lowerGenerateLikeOpBody(rewriter, loc, *tensorAlloc,
                                           generateOp.getDynamicExtents(),
                                           generateOp.getBody());
    rewriter.replaceOp(generateOp, result);

    return success();
  }
};

/// Bufferization of tensor.insert. Replace with memref.store.
///
/// Note: DstBufferizableOpInterfaceExternalModel provides many default method
/// implementations for DestinationStyle ops.
struct InsertOpInterface
    : public DstBufferizableOpInterfaceExternalModel<InsertOpInterface,
                                                     tensor::InsertOp> {
  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto insertOp = cast<tensor::InsertOp>(op);
    FailureOr<Value> destMemref =
        getBuffer(rewriter, insertOp.getDest(), options);
    if (failed(destMemref))
      return failure();
    rewriter.create<memref::StoreOp>(insertOp.getLoc(), insertOp.getScalar(),
                                     *destMemref, insertOp.getIndices());
    replaceOpWithBufferizedValues(rewriter, op, *destMemref);
    return success();
  }
};

/// Return true if the (ExtractSliceOp, InsertSliceOp) pair matches (i.e.
/// equivalent operand / result and same offset/sizes/strides specification).
template <typename OpTy>
static bool areEquivalentSlices(const AnalysisState &state,
                                ExtractSliceOp extractSliceOp,
                                OpTy insertSliceOp) {
  if (!extractSliceOp || !insertSliceOp)
    return false;
  if (extractSliceOp != insertSliceOp &&
      !state.areEquivalentBufferizedValues(extractSliceOp.getSource(),
                                           insertSliceOp.getDest()))
    return false;
  if (!sameOffsetsSizesAndStrides(extractSliceOp, insertSliceOp,
                                  isEqualConstantIntOrValue))
    return false;
  return true;
}

/// Return true if `value` originates from an ExtractSliceOp that matches the
/// given InsertSliceOp.
template <typename OpTy>
static bool matchesInsertDestination(const AnalysisState &state, Value value,
                                     OpTy insertSliceOp) {
  // Look for matching slices.
  auto matchesSlice = [&](Value val) {
    if (auto extractSliceOp = val.getDefiningOp<ExtractSliceOp>())
      if (areEquivalentSlices(state, extractSliceOp, insertSliceOp))
        return true;
    return false;
  };
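  // Every root of the reverse use-def chain of `value` must be a matching
  // ExtractSliceOp.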
  return static_cast<bool>(llvm::all_of(
      state.findValueInReverseUseDefChain(value, matchesSlice), matchesSlice));
}

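/// Return true if the given read/write conflict can be ruled out based on the
/// special semantics of matching extract_slice/insert_slice pairs.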
template <typename OpTy>
static bool isNotConflictingInsertSliceLikeOp(Operation *op, OpOperand *uRead,
                                              OpOperand *uConflictingWrite,
                                              const AnalysisState &state) {
  Operation *readingOp = uRead->getOwner();
  Operation *conflictingWritingOp = uConflictingWrite->getOwner();

  // Special rules for matching ExtractSliceOp/InsertSliceOp pairs. If
  // uRead is an InsertSliceOp...
  if (auto insertSliceOp = dyn_cast<OpTy>(readingOp)) {
    // As an example, consider the following IR.
    //
    // %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace = [true] }
    // %1 = linalg.fill %cst, %0 {inplace= [true] }
    // %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
    //     {inplace= [true] }

    // TODO: Use insertSliceOp.getDestOpOperand etc. when available.
    if (uRead == &insertSliceOp->getOpOperand(1) /*dest*/ &&
        matchesInsertDestination(state, uConflictingWrite->get(),
                                 insertSliceOp))
      // Case 1: The main insight is that InsertSliceOp reads only part of
      // the destination tensor. The overwritten area is not read. If
      // uConflictingWrite writes into exactly the memory location that is
      // being read by uRead, this is not a conflict.
      //
      // In the above example:
      // uRead             = OpOperand 1 (%t) of tensor.insert_slice
      // uConflictingWrite = OpOperand 1 (%0) of linalg.fill
      //
      // The read of %t does not conflict with the write of the FillOp
      // (same aliases!) because the area that the FillOp operates on is
      // exactly the one that is *not* read via %t.
      return true;

    if (uRead == &insertSliceOp->getOpOperand(0) /*source*/ &&
        uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
        matchesInsertDestination(state, uRead->get(), insertSliceOp))
      // Case 2: The read of the source tensor and the write to the dest
      // tensor via an InsertSliceOp is not a conflict if the read is
      // reading exactly that part of an equivalent tensor that the
      // InsertSliceOp is writing.
      //
      // In the above example:
      // uRead             = OpOperand 0 (%1) of tensor.insert_slice
      // uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
      return true;
  }

  // If uConflictingWrite is an InsertSliceOp...
  if (auto insertSliceOp = dyn_cast<OpTy>(conflictingWritingOp))
    // As an example, consider the following IR.
    //
    // %0 = tensor.extract_slice %t[%a, %b][%c, %d][1, 1] {inplace = [true] }
    // %1 = linalg.fill %cst, %0 {inplace= [true] }
    // %2 = tensor.insert_slice %1 into %t[%a, %b][%c, %d][1, 1]
    //     {inplace= [true] }
    // %3 = vector.transfer_read %1, %cst
    //
    // In the above example:
    // uRead             = OpOperand 0 (%1) of vector.transfer_read
    // uConflictingWrite = OpOperand 1 (%t) of tensor.insert_slice
    // definition        = %1
    //
    // This is not a conflict because the InsertSliceOp overwrites the
    // memory segment of %1 with the exact same data. (Effectively, there
    // is no memory write here.)
    if (uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ &&
        state.areEquivalentBufferizedValues(uRead->get(),
                                            insertSliceOp.getSource()) &&
        matchesInsertDestination(state, insertSliceOp.getSource(),
                                 insertSliceOp))
      return true;

  return false;
}

/// Bufferization of tensor.insert_slice. Replace with a memory copy. Under
/// certain circumstances, this op can also be a no-op.
///
/// Note: DstBufferizableOpInterfaceExternalModel provides many default method
/// implementations for DestinationStyle ops.
struct InsertSliceOpInterface
    : public DstBufferizableOpInterfaceExternalModel<InsertSliceOpInterface,
                                                     tensor::InsertSliceOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
    RankedTensorType destType = insertSliceOp.getDestType();

    // The source is always read.
    if (&opOperand == &op->getOpOperand(0) /*src*/)
      return true;

    // For the destination, it depends...
    assert(&opOperand == &insertSliceOp->getOpOperand(1) && "expected dest");

    // Dest is not read if it is entirely overwritten. E.g.:
    // tensor.insert_slice %a into %t[0][10][1] : ... into tensor<10xf32>
    bool allOffsetsZero =
        llvm::all_of(insertSliceOp.getMixedOffsets(), [](OpFoldResult ofr) {
          return isConstantIntValue(ofr, 0);
        });
    bool sizesMatchDestSizes = llvm::all_of(
        llvm::enumerate(insertSliceOp.getMixedSizes()), [&](const auto &it) {
          return getConstantIntValue(it.value()) ==
                 destType.getDimSize(it.index());
        });
    bool allStridesOne =
        llvm::all_of(insertSliceOp.getMixedStrides(), [](OpFoldResult ofr) {
          return isConstantIntValue(ofr, 1);
        });
    return !(allOffsetsZero && sizesMatchDestSizes && allStridesOne);
  }

  bool isNotConflicting(Operation *op, OpOperand *uRead,
                        OpOperand *uConflictingWrite,
                        const AnalysisState &state) const {
    return isNotConflictingInsertSliceLikeOp<tensor::InsertSliceOp>(
        op, uRead, uConflictingWrite, state);
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    // insert_slice ops arise from tiling and bufferizing them out-of-place is
    // generally a deal breaker. When used with loops, this ends up cloning the
    // whole tensor on every single iteration and is a symptom of a
    // catastrophically bad scheduling decision.
    // TODO: be very loud about it or even consider failing the pass.
    auto insertSliceOp = cast<tensor::InsertSliceOp>(op);
    SmallVector<OpFoldResult> mixedOffsets = insertSliceOp.getMixedOffsets();
    SmallVector<OpFoldResult> mixedSizes = insertSliceOp.getMixedSizes();
    SmallVector<OpFoldResult> mixedStrides = insertSliceOp.getMixedStrides();
    Location loc = insertSliceOp.getLoc();

    // Get destination buffer.
    FailureOr<Value> dstMemref =
        getBuffer(rewriter, insertSliceOp.getDest(), options);
    if (failed(dstMemref))
      return failure();

    // Take a subview of the destination buffer.
    auto dstMemrefType = cast<MemRefType>(dstMemref->getType());
    auto subviewMemRefType =
        cast<MemRefType>(memref::SubViewOp::inferRankReducedResultType(
            insertSliceOp.getSourceType().getShape(), dstMemrefType,
            mixedOffsets, mixedSizes, mixedStrides));
    Value subView = rewriter.create<memref::SubViewOp>(
        loc, subviewMemRefType, *dstMemref, mixedOffsets, mixedSizes,
        mixedStrides);

    // Copy tensor. If this tensor.insert_slice has a matching
    // tensor.extract_slice, the copy operation will eventually fold away.
    FailureOr<Value> srcMemref =
        getBuffer(rewriter, insertSliceOp.getSource(), options);
    if (failed(srcMemref))
      return failure();
    if (failed(options.createMemCpy(rewriter, loc, *srcMemref, subView)))
      return failure();

    replaceOpWithBufferizedValues(rewriter, op, *dstMemref);
    return success();
  }
};

/// Bufferization of tensor.pad. Replace with bufferization.alloc_tensor +
/// linalg.map + insert_slice.
/// For best performance, vectorize before bufferization (better performance in
/// case of padding with a constant).
struct PadOpInterface
    : public BufferizableOpInterface::ExternalModel<PadOpInterface,
                                                    tensor::PadOp> {
  bool bufferizesToAllocation(Operation *op, OpResult opResult) const {
    return true;
  }

  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return true;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
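    // The result is a fresh allocation (see `bufferizesToAllocation`), so it
    // does not alias with any operand.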
    return {};
  }

  FailureOr<BaseMemRefType>
  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
                const DenseMap<Value, BaseMemRefType> &fixedTypes) const {
    // Infer memory space from the source tensor.
    auto padOp = cast<tensor::PadOp>(op);
    auto maybeSrcBufferType =
        bufferization::getBufferType(padOp.getSource(), options, fixedTypes);
    if (failed(maybeSrcBufferType))
      return failure();
    MemRefLayoutAttrInterface layout;
    return MemRefType::get(padOp.getResultType().getShape(),
                           padOp.getResultType().getElementType(), layout,
                           maybeSrcBufferType->getMemorySpace());
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto padOp = cast<tensor::PadOp>(op);
    Location loc = padOp.getLoc();
    RankedTensorType resultType = padOp.getResultType();
    RankedTensorType srcType = padOp.getSourceType();

    auto toValue = [&](OpFoldResult ofr) {
      if (ofr.is<Value>())
        return ofr.get<Value>();
      return rewriter
          .create<arith::ConstantIndexOp>(loc, *getConstantIntValue(ofr))
          .getResult();
    };

    // Compute dynamic result dimensions.
    SmallVector<OpFoldResult> mixedLowPad = padOp.getMixedLowPad();
    SmallVector<OpFoldResult> mixedHighPad = padOp.getMixedHighPad();
    SmallVector<Value> dynamicSizes;
    for (int64_t i = 0; i < resultType.getRank(); ++i) {
      if (!resultType.isDynamicDim(i))
        continue;
      Value srcDim = rewriter.create<tensor::DimOp>(loc, padOp.getSource(), i);
      Value lowPad = toValue(mixedLowPad[i]);
      Value highPad = toValue(mixedHighPad[i]);
      AffineExpr s0, s1, s2;
      bindSymbols(op->getContext(), s0, s1, s2);
      AffineExpr sumExpr = s0 + s1 + s2;
      Value sum = rewriter.create<affine::AffineApplyOp>(
          loc, sumExpr, ValueRange{srcDim, lowPad, highPad});
      dynamicSizes.push_back(sum);
    }

    // Should the buffer be deallocated?
    bool dealloc =
        shouldDeallocateOpResult(cast<OpResult>(padOp.getResult()), options);
    // Allocate a buffer for the padded result.
    FailureOr<Value> tensorAlloc =
        allocateTensorForShapedValue(rewriter, loc, padOp.getResult(),
                                     /*escape=*/!dealloc, options,
                                     /*copy=*/false);
    if (failed(tensorAlloc))
      return failure();

    // tensor::PadOp is like tensor::GenerateOp: The only difference is that
    // only a part of the generated tensor is needed. For simplicity, we reuse
    // the same functionality here.
    Value filledBuffer = lowerGenerateLikeOpBody(
        rewriter, loc, *tensorAlloc, dynamicSizes, padOp.getBodyRegion());

    // Create tensor::InsertSliceOp.
    SmallVector<OpFoldResult> sliceSizes =
        getMixedSizes(rewriter, loc, padOp.getSource());
    SmallVector<OpFoldResult> sliceStrides(srcType.getRank(),
                                           rewriter.getIndexAttr(1));
    rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
        padOp, padOp.getSource(), filledBuffer,
        /*offsets=*/padOp.getMixedLowPad(), sliceSizes, sliceStrides);

    return success();
  }
};

/// Bufferization of tensor.rank. Replace with memref.rank.
struct RankOpInterface
    : public BufferizableOpInterface::ExternalModel<RankOpInterface,
                                                    tensor::RankOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    // The op reads the tensor's metadata but not its contents.
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
    return {};
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto rankOp = cast<tensor::RankOp>(op);
    FailureOr<Value> v = getBuffer(rewriter, rankOp.getTensor(), options);
    if (failed(v))
      return failure();
    replaceOpWithNewBufferizedOp<memref::RankOp>(rewriter, op, rankOp.getType(),
                                                 *v);
    return success();
  }
};

/// Bufferization of tensor.reshape. Replace with memref.reshape.
struct ReshapeOpInterface
    : public BufferizableOpInterface::ExternalModel<ReshapeOpInterface,
                                                    tensor::ReshapeOp> {
  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    if (&opOperand == &op->getOpOperand(1) /* shape */)
      return true;
    return false;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return false;
  }

  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
    return {{op->getOpResult(0), BufferRelation::Equivalent}};
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    auto reshapeOp = cast<tensor::ReshapeOp>(op);
    FailureOr<Value> srcBuffer =
        getBuffer(rewriter, reshapeOp.getSource(), options);
    FailureOr<Value> shapeBuffer =
        getBuffer(rewriter, reshapeOp.getShape(), options);
    if (failed(srcBuffer) || failed(shapeBuffer))
      return failure();
    auto resultMemRefType = getMemRefType(
        reshapeOp.getResult(), options, /*layout=*/{},
        cast<BaseMemRefType>(srcBuffer->getType()).getMemorySpace());
    replaceOpWithNewBufferizedOp<memref::ReshapeOp>(
        rewriter, op, resultMemRefType, *srcBuffer, *shapeBuffer);
    return success();
  }
};

/// Analysis of ParallelInsertSliceOp.
struct ParallelInsertSliceOpInterface
    : public BufferizableOpInterface::ExternalModel<
          ParallelInsertSliceOpInterface, ParallelInsertSliceOp> {
  AliasingOpResultList getAliasingOpResults(Operation *op, OpOperand &opOperand,
                                            const AnalysisState &state) const {
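    // ParallelInsertSliceOp has no results, so there is nothing to alias.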
    return {};
  }

  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
                              const AnalysisState &state) const {
    return true;
  }

  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
                               const AnalysisState &state) const {
    return &opOperand == &op->getOpOperand(1) /*dest*/;
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    OpBuilder::InsertionGuard g(rewriter);
    auto parallelInsertSliceOp = cast<ParallelInsertSliceOp>(op);
    ParallelCombiningOpInterface parallelCombiningParent =
        parallelInsertSliceOp.getParallelCombiningParent();

    // Bufferize the op outside of the parallel combining terminator.
    rewriter.setInsertionPoint(parallelCombiningParent);

    // Get source and destination buffers.
    FailureOr<Value> destBuffer =
        getBuffer(rewriter, parallelInsertSliceOp.getDest(), options);
    if (failed(destBuffer))
      return failure();
    FailureOr<Value> srcBuffer =
        getBuffer(rewriter, parallelInsertSliceOp.getSource(), options);
    if (failed(srcBuffer))
      return failure();

    // Take a subview of the destination buffer.
    auto destBufferType = cast<MemRefType>(destBuffer->getType());
    auto subviewMemRefType =
        cast<MemRefType>(memref::SubViewOp::inferRankReducedResultType(
            parallelInsertSliceOp.getSourceType().getShape(), destBufferType,
            parallelInsertSliceOp.getMixedOffsets(),
            parallelInsertSliceOp.getMixedSizes(),
            parallelInsertSliceOp.getMixedStrides()));
    Value subview = rewriter.create<memref::SubViewOp>(
        parallelInsertSliceOp.getLoc(), subviewMemRefType, *destBuffer,
        parallelInsertSliceOp.getMixedOffsets(),
        parallelInsertSliceOp.getMixedSizes(),
        parallelInsertSliceOp.getMixedStrides());

    // This memcpy will fold away if everything bufferizes in-place.
    if (failed(options.createMemCpy(rewriter, parallelInsertSliceOp.getLoc(),
                                    *srcBuffer, subview)))
      return failure();

    // In case the source was allocated in the same block, make sure that the
    // deallocation op (if any) appears after the memcpy. By default, deallocs
    // are placed before the terminator, but this does not work for ForallOp
    // because the terminator does more than just yielding a value.
    //
    // Note: This is not a problem for the destination buffer because these are
    // assumed to always bufferize in-place.
    for (Operation *user : srcBuffer->getUsers()) {
      if (hasEffect<MemoryEffects::Free>(user)) {
        if (user->getBlock() == parallelCombiningParent->getBlock())
          user->moveBefore(user->getBlock()->getTerminator());
        break;
      }
    }

    // Delete the op.
    rewriter.eraseOp(op);
    return success();
  }

  bool isNotConflicting(Operation *op, OpOperand *uRead,
                        OpOperand *uConflictingWrite,
                        const AnalysisState &state) const {
    return isNotConflictingInsertSliceLikeOp<tensor::ParallelInsertSliceOp>(
        op, uRead, uConflictingWrite, state);
  }
};

/// Bufferization of tensor.splat. Bufferizes to a new allocation that is filled
/// with a linalg.map. Similar to tensor.generate.
struct SplatOpInterface
    : public BufferizableOpInterface::ExternalModel<SplatOpInterface,
                                                    tensor::SplatOp> {
  bool bufferizesToAllocation(Operation *op, OpResult opResult) const {
    return true;
  }

  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                          const BufferizationOptions &options) const {
    OpBuilder::InsertionGuard g(rewriter);
    auto splatOp = cast<tensor::SplatOp>(op);

    // Should the buffer be deallocated?
    bool dealloc =
        shouldDeallocateOpResult(cast<OpResult>(splatOp.getResult()), options);

    // TODO: Implement memory space for this op.
    if (options.defaultMemorySpace != Attribute())
      return op->emitError("memory space not implemented yet");

    // Allocate memory.
    Location loc = op->getLoc();
    FailureOr<Value> tensorAlloc =
        allocateTensorForShapedValue(rewriter, loc, splatOp.getResult(),
                                     /*escape=*/!dealloc, options,
                                     /*copy=*/false);
    if (failed(tensorAlloc))
      return failure();

    // Create linalg::MapOp.
    auto tensorType = cast<RankedTensorType>(tensorAlloc->getType());
    auto linalgOp =
        rewriter.create<linalg::MapOp>(loc, tensorType, /*inputs=*/ValueRange(),
                                       /*init=*/*tensorAlloc);
    Block &linalgBody = linalgOp.getMapper().emplaceBlock();

    // The mapper body simply yields the splat value: every element of the
    // result is the same.
    rewriter.setInsertionPointToStart(&linalgBody);
    rewriter.create<linalg::YieldOp>(loc, splatOp.getInput());
    rewriter.replaceOp(splatOp, linalgOp.getResult()[0]);

    return success();
  }
};

} // namespace
} // namespace tensor
} // namespace mlir

void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, tensor::TensorDialect *dialect) {
    CastOp::attachInterface<CastOpInterface>(*ctx);
    CollapseShapeOp::attachInterface<CollapseShapeOpInterface>(*ctx);
    DimOp::attachInterface<DimOpInterface>(*ctx);
    EmptyOp::attachInterface<EmptyOpInterface>(*ctx);
    ExpandShapeOp::attachInterface<ExpandShapeOpInterface>(*ctx);
    ExtractSliceOp::attachInterface<ExtractSliceOpInterface>(*ctx);
    ExtractOp::attachInterface<ExtractOpInterface>(*ctx);
    FromElementsOp::attachInterface<FromElementsOpInterface>(*ctx);
    GenerateOp::attachInterface<GenerateOpInterface>(*ctx);
    InsertOp::attachInterface<InsertOpInterface>(*ctx);
    InsertSliceOp::attachInterface<InsertSliceOpInterface>(*ctx);
    PadOp::attachInterface<PadOpInterface>(*ctx);
    ParallelInsertSliceOp::attachInterface<ParallelInsertSliceOpInterface>(
        *ctx);
    RankOp::attachInterface<RankOpInterface>(*ctx);
    ReshapeOp::attachInterface<ReshapeOpInterface>(*ctx);
    SplatOp::attachInterface<SplatOpInterface>(*ctx);
    // Load additional dialects of which ops may get created.
    ctx->loadDialect<arith::ArithDialect, linalg::LinalgDialect>();
  });
}