[MLIR][Linalg] Remove matmul_transpose variants (#147961)

Removes the `(batch_)matmul_transpose_{a|b}` variants from OpDSL and
replaces them with `matmul affine_maps [...]` whenever appropriate. This is
in line with the
[plan](https://discourse.llvm.org/t/rfc-op-explosion-in-linalg/82863),
and can be done since #104783 merged.

See:
https://discourse.llvm.org/t/deprecate-batch-matmul-transpose-a-b-linalg-operations/87245

Issues investigated:
* pad transform tests that could use `matmul` instead, so change to
that.
* ArmSME test using transpose actually needed it, so changed to `matmul`
+ affine maps.

Arm tests validated by @banach-space (thanks!!).
This commit is contained in:
Renato Golin 2025-08-08 22:20:27 +01:00 committed by GitHub
parent 5a009838a2
commit d15280894b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 689 additions and 902 deletions

View File

@ -145,8 +145,7 @@ std::pair<int64_t, int64_t> getFmrFromWinogradConv2DFmr(WinogradConv2DFmr fmr);
#define GET_OP_CLASSES
#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.h.inc"
namespace mlir {
namespace linalg {
namespace mlir::linalg {
/// Returns the outer shape in the packed domain before applying the
/// transposition.
@ -155,7 +154,194 @@ template <typename OpTy,
std::is_same_v<OpTy, linalg::UnPackOp>>>
SmallVector<int64_t> getPackedOuterShapeWithoutTransposition(OpTy packOrUnPack);
} // namespace linalg
} // namespace mlir
/// Specialization of `linalg.matmul` op that has a transpose map on A.
///
/// This is not a distinct registered operation: `resolveTypeID` returns
/// `MatmulOp`'s TypeID, and `classof` matches any `linalg.matmul` whose
/// `indexing_maps` attribute equals the transpose-A maps. The class only
/// supplies convenience builders that attach those maps.
class MatmulTransposeAOp : public MatmulOp {
  /// Create an affine map for a transpose-A matmul. Used only in the builders.
  static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);

public:
  // Inherit all MatmulOp constructors; instances are plain matmul ops.
  using MatmulOp::MatmulOp;
  /// Resolve to MatmulOp's TypeID: this class does not register its own op.
  static ::mlir::TypeID resolveTypeID() { return TypeID::get<MatmulOp>(); }

  /// Build a transpose A matmul.
  static void build(OpBuilder &builder, OperationState &result,
                    ValueRange inputs, ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static MatmulTransposeAOp create(OpBuilder &builder, Location location,
                                   ValueRange inputs, ValueRange outputs,
                                   ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose A matmul with a specific result type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static MatmulTransposeAOp create(OpBuilder &builder, Location location,
                                   TypeRange resultTensorTypes,
                                   ValueRange inputs, ValueRange outputs,
                                   ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose A matmul with a specific result type and a cast type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs, Attribute cast,
                    ArrayRef<NamedAttribute> attributes = {});
  static MatmulTransposeAOp create(OpBuilder &builder, Location location,
                                   TypeRange resultTensorTypes,
                                   ValueRange inputs, ValueRange outputs,
                                   Attribute cast,
                                   ArrayRef<NamedAttribute> attributes = {});

  /// Checks if the affine map is the expected one for this operation.
  static bool isDefaultIndexingMaps(Attribute attr);
  /// True iff `op` is a `linalg.matmul` carrying the transpose-A maps.
  static bool classof(Operation *op);
};
/// Specialization of `linalg.matmul` op that has a transpose map on B.
///
/// This is not a distinct registered operation: `resolveTypeID` returns
/// `MatmulOp`'s TypeID, and `classof` matches any `linalg.matmul` whose
/// `indexing_maps` attribute equals the transpose-B maps. The class only
/// supplies convenience builders that attach those maps.
class MatmulTransposeBOp : public MatmulOp {
  /// Create an affine map for a transpose-B matmul. Used only in the builders.
  static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);

public:
  // Inherit all MatmulOp constructors; instances are plain matmul ops.
  using MatmulOp::MatmulOp;
  /// Resolve to MatmulOp's TypeID: this class does not register its own op.
  static ::mlir::TypeID resolveTypeID() { return TypeID::get<MatmulOp>(); }

  /// Build a transpose B matmul.
  static void build(OpBuilder &builder, OperationState &result,
                    ValueRange inputs, ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static MatmulTransposeBOp create(OpBuilder &builder, Location location,
                                   ValueRange inputs, ValueRange outputs,
                                   ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose B matmul with a specific result type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static MatmulTransposeBOp create(OpBuilder &builder, Location location,
                                   TypeRange resultTensorTypes,
                                   ValueRange inputs, ValueRange outputs,
                                   ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose B matmul with a specific result type and a cast type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs, Attribute cast,
                    ArrayRef<NamedAttribute> attributes = {});
  static MatmulTransposeBOp create(OpBuilder &builder, Location location,
                                   TypeRange resultTensorTypes,
                                   ValueRange inputs, ValueRange outputs,
                                   Attribute cast,
                                   ArrayRef<NamedAttribute> attributes = {});

  /// Checks if the affine map is the expected one for this operation.
  static bool isDefaultIndexingMaps(Attribute attr);
  /// True iff `op` is a `linalg.matmul` carrying the transpose-B maps.
  static bool classof(Operation *op);
};
/// Specialization of `linalg.batch_matmul` op that has a transpose map on A.
///
/// Not a distinct registered operation: `resolveTypeID` returns
/// `BatchMatmulOp`'s TypeID, and `classof` matches any `linalg.batch_matmul`
/// whose `indexing_maps` attribute equals the transpose-A maps. The class
/// only supplies convenience builders that attach those maps.
class BatchMatmulTransposeAOp : public BatchMatmulOp {
  /// Create an affine map for a transpose-A batch_matmul. Used only in the
  /// builders.
  static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);

public:
  // Inherit all BatchMatmulOp constructors; instances are plain batch_matmul.
  using BatchMatmulOp::BatchMatmulOp;
  /// Resolve to BatchMatmulOp's TypeID: no separate op type is registered.
  static ::mlir::TypeID resolveTypeID() { return TypeID::get<BatchMatmulOp>(); }

  /// Build a transpose A batch matmul.
  static void build(OpBuilder &builder, OperationState &result,
                    ValueRange inputs, ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static BatchMatmulTransposeAOp
  create(OpBuilder &builder, Location location, ValueRange inputs,
         ValueRange outputs, ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose A batch matmul with a specific result type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static BatchMatmulTransposeAOp
  create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
         ValueRange inputs, ValueRange outputs,
         ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose A batch matmul with a specific result type and a cast
  /// type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs, Attribute cast,
                    ArrayRef<NamedAttribute> attributes = {});
  static BatchMatmulTransposeAOp
  create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
         ValueRange inputs, ValueRange outputs, Attribute cast,
         ArrayRef<NamedAttribute> attributes = {});

  /// Checks if the affine map is the expected one for this operation.
  static bool isDefaultIndexingMaps(Attribute attr);
  /// True iff `op` is a `linalg.batch_matmul` carrying the transpose-A maps.
  static bool classof(Operation *op);
};
/// Specialization of `linalg.batch_matmul` op that has a transpose map on B.
///
/// Not a distinct registered operation: `resolveTypeID` returns
/// `BatchMatmulOp`'s TypeID, and `classof` matches any `linalg.batch_matmul`
/// whose `indexing_maps` attribute equals the transpose-B maps. The class
/// only supplies convenience builders that attach those maps.
class BatchMatmulTransposeBOp : public BatchMatmulOp {
  /// Create an affine map for a transpose-B batch_matmul. Used only in the
  /// builders.
  static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);

public:
  // Inherit all BatchMatmulOp constructors; instances are plain batch_matmul.
  using BatchMatmulOp::BatchMatmulOp;
  /// Resolve to BatchMatmulOp's TypeID: no separate op type is registered.
  static ::mlir::TypeID resolveTypeID() { return TypeID::get<BatchMatmulOp>(); }

  /// Build a transpose B batch matmul.
  static void build(OpBuilder &builder, OperationState &result,
                    ValueRange inputs, ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static BatchMatmulTransposeBOp
  create(OpBuilder &builder, Location location, ValueRange inputs,
         ValueRange outputs, ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose B batch matmul with a specific result type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs,
                    ArrayRef<NamedAttribute> attributes = {});
  static BatchMatmulTransposeBOp
  create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
         ValueRange inputs, ValueRange outputs,
         ArrayRef<NamedAttribute> attributes = {});

  /// Build a transpose B batch matmul with a specific result type and a cast
  /// type.
  static void build(OpBuilder &builder, OperationState &result,
                    TypeRange resultTensorTypes, ValueRange inputs,
                    ValueRange outputs, Attribute cast,
                    ArrayRef<NamedAttribute> attributes = {});
  static BatchMatmulTransposeBOp
  create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
         ValueRange inputs, ValueRange outputs, Attribute cast,
         ArrayRef<NamedAttribute> attributes = {});

  /// Checks if the affine map is the expected one for this operation.
  static bool isDefaultIndexingMaps(Attribute attr);
  /// True iff `op` is a `linalg.batch_matmul` carrying the transpose-B maps.
  static bool classof(Operation *op);
};
} // namespace mlir::linalg
#endif // MLIR_DIALECT_LINALG_IR_LINALG_H

View File

@ -1055,152 +1055,6 @@ structured_op: !LinalgStructuredOpConfig
- !ScalarExpression
scalar_arg: BZp
--- !LinalgOpConfig
# OpDSL definition of `linalg.matmul_transpose_a` (C++: MatmulTransposeAOp).
# Per the static indexing maps below, the access pattern is
#   A: (d2, d0), B: (d2, d1), C: (d0, d1) — i.e. C += transpose(A) * B.
metadata: !LinalgOpMetadata
name: matmul_transpose_a
cpp_class_name: MatmulTransposeAOp
doc: |-
Performs a matrix multiplication of two 2D inputs with lhs operand
transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
implements:
- LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: A
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
- !LinalgOperandDefConfig
name: B
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
- !LinalgOperandDefConfig
name: C
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
- !LinalgOperandDefConfig
name: cast
kind: type_fn_attr
default_fn: cast_signed
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d0)>
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
iterator_types:
- parallel
- parallel
- reduction
# C = add(C, mul(cast(A), cast(B))), cast controlled by the `cast` attribute.
assignments:
- !ScalarAssign
arg: C
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: C
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
attr_name: cast
type_var: U
operands:
- !ScalarExpression
scalar_arg: A
- !ScalarExpression
scalar_fn:
kind: type
attr_name: cast
type_var: U
operands:
- !ScalarExpression
scalar_arg: B
--- !LinalgOpConfig
# OpDSL definition of `linalg.matmul_transpose_b` (C++: MatmulTransposeBOp).
# Per the static indexing maps below, the access pattern is
#   A: (d0, d2), B: (d1, d2), C: (d0, d1) — i.e. C += A * transpose(B).
metadata: !LinalgOpMetadata
name: matmul_transpose_b
cpp_class_name: MatmulTransposeBOp
doc: |-
Performs a matrix multiplication of two 2D inputs with rhs operand
transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
implements:
- LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: A
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
- !LinalgOperandDefConfig
name: B
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
- !LinalgOperandDefConfig
name: C
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
- !LinalgOperandDefConfig
name: cast
kind: type_fn_attr
default_fn: cast_signed
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d1, d2)>
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
iterator_types:
- parallel
- parallel
- reduction
# C = add(C, mul(cast(A), cast(B))), cast controlled by the `cast` attribute.
assignments:
- !ScalarAssign
arg: C
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: C
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
attr_name: cast
type_var: U
operands:
- !ScalarExpression
scalar_arg: A
- !ScalarExpression
scalar_fn:
kind: type
attr_name: cast
type_var: U
operands:
- !ScalarExpression
scalar_arg: B
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: mmt4d
cpp_class_name: Mmt4DOp
@ -1358,146 +1212,6 @@ structured_op: !LinalgStructuredOpConfig
- !ScalarExpression
scalar_arg: rhs
--- !LinalgOpConfig
# OpDSL definition of `linalg.batch_matmul_transpose_a`
# (C++: BatchMatmulTransposeAOp). Per the static indexing maps below:
#   A: (d0, d3, d1), B: (d0, d3, d2), C: (d0, d1, d2)
# i.e. per batch d0: C += transpose(A) * B. Casting is fixed to cast_signed.
metadata: !LinalgOpMetadata
name: batch_matmul_transpose_a
cpp_class_name: BatchMatmulTransposeAOp
doc: |-
Performs a batched matrix multiplication of two 3D inputs where lhs operand
has its non-batch dimensions transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
implements:
- LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: A
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
- !LinalgOperandDefConfig
name: B
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
- !LinalgOperandDefConfig
name: C
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d1)>
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
iterator_types:
- parallel
- parallel
- parallel
- reduction
# C = add(C, mul(cast_signed(A), cast_signed(B)))
assignments:
- !ScalarAssign
arg: C
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: C
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: A
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: B
--- !LinalgOpConfig
# OpDSL definition of `linalg.batch_matmul_transpose_b`
# (C++: BatchMatmulTransposeBOp). Per the static indexing maps below:
#   A: (d0, d1, d3), B: (d0, d2, d3), C: (d0, d1, d2)
# i.e. per batch d0: C += A * transpose(B). Casting is fixed to cast_signed.
metadata: !LinalgOpMetadata
name: batch_matmul_transpose_b
cpp_class_name: BatchMatmulTransposeBOp
doc: |-
Performs a batched matrix multiplication of two 3D inputs where rhs operand
has its non-batch dimensions transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
implements:
- LinalgContractionOpInterface
structured_op: !LinalgStructuredOpConfig
args:
- !LinalgOperandDefConfig
name: A
kind: input_tensor
type_var: T1
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
- !LinalgOperandDefConfig
name: B
kind: input_tensor
type_var: T2
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
- !LinalgOperandDefConfig
name: C
kind: output_tensor
type_var: U
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
indexing_maps: !LinalgIndexingMapsConfig
static_indexing_maps:
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d2, d3)>
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
iterator_types:
- parallel
- parallel
- parallel
- reduction
# C = add(C, mul(cast_signed(A), cast_signed(B)))
assignments:
- !ScalarAssign
arg: C
value: !ScalarExpression
scalar_fn:
kind: binary
fn_name: add
operands:
- !ScalarExpression
scalar_arg: C
- !ScalarExpression
scalar_fn:
kind: binary
fn_name: mul
operands:
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: A
- !ScalarExpression
scalar_fn:
kind: type
fn_name: cast_signed
type_var: U
operands:
- !ScalarExpression
scalar_arg: B
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
name: quantized_batch_matmul
cpp_class_name: QuantizedBatchMatmulOp

View File

@ -785,6 +785,9 @@ def MatmulOp : LinalgStructuredBase_Op<"matmul", [
/// Returns a list of AffineMap with the default matmul indexing characteristic.
static SmallVector<AffineMap> getDefaultIndexingMaps(MLIRContext *context);
/// Returns true if the AffineMap is the default matmul indexing characteristic.
static bool isDefaultIndexingMaps(Attribute attr);
/// Returns true if the given broadcast map \p bcastMap is valid for this op.
bool isValidLhsRhsBroadcastMap(AffineMap bcastMap);
@ -1057,6 +1060,9 @@ def BatchMatmulOp : LinalgStructuredBase_Op<"batch_matmul", !listconcat([AttrSiz
/// Returns a list with default AffineMap(s), i.e. without broadcasts and transpositions.
static SmallVector<AffineMap> getDefaultIndexingMaps(MLIRContext *context);
/// Returns true if the AffineMap is the default batch matmul indexing characteristic.
static bool isDefaultIndexingMaps(Attribute attr);
/// Returns true if the given broadcast map \p bcastMap is valid for this op.
bool isValidLhsRhsBroadcastMap(AffineMap bcastMap, bool isLHS = true);
@ -1181,6 +1187,9 @@ def BatchReduceMatmulOp : LinalgStructuredBase_Op<"batch_reduce_matmul", [
/// Returns a list of AffineMap with the default batch_reduce_matmul indexing characteristic.
static SmallVector<AffineMap> getDefaultIndexingMaps(MLIRContext *context);
/// Returns true if the AffineMap is the default batch reduce matmul indexing characteristic.
static bool isDefaultIndexingMaps(Attribute attr);
/// Returns true if the given broadcast map \p bcastMap is valid for this op.
bool isValidLhsRhsBroadcastMap(AffineMap bcastMap, bool isLHS = true);

View File

@ -194,9 +194,10 @@ static void buildMatmulOp(OpBuilder &b, OperationState &state,
ArrayRef<AffineMap> indexingMaps) {
// Initialize indexingMaps attribute, for MatmulOp.
SmallVector<Attribute, 3> indexingMapsAttrVal;
indexingMapsAttrVal = llvm::map_to_vector(
MatmulOp::getDefaultIndexingMaps(b.getContext()),
[](AffineMap map) -> Attribute { return AffineMapAttr::get(map); });
indexingMapsAttrVal =
llvm::map_to_vector(indexingMaps, [](AffineMap map) -> Attribute {
return AffineMapAttr::get(map);
});
state.addAttribute("indexing_maps", b.getArrayAttr(indexingMapsAttrVal));
return buildStructuredOp(b, state, resultTensorTypes, inputs, outputs,
attributes, regionBuilder);
@ -3749,6 +3750,25 @@ std::pair<int64_t, int64_t> getFmrFromWinogradConv2DFmr(WinogradConv2DFmr fmr) {
// MatMulOp
//===----------------------------------------------------------------------===//
/// For each map in `maps`, collects the dimension positions appearing as the
/// map's results. Fails if any entry is not an AffineMapAttr, or if any map
/// result is not a plain dimension expression (e.g. a constant or a sum).
static FailureOr<SmallVector<SmallVector<int64_t>>>
getAffineResultPositions(ArrayAttr maps) {
  SmallVector<SmallVector<int64_t>> allPositions;
  for (Attribute entry : maps) {
    auto mapAttr = dyn_cast<AffineMapAttr>(entry);
    if (!mapAttr)
      return failure();
    SmallVector<int64_t> resultDims;
    for (AffineExpr expr : mapAttr.getAffineMap().getResults()) {
      auto dimExpr = dyn_cast<AffineDimExpr>(expr);
      if (!dimExpr)
        return failure();
      resultDims.push_back(dimExpr.getPosition());
    }
    allPositions.push_back(std::move(resultDims));
  }
  return allPositions;
}
/// Returns a list of AffineMap with the typical matmul indexing characteristic.
SmallVector<AffineMap> MatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
AffineExpr d0, d1, d2;
@ -3760,6 +3780,20 @@ SmallVector<AffineMap> MatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
return indexingMaps;
}
/// Returns true if `attr` is the ArrayAttr of indexing maps matching the
/// default (non-transposed, non-broadcast) matmul access pattern:
///   A: (d0, d2), B: (d2, d1), C: (d0, d1).
bool MatmulOp::isDefaultIndexingMaps(Attribute attr) {
  // Guard the null case: `dyn_cast` is not valid on a null Attribute, and
  // callers may pass the result of `op->getAttr("indexing_maps")` directly,
  // which is null when the attribute is absent.
  if (!attr)
    return false;
  ArrayAttr maps = dyn_cast<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 2} &&
         (*positions)[1] == SmallVector<int64_t>{2, 1} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1};
}
SmallVector<utils::IteratorType> MatmulOp::getIteratorTypesArray() {
return SmallVector<utils::IteratorType>{utils::IteratorType::parallel,
utils::IteratorType::parallel,
@ -3912,6 +3946,380 @@ Speculation::Speculatability MatmulOp::getSpeculatability() {
return getGenericSpeculatabilityImpl(cast<LinalgOp>(getOperation()));
}
/// Indexing maps for a transpose-A matmul: A is read as (k, m), B as (k, n),
/// and the output C is written as (m, n).
SmallVector<AffineMap>
MatmulTransposeAOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *context = builder.getContext();
  AffineExpr m, n, k;
  bindDims(context, m, n, k); // d0 = m, d1 = n, d2 = k.
  return SmallVector<AffineMap>{AffineMap::get(3, 0, {k, m}, context),
                                AffineMap::get(3, 0, {k, n}, context),
                                AffineMap::get(3, 0, {m, n}, context)};
}
/// Returns true if `attr` is the ArrayAttr of indexing maps matching the
/// transpose-A matmul access pattern:
///   A: (d2, d0), B: (d2, d1), C: (d0, d1).
bool MatmulTransposeAOp::isDefaultIndexingMaps(Attribute attr) {
  // Guard the null case: classof() feeds this directly from
  // `op->getAttr("indexing_maps")`, which is null when the attribute is
  // absent, and `dyn_cast` is not valid on a null Attribute.
  if (!attr)
    return false;
  ArrayAttr maps = dyn_cast<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{2, 0} &&
         (*positions)[1] == SmallVector<int64_t>{2, 1} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1};
}
/// Build a memref-semantics transpose-A matmul (no result types).
void linalg::MatmulTransposeAOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  // The transpose-A maps are attached as the `indexing_maps` attribute of a
  // regular linalg.matmul.
  buildMatmulOp(builder, result, std::nullopt, inputs, outputs, attributes,
                MatmulOp::getRegionBuilder(), getDefaultIndexingMaps(builder));
}

MatmulTransposeAOp
MatmulTransposeAOp::create(OpBuilder &builder, Location location,
                           ValueRange inputs, ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, inputs, outputs, attributes);
  // builder.create() returns a generic op; the dyn_cast relies on classof()
  // recognizing the transpose-A indexing maps.
  auto res = dyn_cast<MatmulTransposeAOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// Build a transpose-A matmul with explicit result tensor types.
void linalg::MatmulTransposeAOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                MatmulOp::getRegionBuilder(), getDefaultIndexingMaps(builder));
}

MatmulTransposeAOp
MatmulTransposeAOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, attributes);
  auto res = dyn_cast<MatmulTransposeAOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// Build a transpose-A matmul with explicit result types and a `cast`
/// attribute controlling operand promotion.
void linalg::MatmulTransposeAOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       Attribute cast,
                                       ArrayRef<NamedAttribute> attributes) {
  result.addAttribute("cast", cast);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                MatmulOp::getRegionBuilder(), getDefaultIndexingMaps(builder));
}

MatmulTransposeAOp
MatmulTransposeAOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs, Attribute cast,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, cast, attributes);
  auto res = dyn_cast<MatmulTransposeAOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// An op is a MatmulTransposeAOp iff it is a linalg.matmul whose
/// `indexing_maps` attribute matches the transpose-A maps.
bool MatmulTransposeAOp::classof(Operation *op) {
  return dyn_cast_or_null<linalg::MatmulOp>(op) &&
         MatmulTransposeAOp::isDefaultIndexingMaps(
             op->getAttr("indexing_maps"));
}
/// Indexing maps for a transpose-B matmul: A is read as (m, k), B as (n, k),
/// and the output C is written as (m, n).
SmallVector<AffineMap>
MatmulTransposeBOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *context = builder.getContext();
  AffineExpr m, n, k;
  bindDims(context, m, n, k); // d0 = m, d1 = n, d2 = k.
  return SmallVector<AffineMap>{AffineMap::get(3, 0, {m, k}, context),
                                AffineMap::get(3, 0, {n, k}, context),
                                AffineMap::get(3, 0, {m, n}, context)};
}
/// Returns true if `attr` is the ArrayAttr of indexing maps matching the
/// transpose-B matmul access pattern:
///   A: (d0, d2), B: (d1, d2), C: (d0, d1).
bool MatmulTransposeBOp::isDefaultIndexingMaps(Attribute attr) {
  // Guard the null case: classof() feeds this directly from
  // `op->getAttr("indexing_maps")`, which is null when the attribute is
  // absent, and `dyn_cast` is not valid on a null Attribute.
  if (!attr)
    return false;
  ArrayAttr maps = dyn_cast<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 2} &&
         (*positions)[1] == SmallVector<int64_t>{1, 2} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1};
}
/// Build a memref-semantics transpose-B matmul (no result types).
void linalg::MatmulTransposeBOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  // The transpose-B maps are attached as the `indexing_maps` attribute of a
  // regular linalg.matmul.
  buildMatmulOp(builder, result, std::nullopt, inputs, outputs, attributes,
                MatmulOp::getRegionBuilder(), getDefaultIndexingMaps(builder));
}

MatmulTransposeBOp
MatmulTransposeBOp::create(OpBuilder &builder, Location location,
                           ValueRange inputs, ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, inputs, outputs, attributes);
  // builder.create() returns a generic op; the dyn_cast relies on classof()
  // recognizing the transpose-B indexing maps.
  auto res = dyn_cast<MatmulTransposeBOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// Build a transpose-B matmul with explicit result tensor types.
void linalg::MatmulTransposeBOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                MatmulOp::getRegionBuilder(), getDefaultIndexingMaps(builder));
}

MatmulTransposeBOp
MatmulTransposeBOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, attributes);
  auto res = dyn_cast<MatmulTransposeBOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// Build a transpose-B matmul with explicit result types and a `cast`
/// attribute controlling operand promotion.
void linalg::MatmulTransposeBOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       Attribute cast,
                                       ArrayRef<NamedAttribute> attributes) {
  result.addAttribute("cast", cast);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                MatmulOp::getRegionBuilder(), getDefaultIndexingMaps(builder));
}

MatmulTransposeBOp
MatmulTransposeBOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs, Attribute cast,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, cast, attributes);
  auto res = dyn_cast<MatmulTransposeBOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// An op is a MatmulTransposeBOp iff it is a linalg.matmul whose
/// `indexing_maps` attribute matches the transpose-B maps.
bool MatmulTransposeBOp::classof(Operation *op) {
  return dyn_cast_or_null<linalg::MatmulOp>(op) &&
         MatmulTransposeBOp::isDefaultIndexingMaps(
             op->getAttr("indexing_maps"));
}
/// Indexing maps for a transpose-A batch matmul: per batch b, A is read as
/// (b, k, m), B as (b, k, n), and the output C is written as (b, m, n).
SmallVector<AffineMap>
BatchMatmulTransposeAOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *context = builder.getContext();
  AffineExpr b, m, n, k;
  bindDims(context, b, m, n, k); // d0 = b, d1 = m, d2 = n, d3 = k.
  return SmallVector<AffineMap>{AffineMap::get(4, 0, {b, k, m}, context),
                                AffineMap::get(4, 0, {b, k, n}, context),
                                AffineMap::get(4, 0, {b, m, n}, context)};
}
/// Returns true if `attr` is the ArrayAttr of indexing maps matching the
/// transpose-A batch matmul access pattern:
///   A: (d0, d3, d1), B: (d0, d3, d2), C: (d0, d1, d2).
bool BatchMatmulTransposeAOp::isDefaultIndexingMaps(Attribute attr) {
  // Guard the null case: classof() feeds this directly from
  // `op->getAttr("indexing_maps")`, which is null when the attribute is
  // absent, and `dyn_cast` is not valid on a null Attribute.
  if (!attr)
    return false;
  ArrayAttr maps = dyn_cast<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 3, 1} &&
         (*positions)[1] == SmallVector<int64_t>{0, 3, 2} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1, 2};
}
/// Build a memref-semantics transpose-A batch matmul (no result types).
void linalg::BatchMatmulTransposeAOp::build(
    OpBuilder &builder, OperationState &result, ValueRange inputs,
    ValueRange outputs, ArrayRef<NamedAttribute> attributes) {
  // The transpose-A maps are attached as the `indexing_maps` attribute of a
  // regular linalg.batch_matmul.
  buildMatmulOp(builder, result, std::nullopt, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(),
                getDefaultIndexingMaps(builder));
}

BatchMatmulTransposeAOp
BatchMatmulTransposeAOp::create(OpBuilder &builder, Location location,
                                ValueRange inputs, ValueRange outputs,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, inputs, outputs, attributes);
  // builder.create() returns a generic op; the dyn_cast relies on classof()
  // recognizing the transpose-A indexing maps.
  auto res = dyn_cast<BatchMatmulTransposeAOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// Build a transpose-A batch matmul with explicit result tensor types.
void linalg::BatchMatmulTransposeAOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs,
    ArrayRef<NamedAttribute> attributes) {
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(),
                getDefaultIndexingMaps(builder));
}

BatchMatmulTransposeAOp
BatchMatmulTransposeAOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, attributes);
  auto res = dyn_cast<BatchMatmulTransposeAOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// Build a transpose-A batch matmul with explicit result types and a `cast`
/// attribute controlling operand promotion.
void linalg::BatchMatmulTransposeAOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs, Attribute cast,
    ArrayRef<NamedAttribute> attributes) {
  result.addAttribute("cast", cast);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(),
                getDefaultIndexingMaps(builder));
}

BatchMatmulTransposeAOp
BatchMatmulTransposeAOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs, Attribute cast,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, cast, attributes);
  auto res = dyn_cast<BatchMatmulTransposeAOp>(builder.create(state));
  assert(res && "builder didn't return the right type");
  return res;
}

/// An op is a BatchMatmulTransposeAOp iff it is a linalg.batch_matmul whose
/// `indexing_maps` attribute matches the transpose-A maps.
bool BatchMatmulTransposeAOp::classof(Operation *op) {
  return dyn_cast_or_null<linalg::BatchMatmulOp>(op) &&
         BatchMatmulTransposeAOp::isDefaultIndexingMaps(
             op->getAttr("indexing_maps"));
}
/// Indexing maps for a transpose-B batch matmul: per batch b, A is read as
/// (b, m, k), B as (b, n, k), and the output C is written as (b, m, n).
SmallVector<AffineMap>
BatchMatmulTransposeBOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *context = builder.getContext();
  AffineExpr b, m, n, k;
  bindDims(context, b, m, n, k); // d0 = b, d1 = m, d2 = n, d3 = k.
  return SmallVector<AffineMap>{AffineMap::get(4, 0, {b, m, k}, context),
                                AffineMap::get(4, 0, {b, n, k}, context),
                                AffineMap::get(4, 0, {b, m, n}, context)};
}
/// Returns true if `attr` is the ArrayAttr of indexing maps matching the
/// transpose-B batch matmul access pattern:
///   A: (d0, d1, d3), B: (d0, d2, d3), C: (d0, d1, d2).
bool BatchMatmulTransposeBOp::isDefaultIndexingMaps(Attribute attr) {
  // Guard the null case: classof() feeds this directly from
  // `op->getAttr("indexing_maps")`, which is null when the attribute is
  // absent, and `dyn_cast` is not valid on a null Attribute.
  if (!attr)
    return false;
  ArrayAttr maps = dyn_cast<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 1, 3} &&
         (*positions)[1] == SmallVector<int64_t>{0, 2, 3} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1, 2};
}
void linalg::BatchMatmulTransposeBOp::build(
OpBuilder &builder, OperationState &result, ValueRange inputs,
ValueRange outputs, ArrayRef<NamedAttribute> attributes) {
buildMatmulOp(builder, result, std::nullopt, inputs, outputs, attributes,
BatchMatmulOp::getRegionBuilder(),
getDefaultIndexingMaps(builder));
}
BatchMatmulTransposeBOp
BatchMatmulTransposeBOp::create(OpBuilder &builder, Location location,
ValueRange inputs, ValueRange outputs,
ArrayRef<NamedAttribute> attributes) {
OperationState state(location, getOperationName());
build(builder, state, inputs, outputs, attributes);
auto res = dyn_cast<BatchMatmulTransposeBOp>(builder.create(state));
assert(res && "builder didn't return the right type");
return res;
}
void linalg::BatchMatmulTransposeBOp::build(
OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
ValueRange inputs, ValueRange outputs,
ArrayRef<NamedAttribute> attributes) {
buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
BatchMatmulOp::getRegionBuilder(),
getDefaultIndexingMaps(builder));
}
BatchMatmulTransposeBOp
BatchMatmulTransposeBOp::create(OpBuilder &builder, Location location,
TypeRange resultTensorTypes, ValueRange inputs,
ValueRange outputs,
ArrayRef<NamedAttribute> attributes) {
OperationState state(location, getOperationName());
build(builder, state, resultTensorTypes, inputs, outputs, attributes);
auto res = dyn_cast<BatchMatmulTransposeBOp>(builder.create(state));
assert(res && "builder didn't return the right type");
return res;
}
/// Build a transpose-B batch matmul with explicit result types and an
/// element-type cast attribute.
void linalg::BatchMatmulTransposeBOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs, Attribute cast,
    ArrayRef<NamedAttribute> attributes) {
  // Record the cast semantics first, then delegate to the generic matmul
  // builder with the transpose-B indexing maps.
  result.addAttribute("cast", cast);
  SmallVector<AffineMap> indexingMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs,
                attributes, BatchMatmulOp::getRegionBuilder(), indexingMaps);
}
/// Create a transpose-B batch matmul with explicit result types and an
/// element-type cast attribute.
BatchMatmulTransposeBOp
BatchMatmulTransposeBOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs, Attribute cast,
                                ArrayRef<NamedAttribute> attributes) {
  // Assemble the op state via the cast-aware builder, then materialize it.
  OperationState state(location, getOperationName());
  build(builder, state, resultTensorTypes, inputs, outputs, cast, attributes);
  Operation *op = builder.create(state);
  auto transposeOp = dyn_cast<BatchMatmulTransposeBOp>(op);
  assert(transposeOp && "builder didn't return the right type");
  return transposeOp;
}
/// A "transpose-B batch matmul" is just a linalg.batch_matmul whose
/// indexing maps match the canonical transpose-B pattern.
bool BatchMatmulTransposeBOp::classof(Operation *op) {
  if (!dyn_cast_or_null<linalg::BatchMatmulOp>(op))
    return false;
  return BatchMatmulTransposeBOp::isDefaultIndexingMaps(
      op->getAttr("indexing_maps"));
}
//===----------------------------------------------------------------------===//
// ContractOp
//===----------------------------------------------------------------------===//
@ -4120,6 +4528,20 @@ BatchMatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
return indexingMaps;
}
/// Returns true if `attr` is the canonical indexing-maps attribute of a plain
/// batch matmul, i.e. the three maps project onto:
///   lhs: (b, m, n, k) -> (b, m, k)
///   rhs: (b, m, n, k) -> (b, k, n)
///   out: (b, m, n, k) -> (b, m, n)
bool BatchMatmulOp::isDefaultIndexingMaps(Attribute attr) {
  // Null-safe: `attr` may be a null Attribute when the op carries no explicit
  // `indexing_maps` attribute.
  ArrayAttr maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps)
    return false;
  if (maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 1, 3} &&
         (*positions)[1] == SmallVector<int64_t>{0, 3, 2} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1, 2};
}
SmallVector<utils::IteratorType> BatchMatmulOp::getIteratorTypesArray() {
return SmallVector<utils::IteratorType>{
utils::IteratorType::parallel, utils::IteratorType::parallel,
@ -5646,6 +6068,19 @@ BatchReduceMatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
return indexingMaps;
}
/// Returns true if `attr` is the canonical indexing-maps attribute of a
/// batch-reduce matmul, i.e. the three maps project onto:
///   lhs: (b, m, n, k) -> (b, m, k)
///   rhs: (b, m, n, k) -> (b, k, n)
///   out: (b, m, n, k) -> (m, n)     // batch dim reduced away
bool BatchReduceMatmulOp::isDefaultIndexingMaps(Attribute attr) {
  // Null-safe: `attr` may be a null Attribute when the op carries no explicit
  // `indexing_maps` attribute.
  ArrayAttr maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps)
    return false;
  if (maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 1, 3} &&
         (*positions)[1] == SmallVector<int64_t>{0, 3, 2} &&
         (*positions)[2] == SmallVector<int64_t>{1, 2};
}
unsigned BatchReduceMatmulOp::getNumRegionArgs() { return 3; }
std::string BatchReduceMatmulOp::getLibraryCallName() {

View File

@ -320,10 +320,6 @@ void linalg::populateBlockPackMatmulPatterns(
RewritePatternSet &patterns, const ControlBlockPackMatmulFn &controlFn) {
patterns.add<BlockPackMatmul<linalg::GenericOp>,
BlockPackMatmul<linalg::MatmulOp>,
BlockPackMatmul<linalg::BatchMatmulOp>,
BlockPackMatmul<linalg::MatmulTransposeAOp>,
BlockPackMatmul<linalg::BatchMatmulTransposeAOp>,
BlockPackMatmul<linalg::MatmulTransposeBOp>,
BlockPackMatmul<linalg::BatchMatmulTransposeBOp>>(
patterns.getContext(), controlFn);
BlockPackMatmul<linalg::BatchMatmulOp>>(patterns.getContext(),
controlFn);
}

View File

@ -1052,12 +1052,8 @@ struct RankReduceMatmul : RankReduceContractionOps<FromOpTy, ToOpTy> {
static bool constexpr reduceLeft =
(std::is_same_v<FromOpTy, BatchMatmulOp> &&
std::is_same_v<ToOpTy, BatchVecmatOp>) ||
(std::is_same_v<FromOpTy, BatchMatmulTransposeAOp> &&
std::is_same_v<ToOpTy, BatchVecmatOp>) ||
(std::is_same_v<FromOpTy, MatmulOp> &&
std::is_same_v<ToOpTy, VecmatOp>) ||
(std::is_same_v<FromOpTy, MatmulTransposeAOp> &&
std::is_same_v<ToOpTy, VecmatOp>) ||
(std::is_same_v<FromOpTy, MatvecOp> && std::is_same_v<ToOpTy, DotOp>);
/// Look for non-batch spatial dims to collapse.
@ -1113,27 +1109,15 @@ void mlir::linalg::populateContractionOpRankReducingPatterns(
MLIRContext *context = patterns.getContext();
// Unbatching patterns for unit batch size
patterns.add<RankReduceToUnBatched<BatchMatmulOp, MatmulOp>>(context);
patterns
.add<RankReduceToUnBatched<BatchMatmulTransposeAOp, MatmulTransposeAOp>>(
context);
patterns
.add<RankReduceToUnBatched<BatchMatmulTransposeBOp, MatmulTransposeBOp>>(
context);
patterns.add<RankReduceToUnBatched<BatchMatvecOp, MatvecOp>>(context);
patterns.add<RankReduceToUnBatched<BatchVecmatOp, VecmatOp>>(context);
// Non-batch rank 1 reducing patterns
patterns.add<RankReduceMatmul<MatmulOp, VecmatOp>>(context);
patterns.add<RankReduceMatmul<MatmulOp, MatvecOp>>(context);
patterns.add<RankReduceMatmul<MatmulTransposeAOp, VecmatOp>>(context);
patterns.add<RankReduceMatmul<MatmulTransposeBOp, MatvecOp>>(context);
// Batch rank 1 reducing patterns
patterns.add<RankReduceMatmul<BatchMatmulOp, BatchVecmatOp>>(context);
patterns.add<RankReduceMatmul<BatchMatmulOp, BatchMatvecOp>>(context);
patterns.add<RankReduceMatmul<BatchMatmulTransposeAOp, BatchVecmatOp>>(
context);
patterns.add<RankReduceMatmul<BatchMatmulTransposeBOp, BatchMatvecOp>>(
context);
// Non-batch rank 0 reducing patterns
patterns.add<RankReduceMatmul<MatvecOp, DotOp>>(context);

View File

@ -234,19 +234,8 @@ static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
/// Codegen the different matmul variants.
if (numOfBatchDims) {
if (a == IndexMatchResult::Transposed)
return replaceWithMatmulVariant<BatchMatmulTransposeAOp>(rewriter,
genericOp);
if (b == IndexMatchResult::Transposed)
return replaceWithMatmulVariant<BatchMatmulTransposeBOp>(rewriter,
genericOp);
return replaceWithMatmulVariant<BatchMatmulOp>(rewriter, genericOp);
}
if (a == IndexMatchResult::Transposed)
return replaceWithMatmulVariant<MatmulTransposeAOp>(rewriter, genericOp);
if (b == IndexMatchResult::Transposed)
return replaceWithMatmulVariant<MatmulTransposeBOp>(rewriter, genericOp);
return replaceWithMatmulVariant<MatmulOp>(rewriter, genericOp);
}

View File

@ -52,19 +52,19 @@ FailureOr<Operation *> mlir::linalg::transposeMatmul(RewriterBase &rewriter,
dynamicDims.push_back(tensor::DimOp::create(rewriter, loc, input, 0));
ArrayRef<int64_t> shape = type.getShape();
Value empty = tensor::EmptyOp::create(rewriter, loc,
ArrayRef<int64_t>{shape[1], shape[0]},
type.getElementType(), dynamicDims);
auto transposeOp = linalg::TransposeOp::create(rewriter, loc, input, empty,
ArrayRef<int64_t>{1, 0});
Value empty = rewriter.create<tensor::EmptyOp>(
loc, ArrayRef<int64_t>{shape[1], shape[0]}, type.getElementType(),
dynamicDims);
auto transposeOp = rewriter.create<linalg::TransposeOp>(
loc, input, empty, ArrayRef<int64_t>{1, 0});
Operation *newMatmulOp;
if (transposeLHS) {
newMatmulOp = linalg::MatmulTransposeAOp::create(
newMatmulOp = MatmulTransposeAOp::create(
rewriter, loc, matmulOp.getResultTypes(),
ValueRange{transposeOp->getResult(0), matmulOp.getInputs()[1]},
matmulOp.getOutputs());
} else {
newMatmulOp = linalg::MatmulTransposeBOp::create(
newMatmulOp = MatmulTransposeBOp::create(
rewriter, loc, matmulOp.getResultTypes(),
ValueRange{matmulOp.getInputs()[0], transposeOp->getResult(0)},
matmulOp.getOutputs());
@ -112,16 +112,16 @@ mlir::linalg::transposeBatchMatmul(RewriterBase &rewriter,
Value empty = tensor::EmptyOp::create(
rewriter, loc, ArrayRef<int64_t>{shape[0], shape[2], shape[1]},
type.getElementType(), dynamicDims);
auto transposeOp = linalg::TransposeOp::create(rewriter, loc, input, empty,
ArrayRef<int64_t>{0, 2, 1});
auto transposeOp = rewriter.create<linalg::TransposeOp>(
loc, input, empty, ArrayRef<int64_t>{0, 2, 1});
Operation *newMatmulOp;
if (transposeLHS) {
newMatmulOp = linalg::BatchMatmulTransposeAOp::create(
newMatmulOp = BatchMatmulTransposeAOp::create(
rewriter, loc, batchMatmulOp.getResultTypes(),
ValueRange{transposeOp->getResult(0), batchMatmulOp.getInputs()[1]},
batchMatmulOp.getOutputs());
} else {
newMatmulOp = linalg::BatchMatmulTransposeBOp::create(
newMatmulOp = BatchMatmulTransposeBOp::create(
rewriter, loc, batchMatmulOp.getResultTypes(),
ValueRange{batchMatmulOp.getInputs()[0], transposeOp->getResult(0)},
batchMatmulOp.getOutputs());

View File

@ -2563,7 +2563,7 @@ vectorizeScalableVectorPrecondition(Operation *op,
"vectorization";
return failure();
}
if (isa<linalg::MatmulOp>(op) || isa<linalg::MatmulTransposeAOp>(op)) {
if (isa<linalg::MatmulOp>(op)) {
LDBG()
<< "Scalable vectorization of the reduction dim in Matmul-like ops "
"is not supported";
@ -2604,15 +2604,9 @@ vectorizeScalableVectorPrecondition(Operation *op,
return failure();
}
// Check to not let go the matmul with extended semantic, through this
// transform.
if (linalgOp.hasUserDefinedMaps())
return failure();
// Cond 4: Only the following ops are supported in the
// presence of scalable vectors
return success(isElementwise(linalgOp) || isa<linalg::MatmulOp>(op) ||
isa<linalg::MatmulTransposeAOp>(op) ||
isa<linalg::DepthwiseConv1DNwcWcOp>(op) ||
isa<linalg::MatvecOp>(op) || isa<linalg::Mmt4DOp>(op) ||
hasReductionIterator(linalgOp));

View File

@ -373,42 +373,6 @@ def quantized_matmul(
)
@linalg_structured_op
def matmul_transpose_a(
A=TensorDef(T1, S.K, S.N),
B=TensorDef(T2, S.K, S.M),
C=TensorDef(U, S.M, S.N, output=True),
cast=TypeFnAttrDef(default=TypeFn.cast_signed),
):
"""Performs a matrix multiplication of two 2D inputs with lhs operand
transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
domain(D.m, D.n, D.k)
implements(ContractionOpInterface)
C[D.m, D.n] += cast(U, A[D.k, D.m]) * cast(U, B[D.k, D.n])
@linalg_structured_op
def matmul_transpose_b(
A=TensorDef(T1, S.M, S.K),
B=TensorDef(T2, S.N, S.K),
C=TensorDef(U, S.M, S.N, output=True),
cast=TypeFnAttrDef(default=TypeFn.cast_signed),
):
"""Performs a matrix multiplication of two 2D inputs with rhs operand
transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
domain(D.m, D.n, D.k)
implements(ContractionOpInterface)
C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.n, D.k])
@linalg_structured_op
def mmt4d(
lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0),
@ -453,44 +417,6 @@ def batch_mmt4d(
) * TypeFn.cast_signed(TV.AccumType, rhs[D.b, D.n, D.k, D.n0, D.k0])
@linalg_structured_op
def batch_matmul_transpose_a(
A=TensorDef(T1, Batch, S.K, S.M),
B=TensorDef(T2, Batch, S.K, S.N),
C=TensorDef(U, Batch, S.M, S.N, output=True),
):
"""Performs a batched matrix multiplication of two 3D inputs where lhs operand
has its non-batch dimensions transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
domain(D.b, D.m, D.n, D.k)
implements(ContractionOpInterface)
C[D.b, D.m, D.n] += TypeFn.cast_signed(U, A[D.b, D.k, D.m]) * TypeFn.cast_signed(
U, B[D.b, D.k, D.n]
)
@linalg_structured_op
def batch_matmul_transpose_b(
A=TensorDef(T1, Batch, S.M, S.K),
B=TensorDef(T2, Batch, S.N, S.K),
C=TensorDef(U, Batch, S.M, S.N, output=True),
):
"""Performs a batched matrix multiplication of two 3D inputs where rhs operand
has its non-batch dimensions transposed.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
domain(D.b, D.m, D.n, D.k)
implements(ContractionOpInterface)
C[D.b, D.m, D.n] += TypeFn.cast_signed(U, A[D.b, D.m, D.k]) * TypeFn.cast_signed(
U, B[D.b, D.n, D.k]
)
@linalg_structured_op
def quantized_batch_matmul(
A=TensorDef(T1, Batch, S.M, S.K),
@ -512,25 +438,6 @@ def quantized_batch_matmul(
) * (TypeFn.cast_signed(U, B[D.b, D.k, D.n]) - TypeFn.cast_signed(U, BZp))
@linalg_structured_op
def batch_reduce_matmul(
A=TensorDef(T1, Batch, S.M, S.K),
B=TensorDef(T2, Batch, S.K, S.N),
C=TensorDef(U, S.M, S.N, output=True),
):
"""Performs a batch-reduce matrix multiplication of two 3D inputs.
The partial multiplication results are reduced into a 2D output.
Numeric casting is performed on the operands to the inner multiply, promoting
them to the same data type as the accumulator/output.
"""
domain(D.b, D.m, D.n, D.k)
implements(ContractionOpInterface)
C[D.m, D.n] += TypeFn.cast_signed(U, A[D.b, D.m, D.k]) * TypeFn.cast_signed(
U, B[D.b, D.k, D.n]
)
@linalg_structured_op
def matvec(
A=TensorDef(T1, S.M, S.N), y=TensorDef(T2, S.N), x=TensorDef(U, S.M, output=True)

View File

@ -20,20 +20,6 @@ func.func @block_matmul(
return %0 : tensor<64x64xf32>
}
func.func @block_matmul_transpose_a(
%A: tensor<128x64xf32>, %B: tensor<128x64xf32>, %C: tensor<64x64xf32>) -> tensor<64x64xf32> {
%0 = linalg.matmul_transpose_a ins(%A, %B : tensor<128x64xf32>, tensor<128x64xf32>)
outs(%C : tensor<64x64xf32>) -> tensor<64x64xf32>
return %0 : tensor<64x64xf32>
}
func.func @block_matmul_transpose_b(
%A: tensor<64x128xf32>, %B: tensor<64x128xf32>, %C: tensor<64x64xf32>) -> tensor<64x64xf32> {
%0 = linalg.matmul_transpose_b ins(%A, %B : tensor<64x128xf32>, tensor<64x128xf32>)
outs(%C : tensor<64x64xf32>) -> tensor<64x64xf32>
return %0 : tensor<64x64xf32>
}
// MMT4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// MMT4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
// MMT4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
@ -43,18 +29,6 @@ func.func @block_matmul_transpose_b(
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MMT4D-COUNT-1: linalg.unpack
// MMT4D-LABEL: func @block_matmul_transpose_a
// MMT4D-COUNT-3: linalg.pack
// MMT4D: linalg.generic
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MMT4D-COUNT-1: linalg.unpack
// MMT4D-LABEL: func @block_matmul_transpose_b
// MMT4D-COUNT-3: linalg.pack
// MMT4D: linalg.generic
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MMT4D-COUNT-1: linalg.unpack
// MM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// MM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)>
@ -65,18 +39,6 @@ func.func @block_matmul_transpose_b(
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MM4D-COUNT-1: linalg.unpack
// MM4D-LABEL: func @block_matmul_transpose_a
// MM4D-COUNT-3: linalg.pack
// MM4D: linalg.generic
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MM4D-COUNT-1: linalg.unpack
// MM4D-LABEL: func @block_matmul_transpose_b
// MM4D-COUNT-3: linalg.pack
// MM4D: linalg.generic
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MM4D-COUNT-1: linalg.unpack
// MTM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d5, d3)>
// MTM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)>
@ -87,15 +49,3 @@ func.func @block_matmul_transpose_b(
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MTM4D-COUNT-1: linalg.unpack
// MTM4D-LABEL: func @block_matmul_transpose_a
// MTM4D-COUNT-3: linalg.pack
// MTM4D: linalg.generic
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MTM4D-COUNT-1: linalg.unpack
// MTM4D-LABEL: func @block_matmul_transpose_b
// MTM4D-COUNT-3: linalg.pack
// MTM4D: linalg.generic
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// MTM4D-COUNT-1: linalg.unpack

View File

@ -197,150 +197,6 @@ func.func @block_batch_matmul(
// -----
func.func @block_matmul_transpose_a(
%A: tensor<128x64xf32>, %B: tensor<128x64xf32>, %C: tensor<64x64xf32>) -> tensor<64x64xf32> {
%0 = linalg.matmul_transpose_a ins(%A, %B : tensor<128x64xf32>, tensor<128x64xf32>)
outs(%C : tensor<64x64xf32>) -> tensor<64x64xf32>
return %0 : tensor<64x64xf32>
}
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
// CHECK-LABEL: func @block_matmul_transpose_a(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
// -----
func.func @block_batch_matmul_transpose_a(
%A: tensor<512x128x64xf32>, %B: tensor<512x128x64xf32>, %C: tensor<512x64x64xf32>) -> tensor<512x64x64xf32> {
%0 = linalg.batch_matmul_transpose_a ins(%A, %B : tensor<512x128x64xf32>, tensor<512x128x64xf32>)
outs(%C : tensor<512x64x64xf32>) -> tensor<512x64x64xf32>
return %0 : tensor<512x64x64xf32>
}
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d3, d4, d6)>
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d2, d3, d5, d6)>
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4, d5)>
// CHECK-LABEL: func @block_batch_matmul_transpose_a(
// CHECK-SAME: %[[A:.+]]: tensor<512x128x64xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x128x64xf32> -> tensor<512x2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
// -----
func.func @block_matmul_transpose_b(
%A: tensor<64x128xf32>, %B: tensor<64x128xf32>, %C: tensor<64x64xf32>) -> tensor<64x64xf32> {
%0 = linalg.matmul_transpose_b ins(%A, %B : tensor<64x128xf32>, tensor<64x128xf32>)
outs(%C : tensor<64x64xf32>) -> tensor<64x64xf32>
return %0 : tensor<64x64xf32>
}
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
// CHECK-LABEL: func @block_matmul_transpose_b(
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
// -----
func.func @block_batch_matmul_transpose_b(
%A: tensor<512x64x128xf32>, %B: tensor<512x64x128xf32>, %C: tensor<512x64x64xf32>) -> tensor<512x64x64xf32> {
%0 = linalg.batch_matmul_transpose_b ins(%A, %B : tensor<512x64x128xf32>, tensor<512x64x128xf32>)
outs(%C : tensor<512x64x64xf32>) -> tensor<512x64x64xf32>
return %0 : tensor<512x64x64xf32>
}
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d3, d4, d6)>
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d2, d3, d5, d6)>
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4, d5)>
// CHECK-LABEL: func @block_batch_matmul_transpose_b(
// CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x64x128xf32>, %[[C:.+]]: tensor<512x64x64xf32>
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64]
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32>
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 64]
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x64x128xf32> -> tensor<512x4x2x16x64xf32>
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
// -----
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>

View File

@ -157,36 +157,6 @@ module attributes {transform.with_named_sequence} {
// -----
!type = tensor<2048x2048xf32>
func.func @fold_add_on_transposed_matmuls(%arg0: !type, %arg1: !type) -> !type {
%0 = arith.constant dense<1.111111e+00> : !type
%cst = arith.constant 0.000000e+00 : f32
%1 = tensor.empty() : !type
%2 = linalg.fill ins(%cst : f32) outs(%1 : !type) -> !type
%3 = linalg.matmul_transpose_a ins(%arg0, %0 : !type, !type) outs(%2 : !type) -> !type
%4 = linalg.matmul_transpose_b ins(%arg1, %0 : !type, !type) outs(%2 : !type) -> !type
%5 = linalg.add ins(%3, %4 : !type, !type) outs(%1 : !type) -> !type
return %5 : !type
}
// CHECK-LABEL: func.func @fold_add_on_transposed_matmuls
// CHECK: %[[ACC:.+]] = linalg.matmul_transpose_a
// CHECK-NEXT: %[[RES:.+]] = linalg.matmul_transpose_b ins({{.+}}) outs(%[[ACC]]
// CHECK-NOT: linalg.add
// CHECK-NEXT: return %[[RES]]
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%func = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.linalg.fold_add_into_dest
} : !transform.any_op
transform.yield
}
}
// -----
!type = tensor<2048x2048xf32>
func.func @expect_no_fold_of_add_as_dominated_op_is_not_a_contraction(%arg0: !type, %arg1: !type) -> !type {
%0 = arith.constant dense<1.111111e+00> : !type

View File

@ -1222,17 +1222,6 @@ func.func @batch_reduce_matmul(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32
// -----
// CHECK-LABEL: func @matmul_transpose_a
// CHECK: linalg.matmul_transpose_a
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<5x3xf32>, memref<5x7xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
func.func @matmul_transpose_a(%arg0: memref<5x3xf32>, %arg1: memref<5x7xf32>, %arg2: memref<3x7xf32>) {
linalg.matmul_transpose_a ins(%arg0, %arg1 : memref<5x3xf32>, memref<5x7xf32>) outs(%arg2: memref<3x7xf32>)
return
}
// -----
// CHECK-LABEL: func @matmul_transpose_a_explicit
// CHECK: linalg.matmul
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<5x3xf32>, memref<5x7xf32>)
@ -1478,17 +1467,6 @@ func.func @matmul_bcast_b_transpose_a(%arg0: memref<5x3xf32>, %arg1: memref<5xf3
// -----
// CHECK-LABEL: func @matmul_transpose_b
// CHECK: linalg.matmul_transpose_b
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<3x5xf32>, memref<7x5xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
func.func @matmul_transpose_b(%arg0: memref<3x5xf32>, %arg1: memref<7x5xf32>, %arg2: memref<3x7xf32>) {
linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) outs(%arg2: memref<3x7xf32>)
return
}
// -----
// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>
// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK: #[[$ATTR_2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
@ -1806,28 +1784,6 @@ func.func @bcast_A_transpose_B(%A: memref<3x5xf32>, %B: memref<2x7x5xf32>, %C: m
// -----
// CHECK-LABEL: func @batchmatmul_transpose_a
// CHECK: linalg.batch_matmul_transpose_a
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x5x3xf32>, memref<2x5x7xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
func.func @batchmatmul_transpose_a(%arg0: memref<2x5x3xf32>, %arg1: memref<2x5x7xf32>, %arg2: memref<2x3x7xf32>) {
linalg.batch_matmul_transpose_a ins(%arg0, %arg1 : memref<2x5x3xf32>, memref<2x5x7xf32>) outs(%arg2: memref<2x3x7xf32>)
return
}
// -----
// CHECK-LABEL: func @batchmatmul_transpose_b
// CHECK: linalg.batch_matmul_transpose_b
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x3x5xf32>, memref<2x7x5xf32>)
// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
func.func @batchmatmul_transpose_b(%arg0: memref<2x3x5xf32>, %arg1: memref<2x7x5xf32>, %arg2: memref<2x3x7xf32>) {
linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) outs(%arg2: memref<2x3x7xf32>)
return
}
// -----
// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// CHECK: #[[$ATTR_2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>

View File

@ -92,38 +92,6 @@ func.func @singleton_batch_vecmat(%arg0 : tensor<1x?xf32>, %arg1 : tensor<1x?x?x
// -----
func.func @singleton_batchmatmul_transpose_a(%arg0: memref<1x5x3xf32>, %arg1: memref<1x5x7xf32>, %arg2: memref<1x3x7xf32>) {
// CHECK-LABEL: @singleton_batchmatmul_transpose_a
// CHECK-SAME: %[[LHS:[a-zA-Z0-9]+]]: memref<1x5x3xf32>
// CHECK-SAME: %[[RHS:[a-zA-Z0-9]+]]: memref<1x5x7xf32>
// CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: memref<1x3x7xf32>
// CHECK-NEXT: %[[COLLAPSED_LHS:.*]] = memref.collapse_shape %[[LHS]] {{\[}}[0, 1], [2]]
// CHECK-NEXT: %[[COLLAPSED_RHS:.*]] = memref.collapse_shape %[[RHS]] {{\[}}[0, 1], [2]]
// CHECK-NEXT: %[[COLLAPSED_INIT:.*]] = memref.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]]
// CHECK-NEXT: linalg.matmul_transpose_a ins(%[[COLLAPSED_LHS]], %[[COLLAPSED_RHS]] : memref<5x3xf32>, memref<5x7xf32>) outs(%[[COLLAPSED_INIT]] : memref<3x7xf32>)
// CHECK-NEXT: return
linalg.batch_matmul_transpose_a ins(%arg0, %arg1 : memref<1x5x3xf32>, memref<1x5x7xf32>) outs(%arg2: memref<1x3x7xf32>)
return
}
// -----
func.func @singleton_batchmatmul_transpose_b(%arg0: memref<1x3x5xf32>, %arg1: memref<1x7x5xf32>, %arg2: memref<1x3x7xf32>) {
// CHECK-LABEL: @singleton_batchmatmul_transpose_b
// CHECK-SAME: %[[LHS:[a-zA-Z0-9]+]]: memref<1x3x5xf32>
// CHECK-SAME: %[[RHS:[a-zA-Z0-9]+]]: memref<1x7x5xf32>
// CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: memref<1x3x7xf32>
// CHECK-NEXT: %[[COLLAPSED_LHS:.*]] = memref.collapse_shape %[[LHS]] {{\[}}[0, 1], [2]]
// CHECK-NEXT: %[[COLLAPSED_RHS:.*]] = memref.collapse_shape %[[RHS]] {{\[}}[0, 1], [2]]
// CHECK-NEXT: %[[COLLAPSED_INIT:.*]] = memref.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]]
// CHECK-NEXT: linalg.matmul_transpose_b ins(%[[COLLAPSED_LHS]], %[[COLLAPSED_RHS]] : memref<3x5xf32>, memref<7x5xf32>) outs(%[[COLLAPSED_INIT]] : memref<3x7xf32>)
// CHECK-NEXT: return
linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<1x3x5xf32>, memref<1x7x5xf32>) outs(%arg2: memref<1x3x7xf32>)
return
}
// -----
func.func @matmul_to_matvec_tensor(%arg0: tensor<?x?xf32>, %arg1: tensor<?x1xf32>, %arg2: tensor<?x1xf32>) -> tensor<?x1xf32> {
// CHECK-LABEL: @matmul_to_matvec_tensor
// CHECK-SAME: %[[LHS:[a-zA-Z0-9]+]]: tensor<?x?xf32>
@ -226,59 +194,6 @@ func.func @matvec_to_dot_tensor(%arg0: tensor<1x?xf32>, %arg1: tensor<?xf32>, %a
// -----
func.func @matmul_transpose_a_to_vecmat(%arg0: tensor<256x1xf32>, %arg1: tensor<256x512xf32>, %arg2: tensor<1x512xf32>) -> tensor<1x512xf32> {
// CHECK-LABEL: @matmul_transpose_a_to_vecmat
// CHECK: collapse_shape {{.*}} into tensor<256xf32>
// CHECK: collapse_shape {{.*}} into tensor<512xf32>
// CHECK: linalg.vecmat
// CHECK: expand_shape {{.*}} into tensor<1x512xf32>
%0 = linalg.matmul_transpose_a ins(%arg0, %arg1: tensor<256x1xf32>, tensor<256x512xf32>) outs(%arg2: tensor<1x512xf32>) -> tensor<1x512xf32>
return %0 : tensor<1x512xf32>
}
// -----
func.func @batch_matmul_transpose_a_to_batch_vecmat(%arg0: tensor<64x256x1xf32>, %arg1: tensor<64x256x512xf32>, %arg2: tensor<64x1x512xf32>) -> tensor<64x1x512xf32> {
// CHECK-LABEL: @batch_matmul_transpose_a_to_batch_vecmat
// CHECK: collapse_shape {{.*}} into tensor<64x256xf32>
// CHECK: collapse_shape {{.*}} into tensor<64x512xf32>
// CHECK: linalg.batch_vecmat
// CHECK: expand_shape {{.*}} into tensor<64x1x512xf32>
%0 = linalg.batch_matmul_transpose_a ins(%arg0, %arg1: tensor<64x256x1xf32>, tensor<64x256x512xf32>) outs(%arg2: tensor<64x1x512xf32>) -> tensor<64x1x512xf32>
return %0 : tensor<64x1x512xf32>
}
// -----
func.func @matmul_transpose_b_to_matvec(%arg0: memref<?x?xf32>, %arg1: memref<1x?xf32>, %arg2: memref<?x1xf32>) {
// CHECK-LABEL: @matmul_transpose_b_to_matvec
// CHECK: linalg.matvec
linalg.matmul_transpose_b ins(%arg0, %arg1: memref<?x?xf32>, memref<1x?xf32>) outs(%arg2: memref<?x1xf32>)
return
}
// -----
func.func @batchmatmul_transpose_b_to_batchmatvec_tensor(%arg0: tensor<64x128x256xf32>, %arg1: tensor<64x1x256xf32>, %arg2: tensor<64x128x1xf32>) -> tensor<64x128x1xf32> {
// CHECK: collapse_shape {{.*}} into tensor<64x256xf32>
// CHECK: collapse_shape {{.*}} into tensor<64x128xf32>
// CHECK: linalg.batch_matvec
// CHECK: expand_shape {{.*}} into tensor<64x128x1xf32>
%0 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1: tensor<64x128x256xf32>, tensor<64x1x256xf32>) outs(%arg2: tensor<64x128x1xf32>) -> tensor<64x128x1xf32>
return %0 : tensor<64x128x1xf32>
}
// -----
func.func @batchmatmul_transpose_b_to_to_dot(%arg0: tensor<1x1x?xf32>, %arg1: tensor<1x1x?xf32>, %arg2: tensor<1x1x1xf32>) -> tensor<1x1x1xf32> {
// CHECK-LABEL: @batchmatmul_transpose_b_to_to_dot
// CHECK: linalg.dot
%0 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1: tensor<1x1x?xf32>, tensor<1x1x?xf32>) outs(%arg2: tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
return %0 : tensor<1x1x1xf32>
}
// -----
func.func @nonsingleton_batch_matmul(%arg0 : tensor<2x?x?xf32>, %arg1 : tensor<2x?x?xf32>, %arg2: tensor<2x?x?xf32>) -> tensor<2x?x?xf32> {
// CHECK-LABEL: @nonsingleton_batch_matmul
// CHECK-NOT: collapse_shape

View File

@ -504,7 +504,7 @@ func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.matmul_transpose_b"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%c10 = transform.param.constant 10 : i64 -> !transform.param<i64>
%c20 = transform.param.constant 20 : i64 -> !transform.param<i64>
%sz = transform.merge_handles %c10, %c20 : !transform.param<i64>

View File

@ -465,14 +465,14 @@ module attributes {transform.with_named_sequence} {
// CHECK: %[[RHS:.*]] = tensor.pad
// CHECK: scf.for
// CHECK-DAG: tensor.extract_slice %[[LHS]][0, %{{.*}}] [%{{.*}}, 32]
// CHECK-DAG: tensor.extract_slice %[[RHS]][0, %{{.*}}] [%{{.*}}, 32]
// CHECK-DAG: tensor.extract_slice %[[RHS]][%{{.*}}, 0] [32, %{{.*}}]
func.func @dyn_pad_tiling(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.matmul_transpose_b"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%padded, %pad, %copy = transform.structured.pad %0 pad_to_multiple_of [32] use_prescribed_tensor_shapes {padding_dimensions = [2], padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%tiled_linalg_op, %loops = transform.structured.tile_using_for %padded tile_sizes [0, 0, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%1 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op

View File

@ -30,66 +30,6 @@ module attributes {transform.with_named_sequence} {
// -----
#map = affine_map<(d0, d1, d2) -> (d2, d0)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
func.func @matmul_transpose_a(%arg0: memref<5x3xf32>, %arg1: memref<5x7xf32>, %arg2: memref<3x7xf32>) {
linalg.generic
{indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : memref<5x3xf32>, memref<5x7xf32>) outs(%arg2 : memref<3x7xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%0 = arith.mulf %in, %in_0 : f32
%1 = arith.addf %out, %0 : f32
linalg.yield %1 : f32
}
return
}
// CHECK-LABEL: @matmul_transpose_a
// CHECK-SAME: %[[ARG0:.+]]: memref<5x3xf32>, %[[ARG1:.+]]: memref<5x7xf32>, %[[ARG2:.+]]: memref<3x7xf32>) {
// CHECK-NOT: linalg.generic
// CHECK: linalg.matmul_transpose_a ins(%[[ARG0]], %[[ARG1]] : memref<5x3xf32>, memref<5x7xf32>) outs(%[[ARG2]] : memref<3x7xf32>)
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match interface{LinalgOp} in %arg0 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
// -----
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
func.func @matmul_transpose_b(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
{indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
linalg.yield %2 : f32
} -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
// CHECK-LABEL: @matmul_transpose_b
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG1:.+]]: tensor<?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.matmul_transpose_b ins(%[[ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[ARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match interface{LinalgOp} in %arg0 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
// -----
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
@ -117,32 +57,3 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
// -----
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
func.func @batch_matmul_transpose_b(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
%0 = linalg.generic
{indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%arg2 : tensor<?x?x?xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
linalg.yield %2 : f32
} -> tensor<?x?x?xf32>
return %0 : tensor<?x?x?xf32>
}
// CHECK-LABEL: @batch_matmul_transpose_b
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?xf32>, %[[ARG2:.+]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// CHECK-NOT: linalg.generic
// CHECK: linalg.batch_matmul_transpose_b ins(%[[ARG0]], %[[ARG1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[ARG2]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match interface{LinalgOp} in %arg0 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op
transform.yield
}
}

View File

@ -1,6 +1,20 @@
// RUN: mlir-opt -transform-preload-library='transform-library-paths=%p/transpose-matmul-a.mlir' -transform-interpreter -split-input-file %s | FileCheck %s --check-prefixes=CHECK,TRANSPOSE-A
// RUN: mlir-opt -transform-preload-library='transform-library-paths=%p/transpose-matmul-b.mlir' -transform-interpreter -split-input-file %s | FileCheck %s --check-prefixes=CHECK,TRANSPOSE-B
// TRANSPOSE-A-DAG: #[[$MA:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
// TRANSPOSE-A-DAG: #[[$MB:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// TRANSPOSE-A-DAG: #[[$MC:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// TRANSPOSE-A-DAG: #[[$BMA:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>
// TRANSPOSE-A-DAG: #[[$BMB:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
// TRANSPOSE-A-DAG: #[[$BMC:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
// TRANSPOSE-B-DAG: #[[$MA:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// TRANSPOSE-B-DAG: #[[$MB:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)>
// TRANSPOSE-B-DAG: #[[$MC:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// TRANSPOSE-B-DAG: #[[$BMA:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
// TRANSPOSE-B-DAG: #[[$BMB:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
// TRANSPOSE-B-DAG: #[[$BMC:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
// CHECK-LABEL: func.func @matmul_static(
// CHECK-SAME: %[[A:.*]]: tensor<16x8xf32>,
// CHECK-SAME: %[[B:.*]]: tensor<8x16xf32>) -> tensor<16x16xf32> {
@ -9,10 +23,10 @@
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<16x16xf32>) -> tensor<16x16xf32>
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty() : tensor<8x16xf32>
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<16x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<8x16xf32>) permutation = [1, 0]
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<8x16xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<8x16xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<16x8xf32>
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<16x8xf32>) permutation = [1, 0]
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<16x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<16x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
// CHECK: return %[[C]] : tensor<16x16xf32>
// CHECK: }
func.func @matmul_static(%A: tensor<16x8xf32>, %B: tensor<8x16xf32>) -> (tensor<16x16xf32>) {
@ -38,11 +52,11 @@ func.func @matmul_static(%A: tensor<16x8xf32>, %B: tensor<8x16xf32>) -> (tensor<
// TRANSPOSE-A: %[[A_DIM1:.*]] = tensor.dim %[[A]], %[[C1]] : tensor<?x?xf32>
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM1]], %[[A_DIM0]]) : tensor<?x?xf32>
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<?x?xf32>) outs(%[[A_TRANSP_INIT]] : tensor<?x?xf32>) permutation = [1, 0]
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// TRANSPOSE-B: %[[B_DIM0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?xf32>
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty(%[[B_DIM1]], %[[B_DIM0]]) : tensor<?x?xf32>
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<?x?xf32>) outs(%[[B_TRANSP_INIT]] : tensor<?x?xf32>) permutation = [1, 0]
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: return %[[C]] : tensor<?x?xf32>
// CHECK: }
func.func @matmul_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>) -> (tensor<?x?xf32>) {
@ -69,10 +83,10 @@ func.func @matmul_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>) -> (tensor<?
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<?x16xf32>) -> tensor<?x16xf32>
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM0]]) : tensor<8x?xf32>
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<?x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<8x?xf32>) permutation = [1, 0]
// TRANSPOSE-A: %[[B0:.*]] = linalg.matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<8x?xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
// TRANSPOSE-A: %[[B0:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<8x?xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<16x8xf32>
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<16x8xf32>) permutation = [1, 0]
// TRANSPOSE-B: %[[B0:.*]] = linalg.matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<?x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
// TRANSPOSE-B: %[[B0:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<?x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
// CHECK: return %[[B0]] : tensor<?x16xf32>
// CHECK: }
func.func @matmul_mixed(%A: tensor<?x8xf32>, %B: tensor<8x16xf32>) -> (tensor<?x16xf32>) {
@ -96,10 +110,10 @@ func.func @matmul_mixed(%A: tensor<?x8xf32>, %B: tensor<8x16xf32>) -> (tensor<?x
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty() : tensor<2x8x16xf32>
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<2x16x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<2x8x16xf32>) permutation = [0, 2, 1]
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x16xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x16xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<2x16x8xf32>
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<2x8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<2x16x8xf32>) permutation = [0, 2, 1]
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// CHECK: return %[[C]] : tensor<2x16x16xf32>
// CHECK: }
func.func @batch_matmul_static(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>) -> (tensor<2x16x16xf32>) {
@ -127,12 +141,12 @@ func.func @batch_matmul_static(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>) -
// TRANSPOSE-A: %[[A_DIM2:.*]] = tensor.dim %[[A]], %[[C2]] : tensor<?x?x?xf32>
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM0]], %[[A_DIM2]], %[[A_DIM1]]) : tensor<?x?x?xf32>
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<?x?x?xf32>) outs(%[[A_TRANSP_INIT]] : tensor<?x?x?xf32>) permutation = [0, 2, 1]
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// TRANSPOSE-B: %[[B_DIM0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?x?xf32>
// TRANSPOSE-B: %[[B_DIM1:.*]] = tensor.dim %[[B]], %[[C1]] : tensor<?x?x?xf32>
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty(%[[B_DIM0]], %[[B_DIM2]], %[[B_DIM1]]) : tensor<?x?x?xf32>
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<?x?x?xf32>) outs(%[[B_TRANSP_INIT]] : tensor<?x?x?xf32>) permutation = [0, 2, 1]
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
// CHECK: return %[[C]] : tensor<?x?x?xf32>
// CHECK: }
func.func @batch_matmul_dynamic(%A: tensor<?x?x?xf32>, %B: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
@ -161,10 +175,10 @@ func.func @batch_matmul_dynamic(%A: tensor<?x?x?xf32>, %B: tensor<?x?x?xf32>) ->
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM1]]) : tensor<2x8x?xf32>
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<2x?x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<2x8x?xf32>) permutation = [0, 2, 1]
// TRANSPOSE-A: %[[B0:.*]] = linalg.batch_matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x?xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
// TRANSPOSE-A: %[[B0:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x?xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<2x16x8xf32>
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<2x8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<2x16x8xf32>) permutation = [0, 2, 1]
// TRANSPOSE-B: %[[B0:.*]] = linalg.batch_matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<2x?x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
// TRANSPOSE-B: %[[B0:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<2x?x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
// CHECK: return %[[B0]] : tensor<2x?x16xf32>
// CHECK: }
func.func @batch_matmul_mixed(%A: tensor<2x?x8xf32>, %B: tensor<2x8x16xf32>) -> (tensor<2x?x16xf32>) {

View File

@ -9,7 +9,12 @@
// RUN: FileCheck %s
func.func @matmul_transpose_a(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>, %C : tensor<?x?xf32>) {
%res = linalg.matmul_transpose_a ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
%res = linalg.matmul
indexing_maps = [
affine_map<(d0, d1, d2) -> (d2, d0)>,
affine_map<(d0, d1, d2) -> (d2, d1)>,
affine_map<(d0, d1, d2) -> (d0, d1)>]
ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%C: tensor<?x?xf32>) -> tensor<?x?xf32>
%xf = tensor.cast %res : tensor<?x?xf32> to tensor<*xf32>
call @printMemrefF32(%xf) : (tensor<*xf32>) -> ()
@ -56,7 +61,7 @@ func.func @main() {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%module : !transform.any_op {transform.readonly}) {
%matmul_transpose_a = transform.structured.match ops{["linalg.matmul_transpose_a"]} in %module
%matmul_transpose_a = transform.structured.match ops{["linalg.matmul"]} in %module
: (!transform.any_op) -> !transform.any_op
// Step 1: Tile for size [4] x [4], which corresponds to SVLs x SVLs, where

View File

@ -1,7 +1,7 @@
# RUN: %PYTHON -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops | FileCheck %s
# Just verify that at least one known op is generated.
# CHECK: name: matmul
# CHECK: name: copy
# verify some special cases: negf->NegFOp, powf->PowFOp
# CHECK cpp_class_name: NegFOp

View File

@ -4,7 +4,6 @@ module.exports = {
linalg_dialect : $ => prec.right(choice(
seq(choice(
'linalg.batch_matmul',
'linalg.batch_matmul_transpose_b',
'linalg.batch_matvec',
'linalg.batch_reduce_matmul', 'linalg.broadcast',
'linalg.conv_1d_ncw_fcw', 'linalg.conv_1d_nwc_wcf',
@ -27,7 +26,6 @@ module.exports = {
'linalg.dot', 'linalg.elemwise_binary',
'linalg.elemwise_unary', 'linalg.fill',
'linalg.fill_rng_2d', 'linalg.matmul',
'linalg.matmul_transpose_b',
'linalg.matmul_unsigned', 'linalg.matvec',
'linalg.mmt4d', 'linalg.pooling_nchw_max',
'linalg.pooling_nchw_sum',

View File

@ -213,7 +213,6 @@
"bufferization.to_tensor"
"linalg.batch_matmul"
"linalg.batch_matmul_transpose_b"
"linalg.batch_matvec"
"linalg.batch_reduce_matmul"
"linalg.broadcast"
@ -244,7 +243,6 @@
"linalg.fill"
"linalg.fill_rng_2d"
"linalg.matmul"
"linalg.matmul_transpose_b"
"linalg.matmul_unsigned"
"linalg.matvec"
"linalg.mmt4d"