[MLIR][Linalg] Remove matmul_transpose variants (#147961)
Removes the `(batch_)matmul_transpose_{a|b}` variants from OpDSL and replaces them with `matmul affine_maps [...]` wherever appropriate. This is in line with the [plan](https://discourse.llvm.org/t/rfc-op-explosion-in-linalg/82863), and can be done now that #104783 has merged. See: https://discourse.llvm.org/t/deprecate-batch-matmul-transpose-a-b-linalg-operations/87245 Issues investigated: * pad transform tests could use `matmul` instead, so they were changed to that. * the ArmSME test using transpose actually needed it, so it was changed to `matmul` + affine maps. Arm tests validated by @banach-space (thanks!!).
This commit is contained in:
parent
5a009838a2
commit
d15280894b
@ -145,8 +145,7 @@ std::pair<int64_t, int64_t> getFmrFromWinogradConv2DFmr(WinogradConv2DFmr fmr);
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/Linalg/IR/LinalgRelayoutOps.h.inc"
|
||||
|
||||
namespace mlir {
|
||||
namespace linalg {
|
||||
namespace mlir::linalg {
|
||||
|
||||
/// Returns the outer shape in the packed domain before applying the
|
||||
/// transposition.
|
||||
@ -155,7 +154,194 @@ template <typename OpTy,
|
||||
std::is_same_v<OpTy, linalg::UnPackOp>>>
|
||||
SmallVector<int64_t> getPackedOuterShapeWithoutTransposition(OpTy packOrUnPack);
|
||||
|
||||
} // namespace linalg
|
||||
} // namespace mlir
|
||||
/// Specialization of `linalg.matmul` op that has a transpose map on A
|
||||
class MatmulTransposeAOp : public MatmulOp {
|
||||
/// Create an affine map for a transpose-A matmul. Used only in the builders.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);
|
||||
|
||||
public:
|
||||
using MatmulOp::MatmulOp;
|
||||
static ::mlir::TypeID resolveTypeID() { return TypeID::get<MatmulOp>(); }
|
||||
|
||||
/// Build a transpose A matmul.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static MatmulTransposeAOp create(OpBuilder &builder, Location location,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose A matmul with a specific result type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static MatmulTransposeAOp create(OpBuilder &builder, Location location,
|
||||
TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose A matmul with a specific result type and a cast type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs, Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static MatmulTransposeAOp create(OpBuilder &builder, Location location,
|
||||
TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Checks if the affine map is the expected one for this operation
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
static bool classof(Operation *op);
|
||||
};
|
||||
|
||||
/// Specialization of `linalg.matmul` op that has a transpose map on B
|
||||
class MatmulTransposeBOp : public MatmulOp {
|
||||
/// Create an affine map for a transpose-B matmul. Used only in the builders.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);
|
||||
|
||||
public:
|
||||
using MatmulOp::MatmulOp;
|
||||
static ::mlir::TypeID resolveTypeID() { return TypeID::get<MatmulOp>(); }
|
||||
|
||||
/// Build a transpose B matmul.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static MatmulTransposeBOp create(OpBuilder &builder, Location location,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose B matmul with a specific result type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static MatmulTransposeBOp create(OpBuilder &builder, Location location,
|
||||
TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose B matmul with a specific result type and a cast type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs, Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static MatmulTransposeBOp create(OpBuilder &builder, Location location,
|
||||
TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Checks if the affine map is the expected one for this operation
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
static bool classof(Operation *op);
|
||||
};
|
||||
|
||||
/// Specialization of `linalg.batch_matmul` op that has a transpose map on A
|
||||
class BatchMatmulTransposeAOp : public BatchMatmulOp {
|
||||
/// Create an affine map for a transpose-A batch_matmul. Used only in the
|
||||
/// builders.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);
|
||||
|
||||
public:
|
||||
using BatchMatmulOp::BatchMatmulOp;
|
||||
static ::mlir::TypeID resolveTypeID() { return TypeID::get<BatchMatmulOp>(); }
|
||||
|
||||
/// Build a transpose A batch matmul.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static BatchMatmulTransposeAOp
|
||||
create(OpBuilder &builder, Location location, ValueRange inputs,
|
||||
ValueRange outputs, ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose A batch matmul with a specific result type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static BatchMatmulTransposeAOp
|
||||
create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose A batch matmul with a specific result type and a cast type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs, Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static BatchMatmulTransposeAOp
|
||||
create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs, Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Checks if the affine map is the expected one for this operation
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
static bool classof(Operation *op);
|
||||
};
|
||||
|
||||
/// Specialization of `linalg.batch_matmul` op that has a transpose map on B
|
||||
class BatchMatmulTransposeBOp : public BatchMatmulOp {
|
||||
/// Create an affine map for a transpose-B batch_matmul. Used only in the
|
||||
/// builders.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(OpBuilder &builder);
|
||||
|
||||
public:
|
||||
using BatchMatmulOp::BatchMatmulOp;
|
||||
static ::mlir::TypeID resolveTypeID() { return TypeID::get<BatchMatmulOp>(); }
|
||||
|
||||
/// Build a transpose B batch matmul.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static BatchMatmulTransposeBOp
|
||||
create(OpBuilder &builder, Location location, ValueRange inputs,
|
||||
ValueRange outputs, ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose B batch matmul with a specific result type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static BatchMatmulTransposeBOp
|
||||
create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Build a transpose B batch matmul with a specific result type and a cast type.
|
||||
static void build(OpBuilder &builder, OperationState &result,
|
||||
TypeRange resultTensorTypes, ValueRange inputs,
|
||||
ValueRange outputs, Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
static BatchMatmulTransposeBOp
|
||||
create(OpBuilder &builder, Location location, TypeRange resultTensorTypes,
|
||||
ValueRange inputs, ValueRange outputs, Attribute cast,
|
||||
ArrayRef<NamedAttribute> attributes = {});
|
||||
|
||||
/// Checks if the affine map is the expected one for this operation
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
static bool classof(Operation *op);
|
||||
};
|
||||
|
||||
} // namespace mlir::linalg
|
||||
|
||||
#endif // MLIR_DIALECT_LINALG_IR_LINALG_H
|
||||
|
@ -1055,152 +1055,6 @@ structured_op: !LinalgStructuredOpConfig
|
||||
- !ScalarExpression
|
||||
scalar_arg: BZp
|
||||
--- !LinalgOpConfig
|
||||
metadata: !LinalgOpMetadata
|
||||
name: matmul_transpose_a
|
||||
cpp_class_name: MatmulTransposeAOp
|
||||
doc: |-
|
||||
Performs a matrix multiplication of two 2D inputs with lhs operand
|
||||
transposed.
|
||||
|
||||
Numeric casting is performed on the operands to the inner multiply, promoting
|
||||
them to the same data type as the accumulator/output.
|
||||
implements:
|
||||
- LinalgContractionOpInterface
|
||||
structured_op: !LinalgStructuredOpConfig
|
||||
args:
|
||||
- !LinalgOperandDefConfig
|
||||
name: A
|
||||
kind: input_tensor
|
||||
type_var: T1
|
||||
shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: B
|
||||
kind: input_tensor
|
||||
type_var: T2
|
||||
shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: C
|
||||
kind: output_tensor
|
||||
type_var: U
|
||||
shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: cast
|
||||
kind: type_fn_attr
|
||||
default_fn: cast_signed
|
||||
indexing_maps: !LinalgIndexingMapsConfig
|
||||
static_indexing_maps:
|
||||
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d0)>
|
||||
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2, d1)>
|
||||
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
|
||||
iterator_types:
|
||||
- parallel
|
||||
- parallel
|
||||
- reduction
|
||||
assignments:
|
||||
- !ScalarAssign
|
||||
arg: C
|
||||
value: !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: add
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: C
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: mul
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
attr_name: cast
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: A
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
attr_name: cast
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: B
|
||||
--- !LinalgOpConfig
|
||||
metadata: !LinalgOpMetadata
|
||||
name: matmul_transpose_b
|
||||
cpp_class_name: MatmulTransposeBOp
|
||||
doc: |-
|
||||
Performs a matrix multiplication of two 2D inputs with rhs operand
|
||||
transposed.
|
||||
|
||||
Numeric casting is performed on the operands to the inner multiply, promoting
|
||||
them to the same data type as the accumulator/output.
|
||||
implements:
|
||||
- LinalgContractionOpInterface
|
||||
structured_op: !LinalgStructuredOpConfig
|
||||
args:
|
||||
- !LinalgOperandDefConfig
|
||||
name: A
|
||||
kind: input_tensor
|
||||
type_var: T1
|
||||
shape_map: affine_map<()[s0, s1, s2] -> (s0, s1)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: B
|
||||
kind: input_tensor
|
||||
type_var: T2
|
||||
shape_map: affine_map<()[s0, s1, s2] -> (s2, s1)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: C
|
||||
kind: output_tensor
|
||||
type_var: U
|
||||
shape_map: affine_map<()[s0, s1, s2] -> (s0, s2)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: cast
|
||||
kind: type_fn_attr
|
||||
default_fn: cast_signed
|
||||
indexing_maps: !LinalgIndexingMapsConfig
|
||||
static_indexing_maps:
|
||||
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d2)>
|
||||
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d1, d2)>
|
||||
- affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0, d1)>
|
||||
iterator_types:
|
||||
- parallel
|
||||
- parallel
|
||||
- reduction
|
||||
assignments:
|
||||
- !ScalarAssign
|
||||
arg: C
|
||||
value: !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: add
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: C
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: mul
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
attr_name: cast
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: A
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
attr_name: cast
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: B
|
||||
--- !LinalgOpConfig
|
||||
metadata: !LinalgOpMetadata
|
||||
name: mmt4d
|
||||
cpp_class_name: Mmt4DOp
|
||||
@ -1358,146 +1212,6 @@ structured_op: !LinalgStructuredOpConfig
|
||||
- !ScalarExpression
|
||||
scalar_arg: rhs
|
||||
--- !LinalgOpConfig
|
||||
metadata: !LinalgOpMetadata
|
||||
name: batch_matmul_transpose_a
|
||||
cpp_class_name: BatchMatmulTransposeAOp
|
||||
doc: |-
|
||||
Performs a batched matrix multiplication of two 3D inputs where lhs operand
|
||||
has its non-batch dimensions transposed.
|
||||
|
||||
Numeric casting is performed on the operands to the inner multiply, promoting
|
||||
them to the same data type as the accumulator/output.
|
||||
implements:
|
||||
- LinalgContractionOpInterface
|
||||
structured_op: !LinalgStructuredOpConfig
|
||||
args:
|
||||
- !LinalgOperandDefConfig
|
||||
name: A
|
||||
kind: input_tensor
|
||||
type_var: T1
|
||||
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: B
|
||||
kind: input_tensor
|
||||
type_var: T2
|
||||
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: C
|
||||
kind: output_tensor
|
||||
type_var: U
|
||||
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s2, s3)>
|
||||
indexing_maps: !LinalgIndexingMapsConfig
|
||||
static_indexing_maps:
|
||||
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d1)>
|
||||
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)>
|
||||
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
|
||||
iterator_types:
|
||||
- parallel
|
||||
- parallel
|
||||
- parallel
|
||||
- reduction
|
||||
assignments:
|
||||
- !ScalarAssign
|
||||
arg: C
|
||||
value: !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: add
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: C
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: mul
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
fn_name: cast_signed
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: A
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
fn_name: cast_signed
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: B
|
||||
--- !LinalgOpConfig
|
||||
metadata: !LinalgOpMetadata
|
||||
name: batch_matmul_transpose_b
|
||||
cpp_class_name: BatchMatmulTransposeBOp
|
||||
doc: |-
|
||||
Performs a batched matrix multiplication of two 3D inputs where rhs operand
|
||||
has its non-batch dimensions transposed.
|
||||
|
||||
Numeric casting is performed on the operands to the inner multiply, promoting
|
||||
them to the same data type as the accumulator/output.
|
||||
implements:
|
||||
- LinalgContractionOpInterface
|
||||
structured_op: !LinalgStructuredOpConfig
|
||||
args:
|
||||
- !LinalgOperandDefConfig
|
||||
name: A
|
||||
kind: input_tensor
|
||||
type_var: T1
|
||||
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: B
|
||||
kind: input_tensor
|
||||
type_var: T2
|
||||
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)>
|
||||
- !LinalgOperandDefConfig
|
||||
name: C
|
||||
kind: output_tensor
|
||||
type_var: U
|
||||
shape_map: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)>
|
||||
indexing_maps: !LinalgIndexingMapsConfig
|
||||
static_indexing_maps:
|
||||
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)>
|
||||
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d2, d3)>
|
||||
- affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)>
|
||||
iterator_types:
|
||||
- parallel
|
||||
- parallel
|
||||
- parallel
|
||||
- reduction
|
||||
assignments:
|
||||
- !ScalarAssign
|
||||
arg: C
|
||||
value: !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: add
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: C
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: binary
|
||||
fn_name: mul
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
fn_name: cast_signed
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: A
|
||||
- !ScalarExpression
|
||||
scalar_fn:
|
||||
kind: type
|
||||
fn_name: cast_signed
|
||||
type_var: U
|
||||
operands:
|
||||
- !ScalarExpression
|
||||
scalar_arg: B
|
||||
--- !LinalgOpConfig
|
||||
metadata: !LinalgOpMetadata
|
||||
name: quantized_batch_matmul
|
||||
cpp_class_name: QuantizedBatchMatmulOp
|
||||
|
@ -785,6 +785,9 @@ def MatmulOp : LinalgStructuredBase_Op<"matmul", [
|
||||
/// Returns a list of AffineMap with the default matmul indexing characteristic.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(MLIRContext *context);
|
||||
|
||||
/// Returns true if the AffineMap is the default matmul indexing characteristic.
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
/// Returns true if the given broadcast map \p bcastMap is valid for this op.
|
||||
bool isValidLhsRhsBroadcastMap(AffineMap bcastMap);
|
||||
|
||||
@ -1057,6 +1060,9 @@ def BatchMatmulOp : LinalgStructuredBase_Op<"batch_matmul", !listconcat([AttrSiz
|
||||
/// Returns a list with default AffineMap(s), i.e. without broadcasts and transpositions.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(MLIRContext *context);
|
||||
|
||||
/// Returns true if the AffineMap is the default batch matmul indexing characteristic.
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
/// Returns true if the given broadcast map \p bcastMap is valid for this op.
|
||||
bool isValidLhsRhsBroadcastMap(AffineMap bcastMap, bool isLHS = true);
|
||||
|
||||
@ -1181,6 +1187,9 @@ def BatchReduceMatmulOp : LinalgStructuredBase_Op<"batch_reduce_matmul", [
|
||||
/// Returns a list of AffineMap with the default batch_reduce_matmul indexing characteristic.
|
||||
static SmallVector<AffineMap> getDefaultIndexingMaps(MLIRContext *context);
|
||||
|
||||
/// Returns true if the AffineMap is the default batch reduce matmul indexing characteristic.
|
||||
static bool isDefaultIndexingMaps(Attribute attr);
|
||||
|
||||
/// Returns true if the given broadcast map \p bcastMap is valid for this op.
|
||||
bool isValidLhsRhsBroadcastMap(AffineMap bcastMap, bool isLHS = true);
|
||||
|
||||
|
@ -194,9 +194,10 @@ static void buildMatmulOp(OpBuilder &b, OperationState &state,
|
||||
ArrayRef<AffineMap> indexingMaps) {
|
||||
// Initialize indexingMaps attribute, for MatmulOp.
|
||||
SmallVector<Attribute, 3> indexingMapsAttrVal;
|
||||
indexingMapsAttrVal = llvm::map_to_vector(
|
||||
MatmulOp::getDefaultIndexingMaps(b.getContext()),
|
||||
[](AffineMap map) -> Attribute { return AffineMapAttr::get(map); });
|
||||
indexingMapsAttrVal =
|
||||
llvm::map_to_vector(indexingMaps, [](AffineMap map) -> Attribute {
|
||||
return AffineMapAttr::get(map);
|
||||
});
|
||||
state.addAttribute("indexing_maps", b.getArrayAttr(indexingMapsAttrVal));
|
||||
return buildStructuredOp(b, state, resultTensorTypes, inputs, outputs,
|
||||
attributes, regionBuilder);
|
||||
@ -3749,6 +3750,25 @@ std::pair<int64_t, int64_t> getFmrFromWinogradConv2DFmr(WinogradConv2DFmr fmr) {
|
||||
// MatMulOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Collects, for every entry of `maps`, the dimension position of each result
/// of that entry's affine map (one inner vector per map, in order).
///
/// Fails if an entry is not an `AffineMapAttr`, or if any map result is not a
/// plain `AffineDimExpr`.
static FailureOr<SmallVector<SmallVector<int64_t>>>
getAffineResultPositions(ArrayAttr maps) {
  SmallVector<SmallVector<int64_t>> allPositions;
  for (Attribute entry : maps) {
    auto mapAttr = dyn_cast<AffineMapAttr>(entry);
    if (!mapAttr)
      return failure();
    // Fill the per-map vector in place; it is discarded on failure anyway.
    SmallVector<int64_t> &current = allPositions.emplace_back();
    for (AffineExpr expr : mapAttr.getAffineMap().getResults()) {
      auto dimExpr = dyn_cast<AffineDimExpr>(expr);
      if (!dimExpr)
        return failure();
      current.push_back(dimExpr.getPosition());
    }
  }
  return allPositions;
}
|
||||
|
||||
/// Returns a list of AffineMap with the typical matmul indexing characteristic.
|
||||
SmallVector<AffineMap> MatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
|
||||
AffineExpr d0, d1, d2;
|
||||
@ -3760,6 +3780,20 @@ SmallVector<AffineMap> MatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
|
||||
return indexingMaps;
|
||||
}
|
||||
|
||||
/// Returns true if `attr` is an array of exactly three affine-map attributes
/// whose result dimension positions are (d0, d2), (d2, d1) and (d0, d1).
///
/// Returns false for a null attribute, a non-array attribute, an array of the
/// wrong size, or maps whose results are not plain dimension expressions.
bool MatmulOp::isDefaultIndexingMaps(Attribute attr) {
  // A null `attr` (e.g. the result of an attribute lookup that found nothing)
  // must yield false; plain `dyn_cast` must not be handed a null value.
  ArrayAttr maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 2} &&
         (*positions)[1] == SmallVector<int64_t>{2, 1} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1};
}
|
||||
|
||||
SmallVector<utils::IteratorType> MatmulOp::getIteratorTypesArray() {
|
||||
return SmallVector<utils::IteratorType>{utils::IteratorType::parallel,
|
||||
utils::IteratorType::parallel,
|
||||
@ -3912,6 +3946,380 @@ Speculation::Speculatability MatmulOp::getSpeculatability() {
|
||||
return getGenericSpeculatabilityImpl(cast<LinalgOp>(getOperation()));
|
||||
}
|
||||
|
||||
/// Returns the three indexing maps (LHS, RHS, output) of a transpose-A matmul
/// over three dimensions: (d2, d0), (d2, d1) and (d0, d1).
SmallVector<AffineMap>
MatmulTransposeAOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *ctx = builder.getContext();
  // m = d0, n = d1, k = d2.
  AffineExpr m, n, k;
  bindDims(ctx, m, n, k);
  return {AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {k, m}, ctx),
          AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {k, n}, ctx),
          AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {m, n}, ctx)};
}
|
||||
|
||||
/// Returns true if `attr` is an array of exactly three affine-map attributes
/// whose result dimension positions are (d2, d0), (d2, d1) and (d0, d1), i.e.
/// the maps produced by this class's builders.
///
/// Returns false for a null attribute, a non-array attribute, an array of the
/// wrong size, or maps whose results are not plain dimension expressions.
bool MatmulTransposeAOp::isDefaultIndexingMaps(Attribute attr) {
  // `classof` forwards `op->getAttr("indexing_maps")` unchecked, so `attr` may
  // be null; plain `dyn_cast` must not be handed a null value.
  ArrayAttr maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{2, 0} &&
         (*positions)[1] == SmallVector<int64_t>{2, 1} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1};
}
|
||||
|
||||
/// Populates `result` for a transpose-A matmul without explicit result types,
/// using the matmul region builder and the transpose-A indexing maps.
void linalg::MatmulTransposeAOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  SmallVector<AffineMap> transposeAMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, /*resultTensorTypes=*/std::nullopt, inputs,
                outputs, attributes, MatmulOp::getRegionBuilder(),
                transposeAMaps);
}
|
||||
|
||||
/// Creates a transpose-A matmul at `location` through `builder` and returns
/// it as a `MatmulTransposeAOp`.
MatmulTransposeAOp
MatmulTransposeAOp::create(OpBuilder &builder, Location location,
                           ValueRange inputs, ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, inputs, outputs, attributes);
  Operation *created = builder.create(opState);
  // The freshly built op is expected to satisfy this class's `classof`.
  auto transposeA = dyn_cast<MatmulTransposeAOp>(created);
  assert(transposeA && "builder didn't return the right type");
  return transposeA;
}
|
||||
|
||||
/// Populates `result` for a transpose-A matmul with the given result tensor
/// types, using the matmul region builder and the transpose-A indexing maps.
void linalg::MatmulTransposeAOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  SmallVector<AffineMap> transposeAMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs,
                attributes, MatmulOp::getRegionBuilder(), transposeAMaps);
}
|
||||
|
||||
/// Creates a transpose-A matmul with explicit result tensor types at
/// `location` and returns it as a `MatmulTransposeAOp`.
MatmulTransposeAOp
MatmulTransposeAOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, attributes);
  Operation *created = builder.create(opState);
  // The freshly built op is expected to satisfy this class's `classof`.
  auto transposeA = dyn_cast<MatmulTransposeAOp>(created);
  assert(transposeA && "builder didn't return the right type");
  return transposeA;
}
|
||||
|
||||
/// Populates `result` for a transpose-A matmul with the given result tensor
/// types and an explicit `cast` attribute.
void linalg::MatmulTransposeAOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       Attribute cast,
                                       ArrayRef<NamedAttribute> attributes) {
  // The cast attribute is recorded first, before the structured op is built.
  result.addAttribute("cast", cast);
  SmallVector<AffineMap> transposeAMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs,
                attributes, MatmulOp::getRegionBuilder(), transposeAMaps);
}
|
||||
|
||||
/// Creates a transpose-A matmul with explicit result tensor types and a
/// `cast` attribute at `location`, returned as a `MatmulTransposeAOp`.
MatmulTransposeAOp
MatmulTransposeAOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs, Attribute cast,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, cast,
        attributes);
  Operation *created = builder.create(opState);
  // The freshly built op is expected to satisfy this class's `classof`.
  auto transposeA = dyn_cast<MatmulTransposeAOp>(created);
  assert(transposeA && "builder didn't return the right type");
  return transposeA;
}
|
||||
|
||||
/// An operation is a `MatmulTransposeAOp` when it is a non-null
/// `linalg::MatmulOp` whose "indexing_maps" attribute matches the transpose-A
/// maps.
bool MatmulTransposeAOp::classof(Operation *op) {
  if (!dyn_cast_or_null<linalg::MatmulOp>(op))
    return false;
  Attribute indexingMaps = op->getAttr("indexing_maps");
  return MatmulTransposeAOp::isDefaultIndexingMaps(indexingMaps);
}
|
||||
|
||||
/// Returns the three indexing maps (LHS, RHS, output) of a transpose-B matmul
/// over three dimensions: (d0, d2), (d1, d2) and (d0, d1).
SmallVector<AffineMap>
MatmulTransposeBOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *ctx = builder.getContext();
  // m = d0, n = d1, k = d2.
  AffineExpr m, n, k;
  bindDims(ctx, m, n, k);
  return {AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {m, k}, ctx),
          AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {n, k}, ctx),
          AffineMap::get(/*dimCount=*/3, /*symbolCount=*/0, {m, n}, ctx)};
}
|
||||
|
||||
/// Returns true if `attr` is an array of exactly three affine-map attributes
/// whose result dimension positions are (d0, d2), (d1, d2) and (d0, d1), i.e.
/// the maps produced by this class's builders.
///
/// Returns false for a null attribute, a non-array attribute, an array of the
/// wrong size, or maps whose results are not plain dimension expressions.
bool MatmulTransposeBOp::isDefaultIndexingMaps(Attribute attr) {
  // `classof` forwards `op->getAttr("indexing_maps")` unchecked, so `attr` may
  // be null; plain `dyn_cast` must not be handed a null value.
  ArrayAttr maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 2} &&
         (*positions)[1] == SmallVector<int64_t>{1, 2} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1};
}
|
||||
|
||||
/// Populates `result` for a transpose-B matmul without explicit result types,
/// using the matmul region builder and the transpose-B indexing maps.
void linalg::MatmulTransposeBOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  SmallVector<AffineMap> transposeBMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, /*resultTensorTypes=*/std::nullopt, inputs,
                outputs, attributes, MatmulOp::getRegionBuilder(),
                transposeBMaps);
}
|
||||
|
||||
/// Creates a transpose-B matmul at `location` through `builder` and returns
/// it as a `MatmulTransposeBOp`.
MatmulTransposeBOp
MatmulTransposeBOp::create(OpBuilder &builder, Location location,
                           ValueRange inputs, ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, inputs, outputs, attributes);
  Operation *created = builder.create(opState);
  // The freshly built op is expected to satisfy this class's `classof`.
  auto transposeB = dyn_cast<MatmulTransposeBOp>(created);
  assert(transposeB && "builder didn't return the right type");
  return transposeB;
}
|
||||
|
||||
/// Populates `result` for a transpose-B matmul with the given result tensor
/// types, using the matmul region builder and the transpose-B indexing maps.
void linalg::MatmulTransposeBOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       ArrayRef<NamedAttribute> attributes) {
  SmallVector<AffineMap> transposeBMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs,
                attributes, MatmulOp::getRegionBuilder(), transposeBMaps);
}
|
||||
|
||||
/// Creates a transpose-B matmul with explicit result tensor types at
/// `location` and returns it as a `MatmulTransposeBOp`.
MatmulTransposeBOp
MatmulTransposeBOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, attributes);
  Operation *created = builder.create(opState);
  // The freshly built op is expected to satisfy this class's `classof`.
  auto transposeB = dyn_cast<MatmulTransposeBOp>(created);
  assert(transposeB && "builder didn't return the right type");
  return transposeB;
}
|
||||
|
||||
/// Populates `result` for a transpose-B matmul with the given result tensor
/// types and an explicit `cast` attribute.
void linalg::MatmulTransposeBOp::build(OpBuilder &builder,
                                       OperationState &result,
                                       TypeRange resultTensorTypes,
                                       ValueRange inputs, ValueRange outputs,
                                       Attribute cast,
                                       ArrayRef<NamedAttribute> attributes) {
  // The cast attribute is recorded first, before the structured op is built.
  result.addAttribute("cast", cast);
  SmallVector<AffineMap> transposeBMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs,
                attributes, MatmulOp::getRegionBuilder(), transposeBMaps);
}
|
||||
|
||||
/// Creates a transpose-B matmul with explicit result tensor types and a
/// `cast` attribute at `location`, returned as a `MatmulTransposeBOp`.
MatmulTransposeBOp
MatmulTransposeBOp::create(OpBuilder &builder, Location location,
                           TypeRange resultTensorTypes, ValueRange inputs,
                           ValueRange outputs, Attribute cast,
                           ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, cast,
        attributes);
  Operation *created = builder.create(opState);
  // The freshly built op is expected to satisfy this class's `classof`.
  auto transposeB = dyn_cast<MatmulTransposeBOp>(created);
  assert(transposeB && "builder didn't return the right type");
  return transposeB;
}
|
||||
|
||||
/// An operation is a `MatmulTransposeBOp` when it is a non-null
/// `linalg::MatmulOp` whose "indexing_maps" attribute matches the transpose-B
/// maps.
bool MatmulTransposeBOp::classof(Operation *op) {
  if (!dyn_cast_or_null<linalg::MatmulOp>(op))
    return false;
  Attribute indexingMaps = op->getAttr("indexing_maps");
  return MatmulTransposeBOp::isDefaultIndexingMaps(indexingMaps);
}
|
||||
|
||||
/// Returns the three indexing maps (LHS, RHS, output) of a transpose-A batch
/// matmul over four dimensions: (d0, d3, d1), (d0, d3, d2) and (d0, d1, d2).
SmallVector<AffineMap>
BatchMatmulTransposeAOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *ctx = builder.getContext();
  // batch = d0, m = d1, n = d2, k = d3.
  AffineExpr batch, m, n, k;
  bindDims(ctx, batch, m, n, k);
  return {
      AffineMap::get(/*dimCount=*/4, /*symbolCount=*/0, {batch, k, m}, ctx),
      AffineMap::get(/*dimCount=*/4, /*symbolCount=*/0, {batch, k, n}, ctx),
      AffineMap::get(/*dimCount=*/4, /*symbolCount=*/0, {batch, m, n}, ctx)};
}
|
||||
|
||||
/// Returns true if `attr` is an array of exactly three affine-map attributes
/// whose result dimension positions are (d0, d3, d1), (d0, d3, d2) and
/// (d0, d1, d2), i.e. the maps produced by this class's builders.
///
/// Returns false for a null attribute, a non-array attribute, an array of the
/// wrong size, or maps whose results are not plain dimension expressions.
bool BatchMatmulTransposeAOp::isDefaultIndexingMaps(Attribute attr) {
  // A null `attr` (e.g. the result of an attribute lookup that found nothing)
  // must yield false; plain `dyn_cast` must not be handed a null value.
  ArrayAttr maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;
  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;
  return (*positions)[0] == SmallVector<int64_t>{0, 3, 1} &&
         (*positions)[1] == SmallVector<int64_t>{0, 3, 2} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1, 2};
}
|
||||
|
||||
/// Populates `result` for a transpose-A batch matmul without explicit result
/// types. Uses the region builder of `BatchMatmulOp` and this class's default
/// (transposed) indexing maps.
void linalg::BatchMatmulTransposeAOp::build(
    OpBuilder &builder, OperationState &result, ValueRange inputs,
    ValueRange outputs, ArrayRef<NamedAttribute> attributes) {
  const SmallVector<AffineMap> transposedMaps = getDefaultIndexingMaps(builder);
  // No explicit result types are supplied for this overload.
  buildMatmulOp(builder, result, std::nullopt, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(), transposedMaps);
}
|
||||
|
||||
/// Builds and inserts a transpose-A batch matmul without explicit result
/// types, returning it as a `BatchMatmulTransposeAOp`.
BatchMatmulTransposeAOp
BatchMatmulTransposeAOp::create(OpBuilder &builder, Location location,
                                ValueRange inputs, ValueRange outputs,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, inputs, outputs, attributes);

  // The builder hands back a generic Operation; view it through this class.
  Operation *rawOp = builder.create(opState);
  auto transposeAOp = dyn_cast<BatchMatmulTransposeAOp>(rawOp);
  assert(transposeAOp && "builder didn't return the right type");
  return transposeAOp;
}
|
||||
|
||||
/// Populates `result` for a transpose-A batch matmul with the given result
/// tensor types. Uses the region builder of `BatchMatmulOp` and this class's
/// default (transposed) indexing maps.
void linalg::BatchMatmulTransposeAOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs,
    ArrayRef<NamedAttribute> attributes) {
  const SmallVector<AffineMap> transposedMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(), transposedMaps);
}
|
||||
|
||||
/// Builds and inserts a transpose-A batch matmul with the given result tensor
/// types, returning it as a `BatchMatmulTransposeAOp`.
BatchMatmulTransposeAOp
BatchMatmulTransposeAOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, attributes);

  // The builder hands back a generic Operation; view it through this class.
  Operation *rawOp = builder.create(opState);
  auto transposeAOp = dyn_cast<BatchMatmulTransposeAOp>(rawOp);
  assert(transposeAOp && "builder didn't return the right type");
  return transposeAOp;
}
|
||||
|
||||
/// Populates `result` for a transpose-A batch matmul that carries an explicit
/// `cast` attribute. Uses the region builder of `BatchMatmulOp` and this
/// class's default (transposed) indexing maps.
void linalg::BatchMatmulTransposeAOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs, Attribute cast,
    ArrayRef<NamedAttribute> attributes) {
  // Record the cast attribute before the generic matmul construction runs.
  result.addAttribute("cast", cast);

  const SmallVector<AffineMap> transposedMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(), transposedMaps);
}
|
||||
|
||||
/// Builds and inserts a transpose-A batch matmul with an explicit `cast`
/// attribute, returning it as a `BatchMatmulTransposeAOp`.
BatchMatmulTransposeAOp
BatchMatmulTransposeAOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs, Attribute cast,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, cast, attributes);

  // The builder hands back a generic Operation; view it through this class.
  Operation *rawOp = builder.create(opState);
  auto transposeAOp = dyn_cast<BatchMatmulTransposeAOp>(rawOp);
  assert(transposeAOp && "builder didn't return the right type");
  return transposeAOp;
}
|
||||
|
||||
/// Returns true when `op` is a non-null `linalg::BatchMatmulOp` whose
/// `indexing_maps` attribute is accepted by
/// `BatchMatmulTransposeAOp::isDefaultIndexingMaps`.
bool BatchMatmulTransposeAOp::classof(Operation *op) {
  if (!dyn_cast_or_null<linalg::BatchMatmulOp>(op))
    return false;

  // `getAttr` yields a null Attribute when `indexing_maps` is absent. Never
  // forward a null into the map check, which casts its argument.
  Attribute indexingMaps = op->getAttr("indexing_maps");
  return indexingMaps &&
         BatchMatmulTransposeAOp::isDefaultIndexingMaps(indexingMaps);
}
|
||||
|
||||
/// Returns the three indexing maps (LHS, RHS, output) of a transpose-B batch
/// matmul over a 4-d iteration space:
///   LHS    -> (d0, d1, d3)
///   RHS    -> (d0, d2, d3)
///   output -> (d0, d1, d2)
SmallVector<AffineMap>
BatchMatmulTransposeBOp::getDefaultIndexingMaps(OpBuilder &builder) {
  MLIRContext *ctx = builder.getContext();

  AffineExpr dim0, dim1, dim2, dim3;
  bindDims(ctx, dim0, dim1, dim2, dim3);

  return {AffineMap::get(4, 0, {dim0, dim1, dim3}, ctx),
          AffineMap::get(4, 0, {dim0, dim2, dim3}, ctx),
          AffineMap::get(4, 0, {dim0, dim1, dim2}, ctx)};
}
|
||||
|
||||
/// Returns true when `attr` holds exactly the three indexing maps of a
/// transpose-B batch matmul, i.e. result positions (0, 1, 3) for the LHS,
/// (0, 2, 3) for the RHS and (0, 1, 2) for the output.
///
/// A null `attr`, a non-array attribute, an array whose size is not 3, or maps
/// for which `getAffineResultPositions` fails all yield false.
bool BatchMatmulTransposeBOp::isDefaultIndexingMaps(Attribute attr) {
  // `classof` forwards `op->getAttr("indexing_maps")`, which is null when the
  // attribute is missing. Plain `dyn_cast` asserts on a null value, so use the
  // null-tolerant variant.
  auto maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;

  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;

  return (*positions)[0] == SmallVector<int64_t>{0, 1, 3} &&
         (*positions)[1] == SmallVector<int64_t>{0, 2, 3} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1, 2};
}
|
||||
|
||||
/// Populates `result` for a transpose-B batch matmul without explicit result
/// types. Uses the region builder of `BatchMatmulOp` and this class's default
/// (transposed) indexing maps.
void linalg::BatchMatmulTransposeBOp::build(
    OpBuilder &builder, OperationState &result, ValueRange inputs,
    ValueRange outputs, ArrayRef<NamedAttribute> attributes) {
  const SmallVector<AffineMap> transposedMaps = getDefaultIndexingMaps(builder);
  // No explicit result types are supplied for this overload.
  buildMatmulOp(builder, result, std::nullopt, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(), transposedMaps);
}
|
||||
|
||||
/// Builds and inserts a transpose-B batch matmul without explicit result
/// types, returning it as a `BatchMatmulTransposeBOp`.
BatchMatmulTransposeBOp
BatchMatmulTransposeBOp::create(OpBuilder &builder, Location location,
                                ValueRange inputs, ValueRange outputs,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, inputs, outputs, attributes);

  // The builder hands back a generic Operation; view it through this class.
  Operation *rawOp = builder.create(opState);
  auto transposeBOp = dyn_cast<BatchMatmulTransposeBOp>(rawOp);
  assert(transposeBOp && "builder didn't return the right type");
  return transposeBOp;
}
|
||||
|
||||
/// Populates `result` for a transpose-B batch matmul with the given result
/// tensor types. Uses the region builder of `BatchMatmulOp` and this class's
/// default (transposed) indexing maps.
void linalg::BatchMatmulTransposeBOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs,
    ArrayRef<NamedAttribute> attributes) {
  const SmallVector<AffineMap> transposedMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(), transposedMaps);
}
|
||||
|
||||
/// Builds and inserts a transpose-B batch matmul with the given result tensor
/// types, returning it as a `BatchMatmulTransposeBOp`.
BatchMatmulTransposeBOp
BatchMatmulTransposeBOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, attributes);

  // The builder hands back a generic Operation; view it through this class.
  Operation *rawOp = builder.create(opState);
  auto transposeBOp = dyn_cast<BatchMatmulTransposeBOp>(rawOp);
  assert(transposeBOp && "builder didn't return the right type");
  return transposeBOp;
}
|
||||
|
||||
/// Populates `result` for a transpose-B batch matmul that carries an explicit
/// `cast` attribute. Uses the region builder of `BatchMatmulOp` and this
/// class's default (transposed) indexing maps.
void linalg::BatchMatmulTransposeBOp::build(
    OpBuilder &builder, OperationState &result, TypeRange resultTensorTypes,
    ValueRange inputs, ValueRange outputs, Attribute cast,
    ArrayRef<NamedAttribute> attributes) {
  // Record the cast attribute before the generic matmul construction runs.
  result.addAttribute("cast", cast);

  const SmallVector<AffineMap> transposedMaps = getDefaultIndexingMaps(builder);
  buildMatmulOp(builder, result, resultTensorTypes, inputs, outputs, attributes,
                BatchMatmulOp::getRegionBuilder(), transposedMaps);
}
|
||||
|
||||
/// Builds and inserts a transpose-B batch matmul with an explicit `cast`
/// attribute, returning it as a `BatchMatmulTransposeBOp`.
BatchMatmulTransposeBOp
BatchMatmulTransposeBOp::create(OpBuilder &builder, Location location,
                                TypeRange resultTensorTypes, ValueRange inputs,
                                ValueRange outputs, Attribute cast,
                                ArrayRef<NamedAttribute> attributes) {
  OperationState opState(location, getOperationName());
  build(builder, opState, resultTensorTypes, inputs, outputs, cast, attributes);

  // The builder hands back a generic Operation; view it through this class.
  Operation *rawOp = builder.create(opState);
  auto transposeBOp = dyn_cast<BatchMatmulTransposeBOp>(rawOp);
  assert(transposeBOp && "builder didn't return the right type");
  return transposeBOp;
}
|
||||
|
||||
/// Returns true when `op` is a non-null `linalg::BatchMatmulOp` whose
/// `indexing_maps` attribute is accepted by
/// `BatchMatmulTransposeBOp::isDefaultIndexingMaps`.
bool BatchMatmulTransposeBOp::classof(Operation *op) {
  if (!dyn_cast_or_null<linalg::BatchMatmulOp>(op))
    return false;

  // `getAttr` yields a null Attribute when `indexing_maps` is absent. Never
  // forward a null into the map check, which casts its argument.
  Attribute indexingMaps = op->getAttr("indexing_maps");
  return indexingMaps &&
         BatchMatmulTransposeBOp::isDefaultIndexingMaps(indexingMaps);
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ContractOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -4120,6 +4528,20 @@ BatchMatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
|
||||
return indexingMaps;
|
||||
}
|
||||
|
||||
/// Returns true when `attr` holds exactly three indexing maps whose result
/// positions are (0, 1, 3) for the LHS, (0, 3, 2) for the RHS and (0, 1, 2)
/// for the output.
///
/// A null `attr`, a non-array attribute, an array whose size is not 3, or maps
/// for which `getAffineResultPositions` fails all yield false.
bool BatchMatmulOp::isDefaultIndexingMaps(Attribute attr) {
  // Plain `dyn_cast` asserts on a null Attribute (e.g. the result of looking
  // up an absent attribute); the null-tolerant variant maps that to `false`.
  auto maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;

  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;

  return (*positions)[0] == SmallVector<int64_t>{0, 1, 3} &&
         (*positions)[1] == SmallVector<int64_t>{0, 3, 2} &&
         (*positions)[2] == SmallVector<int64_t>{0, 1, 2};
}
|
||||
|
||||
SmallVector<utils::IteratorType> BatchMatmulOp::getIteratorTypesArray() {
|
||||
return SmallVector<utils::IteratorType>{
|
||||
utils::IteratorType::parallel, utils::IteratorType::parallel,
|
||||
@ -5646,6 +6068,19 @@ BatchReduceMatmulOp::getDefaultIndexingMaps(MLIRContext *context) {
|
||||
return indexingMaps;
|
||||
}
|
||||
|
||||
/// Returns true when `attr` holds exactly three indexing maps whose result
/// positions are (0, 1, 3) for the LHS, (0, 3, 2) for the RHS and (1, 2) for
/// the output (the output map has no position-0 result).
///
/// A null `attr`, a non-array attribute, an array whose size is not 3, or maps
/// for which `getAffineResultPositions` fails all yield false.
bool BatchReduceMatmulOp::isDefaultIndexingMaps(Attribute attr) {
  // Plain `dyn_cast` asserts on a null Attribute (e.g. the result of looking
  // up an absent attribute); the null-tolerant variant maps that to `false`.
  auto maps = dyn_cast_or_null<ArrayAttr>(attr);
  if (!maps || maps.size() != 3)
    return false;

  auto positions = getAffineResultPositions(maps);
  if (failed(positions))
    return false;

  return (*positions)[0] == SmallVector<int64_t>{0, 1, 3} &&
         (*positions)[1] == SmallVector<int64_t>{0, 3, 2} &&
         (*positions)[2] == SmallVector<int64_t>{1, 2};
}
|
||||
/// Returns the number of region arguments of the op, which is always 3.
/// NOTE(review): presumably one per operand (two inputs plus the output) -
/// confirm against the op definition.
unsigned BatchReduceMatmulOp::getNumRegionArgs() { return 3; }
|
||||
|
||||
std::string BatchReduceMatmulOp::getLibraryCallName() {
|
||||
|
@ -320,10 +320,6 @@ void linalg::populateBlockPackMatmulPatterns(
|
||||
RewritePatternSet &patterns, const ControlBlockPackMatmulFn &controlFn) {
|
||||
patterns.add<BlockPackMatmul<linalg::GenericOp>,
|
||||
BlockPackMatmul<linalg::MatmulOp>,
|
||||
BlockPackMatmul<linalg::BatchMatmulOp>,
|
||||
BlockPackMatmul<linalg::MatmulTransposeAOp>,
|
||||
BlockPackMatmul<linalg::BatchMatmulTransposeAOp>,
|
||||
BlockPackMatmul<linalg::MatmulTransposeBOp>,
|
||||
BlockPackMatmul<linalg::BatchMatmulTransposeBOp>>(
|
||||
patterns.getContext(), controlFn);
|
||||
BlockPackMatmul<linalg::BatchMatmulOp>>(patterns.getContext(),
|
||||
controlFn);
|
||||
}
|
||||
|
@ -1052,12 +1052,8 @@ struct RankReduceMatmul : RankReduceContractionOps<FromOpTy, ToOpTy> {
|
||||
static bool constexpr reduceLeft =
|
||||
(std::is_same_v<FromOpTy, BatchMatmulOp> &&
|
||||
std::is_same_v<ToOpTy, BatchVecmatOp>) ||
|
||||
(std::is_same_v<FromOpTy, BatchMatmulTransposeAOp> &&
|
||||
std::is_same_v<ToOpTy, BatchVecmatOp>) ||
|
||||
(std::is_same_v<FromOpTy, MatmulOp> &&
|
||||
std::is_same_v<ToOpTy, VecmatOp>) ||
|
||||
(std::is_same_v<FromOpTy, MatmulTransposeAOp> &&
|
||||
std::is_same_v<ToOpTy, VecmatOp>) ||
|
||||
(std::is_same_v<FromOpTy, MatvecOp> && std::is_same_v<ToOpTy, DotOp>);
|
||||
|
||||
/// Look for non-batch spatial dims to collapse.
|
||||
@ -1113,27 +1109,15 @@ void mlir::linalg::populateContractionOpRankReducingPatterns(
|
||||
MLIRContext *context = patterns.getContext();
|
||||
// Unbatching patterns for unit batch size
|
||||
patterns.add<RankReduceToUnBatched<BatchMatmulOp, MatmulOp>>(context);
|
||||
patterns
|
||||
.add<RankReduceToUnBatched<BatchMatmulTransposeAOp, MatmulTransposeAOp>>(
|
||||
context);
|
||||
patterns
|
||||
.add<RankReduceToUnBatched<BatchMatmulTransposeBOp, MatmulTransposeBOp>>(
|
||||
context);
|
||||
patterns.add<RankReduceToUnBatched<BatchMatvecOp, MatvecOp>>(context);
|
||||
patterns.add<RankReduceToUnBatched<BatchVecmatOp, VecmatOp>>(context);
|
||||
|
||||
// Non-batch rank 1 reducing patterns
|
||||
patterns.add<RankReduceMatmul<MatmulOp, VecmatOp>>(context);
|
||||
patterns.add<RankReduceMatmul<MatmulOp, MatvecOp>>(context);
|
||||
patterns.add<RankReduceMatmul<MatmulTransposeAOp, VecmatOp>>(context);
|
||||
patterns.add<RankReduceMatmul<MatmulTransposeBOp, MatvecOp>>(context);
|
||||
// Batch rank 1 reducing patterns
|
||||
patterns.add<RankReduceMatmul<BatchMatmulOp, BatchVecmatOp>>(context);
|
||||
patterns.add<RankReduceMatmul<BatchMatmulOp, BatchMatvecOp>>(context);
|
||||
patterns.add<RankReduceMatmul<BatchMatmulTransposeAOp, BatchVecmatOp>>(
|
||||
context);
|
||||
patterns.add<RankReduceMatmul<BatchMatmulTransposeBOp, BatchMatvecOp>>(
|
||||
context);
|
||||
|
||||
// Non-batch rank 0 reducing patterns
|
||||
patterns.add<RankReduceMatmul<MatvecOp, DotOp>>(context);
|
||||
|
@ -234,19 +234,8 @@ static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
|
||||
|
||||
/// Codegen the different matmul variants.
|
||||
if (numOfBatchDims) {
|
||||
if (a == IndexMatchResult::Transposed)
|
||||
return replaceWithMatmulVariant<BatchMatmulTransposeAOp>(rewriter,
|
||||
genericOp);
|
||||
if (b == IndexMatchResult::Transposed)
|
||||
return replaceWithMatmulVariant<BatchMatmulTransposeBOp>(rewriter,
|
||||
genericOp);
|
||||
return replaceWithMatmulVariant<BatchMatmulOp>(rewriter, genericOp);
|
||||
}
|
||||
|
||||
if (a == IndexMatchResult::Transposed)
|
||||
return replaceWithMatmulVariant<MatmulTransposeAOp>(rewriter, genericOp);
|
||||
if (b == IndexMatchResult::Transposed)
|
||||
return replaceWithMatmulVariant<MatmulTransposeBOp>(rewriter, genericOp);
|
||||
return replaceWithMatmulVariant<MatmulOp>(rewriter, genericOp);
|
||||
}
|
||||
|
||||
|
@ -52,19 +52,19 @@ FailureOr<Operation *> mlir::linalg::transposeMatmul(RewriterBase &rewriter,
|
||||
dynamicDims.push_back(tensor::DimOp::create(rewriter, loc, input, 0));
|
||||
|
||||
ArrayRef<int64_t> shape = type.getShape();
|
||||
Value empty = tensor::EmptyOp::create(rewriter, loc,
|
||||
ArrayRef<int64_t>{shape[1], shape[0]},
|
||||
type.getElementType(), dynamicDims);
|
||||
auto transposeOp = linalg::TransposeOp::create(rewriter, loc, input, empty,
|
||||
ArrayRef<int64_t>{1, 0});
|
||||
Value empty = rewriter.create<tensor::EmptyOp>(
|
||||
loc, ArrayRef<int64_t>{shape[1], shape[0]}, type.getElementType(),
|
||||
dynamicDims);
|
||||
auto transposeOp = rewriter.create<linalg::TransposeOp>(
|
||||
loc, input, empty, ArrayRef<int64_t>{1, 0});
|
||||
Operation *newMatmulOp;
|
||||
if (transposeLHS) {
|
||||
newMatmulOp = linalg::MatmulTransposeAOp::create(
|
||||
newMatmulOp = MatmulTransposeAOp::create(
|
||||
rewriter, loc, matmulOp.getResultTypes(),
|
||||
ValueRange{transposeOp->getResult(0), matmulOp.getInputs()[1]},
|
||||
matmulOp.getOutputs());
|
||||
} else {
|
||||
newMatmulOp = linalg::MatmulTransposeBOp::create(
|
||||
newMatmulOp = MatmulTransposeBOp::create(
|
||||
rewriter, loc, matmulOp.getResultTypes(),
|
||||
ValueRange{matmulOp.getInputs()[0], transposeOp->getResult(0)},
|
||||
matmulOp.getOutputs());
|
||||
@ -112,16 +112,16 @@ mlir::linalg::transposeBatchMatmul(RewriterBase &rewriter,
|
||||
Value empty = tensor::EmptyOp::create(
|
||||
rewriter, loc, ArrayRef<int64_t>{shape[0], shape[2], shape[1]},
|
||||
type.getElementType(), dynamicDims);
|
||||
auto transposeOp = linalg::TransposeOp::create(rewriter, loc, input, empty,
|
||||
ArrayRef<int64_t>{0, 2, 1});
|
||||
auto transposeOp = rewriter.create<linalg::TransposeOp>(
|
||||
loc, input, empty, ArrayRef<int64_t>{0, 2, 1});
|
||||
Operation *newMatmulOp;
|
||||
if (transposeLHS) {
|
||||
newMatmulOp = linalg::BatchMatmulTransposeAOp::create(
|
||||
newMatmulOp = BatchMatmulTransposeAOp::create(
|
||||
rewriter, loc, batchMatmulOp.getResultTypes(),
|
||||
ValueRange{transposeOp->getResult(0), batchMatmulOp.getInputs()[1]},
|
||||
batchMatmulOp.getOutputs());
|
||||
} else {
|
||||
newMatmulOp = linalg::BatchMatmulTransposeBOp::create(
|
||||
newMatmulOp = BatchMatmulTransposeBOp::create(
|
||||
rewriter, loc, batchMatmulOp.getResultTypes(),
|
||||
ValueRange{batchMatmulOp.getInputs()[0], transposeOp->getResult(0)},
|
||||
batchMatmulOp.getOutputs());
|
||||
|
@ -2563,7 +2563,7 @@ vectorizeScalableVectorPrecondition(Operation *op,
|
||||
"vectorization";
|
||||
return failure();
|
||||
}
|
||||
if (isa<linalg::MatmulOp>(op) || isa<linalg::MatmulTransposeAOp>(op)) {
|
||||
if (isa<linalg::MatmulOp>(op)) {
|
||||
LDBG()
|
||||
<< "Scalable vectorization of the reduction dim in Matmul-like ops "
|
||||
"is not supported";
|
||||
@ -2604,15 +2604,9 @@ vectorizeScalableVectorPrecondition(Operation *op,
|
||||
return failure();
|
||||
}
|
||||
|
||||
// Do not let matmuls with extended semantics (user-defined indexing maps)
|
||||
// through this transform.
|
||||
if (linalgOp.hasUserDefinedMaps())
|
||||
return failure();
|
||||
|
||||
// Cond 4: Only the following ops are supported in the
|
||||
// presence of scalable vectors
|
||||
return success(isElementwise(linalgOp) || isa<linalg::MatmulOp>(op) ||
|
||||
isa<linalg::MatmulTransposeAOp>(op) ||
|
||||
isa<linalg::DepthwiseConv1DNwcWcOp>(op) ||
|
||||
isa<linalg::MatvecOp>(op) || isa<linalg::Mmt4DOp>(op) ||
|
||||
hasReductionIterator(linalgOp));
|
||||
|
@ -373,42 +373,6 @@ def quantized_matmul(
|
||||
)
|
||||
|
||||
|
||||
@linalg_structured_op
def matmul_transpose_a(
    # Shape symbols follow the indexing below: A is read as A[k, m] and B as
    # B[k, n], so A is K x M and B is K x N. (The previous definition declared
    # them as K x N and K x M, contradicting the comprehension and C's M x N.)
    A=TensorDef(T1, S.K, S.M),
    B=TensorDef(T2, S.K, S.N),
    C=TensorDef(U, S.M, S.N, output=True),
    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
):
    """Performs a matrix multiplication of two 2D inputs with lhs operand
    transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
    """
    domain(D.m, D.n, D.k)
    implements(ContractionOpInterface)
    # Accumulate over k; A is indexed with its two dims swapped (transposed lhs).
    C[D.m, D.n] += cast(U, A[D.k, D.m]) * cast(U, B[D.k, D.n])
|
||||
|
||||
|
||||
@linalg_structured_op
def matmul_transpose_b(
    A=TensorDef(T1, S.M, S.K),
    B=TensorDef(T2, S.N, S.K),
    C=TensorDef(U, S.M, S.N, output=True),
    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
):
    """Performs a matrix multiplication of two 2D inputs with rhs operand
    transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
    """
    domain(D.m, D.n, D.k)
    implements(ContractionOpInterface)
    # Both operands are cast to the accumulator type U before multiplying.
    # B is indexed as [n, k], i.e. with its two dims swapped (transposed rhs).
    lhs_elem = cast(U, A[D.m, D.k])
    rhs_elem = cast(U, B[D.n, D.k])
    C[D.m, D.n] += lhs_elem * rhs_elem
|
||||
|
||||
|
||||
@linalg_structured_op
|
||||
def mmt4d(
|
||||
lhs=TensorDef(TV.LhsType, S.M, S.K, S.M0, S.K0),
|
||||
@ -453,44 +417,6 @@ def batch_mmt4d(
|
||||
) * TypeFn.cast_signed(TV.AccumType, rhs[D.b, D.n, D.k, D.n0, D.k0])
|
||||
|
||||
|
||||
@linalg_structured_op
def batch_matmul_transpose_a(
    A=TensorDef(T1, Batch, S.K, S.M),
    B=TensorDef(T2, Batch, S.K, S.N),
    C=TensorDef(U, Batch, S.M, S.N, output=True),
):
    """Performs a batched matrix multiplication of two 3D inputs where lhs operand
    has its non-batch dimensions transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
    """
    domain(D.b, D.m, D.n, D.k)
    implements(ContractionOpInterface)
    # Both operands are signed-cast to the accumulator type U before multiplying.
    # A is indexed as [b, k, m], i.e. with its non-batch dims swapped.
    lhs_elem = TypeFn.cast_signed(U, A[D.b, D.k, D.m])
    rhs_elem = TypeFn.cast_signed(U, B[D.b, D.k, D.n])
    C[D.b, D.m, D.n] += lhs_elem * rhs_elem
|
||||
|
||||
|
||||
@linalg_structured_op
def batch_matmul_transpose_b(
    A=TensorDef(T1, Batch, S.M, S.K),
    B=TensorDef(T2, Batch, S.N, S.K),
    C=TensorDef(U, Batch, S.M, S.N, output=True),
):
    """Performs a batched matrix multiplication of two 3D inputs where rhs operand
    has its non-batch dimensions transposed.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
    """
    domain(D.b, D.m, D.n, D.k)
    implements(ContractionOpInterface)
    # Both operands are signed-cast to the accumulator type U before multiplying.
    # B is indexed as [b, n, k], i.e. with its non-batch dims swapped.
    lhs_elem = TypeFn.cast_signed(U, A[D.b, D.m, D.k])
    rhs_elem = TypeFn.cast_signed(U, B[D.b, D.n, D.k])
    C[D.b, D.m, D.n] += lhs_elem * rhs_elem
|
||||
|
||||
|
||||
@linalg_structured_op
|
||||
def quantized_batch_matmul(
|
||||
A=TensorDef(T1, Batch, S.M, S.K),
|
||||
@ -512,25 +438,6 @@ def quantized_batch_matmul(
|
||||
) * (TypeFn.cast_signed(U, B[D.b, D.k, D.n]) - TypeFn.cast_signed(U, BZp))
|
||||
|
||||
|
||||
@linalg_structured_op
def batch_reduce_matmul(
    A=TensorDef(T1, Batch, S.M, S.K),
    B=TensorDef(T2, Batch, S.K, S.N),
    C=TensorDef(U, S.M, S.N, output=True),
):
    """Performs a batch-reduce matrix multiplication of two 3D inputs.
    The partial multiplication results are reduced into a 2D output.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output.
    """
    domain(D.b, D.m, D.n, D.k)
    implements(ContractionOpInterface)
    # Both operands are signed-cast to the accumulator type U before multiplying.
    # The output has no batch index, so b is accumulated over together with k.
    lhs_elem = TypeFn.cast_signed(U, A[D.b, D.m, D.k])
    rhs_elem = TypeFn.cast_signed(U, B[D.b, D.k, D.n])
    C[D.m, D.n] += lhs_elem * rhs_elem
|
||||
|
||||
|
||||
@linalg_structured_op
|
||||
def matvec(
|
||||
A=TensorDef(T1, S.M, S.N), y=TensorDef(T2, S.N), x=TensorDef(U, S.M, output=True)
|
||||
|
@ -20,20 +20,6 @@ func.func @block_matmul(
|
||||
return %0 : tensor<64x64xf32>
|
||||
}
|
||||
|
||||
// Test input: transpose-A matmul of two 128x64 operands into a 64x64 result.
func.func @block_matmul_transpose_a(
    %lhs: tensor<128x64xf32>, %rhs: tensor<128x64xf32>, %acc: tensor<64x64xf32>) -> tensor<64x64xf32> {
  %res = linalg.matmul_transpose_a ins(%lhs, %rhs : tensor<128x64xf32>, tensor<128x64xf32>)
                                   outs(%acc : tensor<64x64xf32>) -> tensor<64x64xf32>
  return %res : tensor<64x64xf32>
}
|
||||
|
||||
// Test input: transpose-B matmul of two 64x128 operands into a 64x64 result.
func.func @block_matmul_transpose_b(
    %lhs: tensor<64x128xf32>, %rhs: tensor<64x128xf32>, %acc: tensor<64x64xf32>) -> tensor<64x64xf32> {
  %res = linalg.matmul_transpose_b ins(%lhs, %rhs : tensor<64x128xf32>, tensor<64x128xf32>)
                                   outs(%acc : tensor<64x64xf32>) -> tensor<64x64xf32>
  return %res : tensor<64x64xf32>
}
|
||||
|
||||
// MMT4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
|
||||
// MMT4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
|
||||
// MMT4D-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
|
||||
@ -43,18 +29,6 @@ func.func @block_matmul_transpose_b(
|
||||
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MMT4D-COUNT-1: linalg.unpack
|
||||
// MMT4D-LABEL: func @block_matmul_transpose_a
|
||||
// MMT4D-COUNT-3: linalg.pack
|
||||
// MMT4D: linalg.generic
|
||||
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MMT4D-COUNT-1: linalg.unpack
|
||||
// MMT4D-LABEL: func @block_matmul_transpose_b
|
||||
// MMT4D-COUNT-3: linalg.pack
|
||||
// MMT4D: linalg.generic
|
||||
// MMT4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MMT4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MMT4D-COUNT-1: linalg.unpack
|
||||
|
||||
// MM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
|
||||
// MM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)>
|
||||
@ -65,18 +39,6 @@ func.func @block_matmul_transpose_b(
|
||||
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MM4D-COUNT-1: linalg.unpack
|
||||
// MM4D-LABEL: func @block_matmul_transpose_a
|
||||
// MM4D-COUNT-3: linalg.pack
|
||||
// MM4D: linalg.generic
|
||||
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MM4D-COUNT-1: linalg.unpack
|
||||
// MM4D-LABEL: func @block_matmul_transpose_b
|
||||
// MM4D-COUNT-3: linalg.pack
|
||||
// MM4D: linalg.generic
|
||||
// MM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MM4D-COUNT-1: linalg.unpack
|
||||
|
||||
// MTM4D-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d5, d3)>
|
||||
// MTM4D-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d5, d4)>
|
||||
@ -87,15 +49,3 @@ func.func @block_matmul_transpose_b(
|
||||
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MTM4D-COUNT-1: linalg.unpack
|
||||
// MTM4D-LABEL: func @block_matmul_transpose_a
|
||||
// MTM4D-COUNT-3: linalg.pack
|
||||
// MTM4D: linalg.generic
|
||||
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MTM4D-COUNT-1: linalg.unpack
|
||||
// MTM4D-LABEL: func @block_matmul_transpose_b
|
||||
// MTM4D-COUNT-3: linalg.pack
|
||||
// MTM4D: linalg.generic
|
||||
// MTM4D-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// MTM4D-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// MTM4D-COUNT-1: linalg.unpack
|
||||
|
@ -197,150 +197,6 @@ func.func @block_batch_matmul(
|
||||
|
||||
// -----
|
||||
|
||||
// Test input: transpose-A matmul of two 128x64 operands into a 64x64 result.
func.func @block_matmul_transpose_a(
    %lhs: tensor<128x64xf32>, %rhs: tensor<128x64xf32>, %acc: tensor<64x64xf32>) -> tensor<64x64xf32> {
  %res = linalg.matmul_transpose_a ins(%lhs, %rhs : tensor<128x64xf32>, tensor<128x64xf32>)
                                   outs(%acc : tensor<64x64xf32>) -> tensor<64x64xf32>
  return %res : tensor<64x64xf32>
}
|
||||
|
||||
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
|
||||
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
|
||||
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
|
||||
|
||||
// CHECK-LABEL: func @block_matmul_transpose_a(
|
||||
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<128x64xf32>, %[[B:[0-9a-z]+]]: tensor<128x64xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
|
||||
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
|
||||
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
|
||||
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [32, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<128x64xf32> -> tensor<2x2x32x64xf32>
|
||||
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
|
||||
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
|
||||
// CHECK-SAME: outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [16, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<128x64xf32> -> tensor<4x2x16x64xf32>
|
||||
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
|
||||
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
|
||||
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
|
||||
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
|
||||
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
|
||||
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
|
||||
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
|
||||
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
|
||||
|
||||
// -----
|
||||
|
||||
// Test input: transpose-A batch matmul of two 512x128x64 operands into a
// 512x64x64 result.
func.func @block_batch_matmul_transpose_a(
    %lhs: tensor<512x128x64xf32>, %rhs: tensor<512x128x64xf32>, %acc: tensor<512x64x64xf32>) -> tensor<512x64x64xf32> {
  %res = linalg.batch_matmul_transpose_a ins(%lhs, %rhs : tensor<512x128x64xf32>, tensor<512x128x64xf32>)
                                         outs(%acc : tensor<512x64x64xf32>) -> tensor<512x64x64xf32>
  return %res : tensor<512x64x64xf32>
}
|
||||
|
||||
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d3, d4, d6)>
|
||||
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d2, d3, d5, d6)>
|
||||
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4, d5)>
|
||||
|
||||
// CHECK-LABEL: func @block_batch_matmul_transpose_a(
|
||||
// CHECK-SAME: %[[A:.+]]: tensor<512x128x64xf32>, %[[B:.+]]: tensor<512x128x64xf32>, %[[C:.+]]: tensor<512x64x64xf32>
|
||||
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
|
||||
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
|
||||
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [32, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x128x64xf32> -> tensor<512x2x2x32x64xf32>
|
||||
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
|
||||
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
|
||||
// CHECK-SAME: outer_dims_perm = [0, 2, 1] inner_dims_pos = [2, 1] inner_tiles = [16, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x128x64xf32> -> tensor<512x4x2x16x64xf32>
|
||||
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
|
||||
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
|
||||
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
|
||||
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
|
||||
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
|
||||
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
|
||||
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
|
||||
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
|
||||
|
||||
// -----
|
||||
|
||||
// Test input: transpose-B matmul of two 64x128 operands into a 64x64 result.
func.func @block_matmul_transpose_b(
    %lhs: tensor<64x128xf32>, %rhs: tensor<64x128xf32>, %acc: tensor<64x64xf32>) -> tensor<64x64xf32> {
  %res = linalg.matmul_transpose_b ins(%lhs, %rhs : tensor<64x128xf32>, tensor<64x128xf32>)
                                   outs(%acc : tensor<64x64xf32>) -> tensor<64x64xf32>
  return %res : tensor<64x64xf32>
}
|
||||
|
||||
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>
|
||||
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d4, d5)>
|
||||
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
|
||||
|
||||
// CHECK-LABEL: func @block_matmul_transpose_b(
|
||||
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<64x128xf32>, %[[B:[0-9a-z]+]]: tensor<64x128xf32>, %[[C:[0-9a-z]+]]: tensor<64x64xf32>
|
||||
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<2x2x32x64xf32>
|
||||
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
|
||||
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [32, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<64x128xf32> -> tensor<2x2x32x64xf32>
|
||||
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<4x2x16x64xf32>
|
||||
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
|
||||
// CHECK-SAME: outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [16, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<64x128xf32> -> tensor<4x2x16x64xf32>
|
||||
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<2x4x32x16xf32>
|
||||
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
|
||||
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<64x64xf32> -> tensor<2x4x32x16xf32>
|
||||
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
|
||||
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<2x2x32x64xf32>, tensor<4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<2x4x32x16xf32>)
|
||||
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
|
||||
// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[C]] : tensor<2x4x32x16xf32> -> tensor<64x64xf32>
|
||||
// CHECK: return %[[RES_UNPACKED]] : tensor<64x64xf32>
|
||||
|
||||
// -----
|
||||
|
||||
func.func @block_batch_matmul_transpose_b(
|
||||
%A: tensor<512x64x128xf32>, %B: tensor<512x64x128xf32>, %C: tensor<512x64x64xf32>) -> tensor<512x64x64xf32> {
|
||||
%0 = linalg.batch_matmul_transpose_b ins(%A, %B : tensor<512x64x128xf32>, tensor<512x64x128xf32>)
|
||||
outs(%C : tensor<512x64x64xf32>) -> tensor<512x64x64xf32>
|
||||
return %0 : tensor<512x64x64xf32>
|
||||
}
|
||||
|
||||
// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d3, d4, d6)>
|
||||
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d2, d3, d5, d6)>
|
||||
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d4, d5)>
|
||||
|
||||
// CHECK-LABEL: func @block_batch_matmul_transpose_b(
|
||||
// CHECK-SAME: %[[A:.+]]: tensor<512x64x128xf32>, %[[B:.+]]: tensor<512x64x128xf32>, %[[C:.+]]: tensor<512x64x64xf32>
|
||||
// CHECK: %[[PACK_DST_0:.+]] = tensor.empty() : tensor<512x2x2x32x64xf32>
|
||||
// CHECK: %[[A_PACKED:.+]] = linalg.pack %[[A]]
|
||||
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [32, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_0]] : tensor<512x64x128xf32> -> tensor<512x2x2x32x64xf32>
|
||||
// CHECK: %[[PACK_DST_1:.+]] = tensor.empty() : tensor<512x4x2x16x64xf32>
|
||||
// CHECK: %[[B_PACKED:.+]] = linalg.pack %[[B]]
|
||||
// CHECK-SAME: outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [16, 64]
|
||||
// CHECK-SAME: into %[[PACK_DST_1]] : tensor<512x64x128xf32> -> tensor<512x4x2x16x64xf32>
|
||||
// CHECK: %[[PACK_DST_2:.+]] = tensor.empty() : tensor<512x2x4x32x16xf32>
|
||||
// CHECK: %[[C_PACKED:.+]] = linalg.pack %[[C]]
|
||||
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[PACK_DST_2]] : tensor<512x64x64xf32> -> tensor<512x2x4x32x16xf32>
|
||||
// CHECK: %[[GEMM_RES_PACKED:.+]] = linalg.generic
|
||||
// CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP2]]]
|
||||
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]
|
||||
// CHECK-SAME: ins(%[[A_PACKED]], %[[B_PACKED]] : tensor<512x2x2x32x64xf32>, tensor<512x4x2x16x64xf32>) outs(%[[C_PACKED]] : tensor<512x2x4x32x16xf32>)
|
||||
// CHECK: %[[RES_UNPACKED:.+]] = linalg.unpack %[[GEMM_RES_PACKED]]
|
||||
// CHECK-SAME: inner_dims_pos = [1, 2] inner_tiles = [32, 16]
|
||||
// CHECK-SAME: into %[[C]] : tensor<512x2x4x32x16xf32> -> tensor<512x64x64xf32>
|
||||
// CHECK: return %[[RES_UNPACKED]] : tensor<512x64x64xf32>
|
||||
|
||||
// -----
|
||||
|
||||
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
|
||||
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
|
||||
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||
|
@ -157,36 +157,6 @@ module attributes {transform.with_named_sequence} {
|
||||
|
||||
// -----
|
||||
|
||||
!type = tensor<2048x2048xf32>
|
||||
func.func @fold_add_on_transposed_matmuls(%arg0: !type, %arg1: !type) -> !type {
|
||||
%0 = arith.constant dense<1.111111e+00> : !type
|
||||
%cst = arith.constant 0.000000e+00 : f32
|
||||
%1 = tensor.empty() : !type
|
||||
%2 = linalg.fill ins(%cst : f32) outs(%1 : !type) -> !type
|
||||
%3 = linalg.matmul_transpose_a ins(%arg0, %0 : !type, !type) outs(%2 : !type) -> !type
|
||||
%4 = linalg.matmul_transpose_b ins(%arg1, %0 : !type, !type) outs(%2 : !type) -> !type
|
||||
%5 = linalg.add ins(%3, %4 : !type, !type) outs(%1 : !type) -> !type
|
||||
return %5 : !type
|
||||
}
|
||||
|
||||
// CHECK-LABEL: func.func @fold_add_on_transposed_matmuls
|
||||
// CHECK: %[[ACC:.+]] = linalg.matmul_transpose_a
|
||||
// CHECK-NEXT: %[[RES:.+]] = linalg.matmul_transpose_b ins({{.+}}) outs(%[[ACC]]
|
||||
// CHECK-NOT: linalg.add
|
||||
// CHECK-NEXT: return %[[RES]]
|
||||
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
|
||||
%func = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op
|
||||
transform.apply_patterns to %func {
|
||||
transform.apply_patterns.linalg.fold_add_into_dest
|
||||
} : !transform.any_op
|
||||
transform.yield
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
!type = tensor<2048x2048xf32>
|
||||
func.func @expect_no_fold_of_add_as_dominated_op_is_not_a_contraction(%arg0: !type, %arg1: !type) -> !type {
|
||||
%0 = arith.constant dense<1.111111e+00> : !type
|
||||
|
@ -1222,17 +1222,6 @@ func.func @batch_reduce_matmul(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @matmul_transpose_a
|
||||
// CHECK: linalg.matmul_transpose_a
|
||||
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<5x3xf32>, memref<5x7xf32>)
|
||||
// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
|
||||
func.func @matmul_transpose_a(%arg0: memref<5x3xf32>, %arg1: memref<5x7xf32>, %arg2: memref<3x7xf32>) {
|
||||
linalg.matmul_transpose_a ins(%arg0, %arg1 : memref<5x3xf32>, memref<5x7xf32>) outs(%arg2: memref<3x7xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @matmul_transpose_a_explicit
|
||||
// CHECK: linalg.matmul
|
||||
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<5x3xf32>, memref<5x7xf32>)
|
||||
@ -1478,17 +1467,6 @@ func.func @matmul_bcast_b_transpose_a(%arg0: memref<5x3xf32>, %arg1: memref<5xf3
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @matmul_transpose_b
|
||||
// CHECK: linalg.matmul_transpose_b
|
||||
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<3x5xf32>, memref<7x5xf32>)
|
||||
// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
|
||||
func.func @matmul_transpose_b(%arg0: memref<3x5xf32>, %arg1: memref<7x5xf32>, %arg2: memref<3x7xf32>) {
|
||||
linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) outs(%arg2: memref<3x7xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>
|
||||
// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
|
||||
// CHECK: #[[$ATTR_2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
|
||||
@ -1806,28 +1784,6 @@ func.func @bcast_A_transpose_B(%A: memref<3x5xf32>, %B: memref<2x7x5xf32>, %C: m
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @batchmatmul_transpose_a
|
||||
// CHECK: linalg.batch_matmul_transpose_a
|
||||
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x5x3xf32>, memref<2x5x7xf32>)
|
||||
// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
|
||||
func.func @batchmatmul_transpose_a(%arg0: memref<2x5x3xf32>, %arg1: memref<2x5x7xf32>, %arg2: memref<2x3x7xf32>) {
|
||||
linalg.batch_matmul_transpose_a ins(%arg0, %arg1 : memref<2x5x3xf32>, memref<2x5x7xf32>) outs(%arg2: memref<2x3x7xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK-LABEL: func @batchmatmul_transpose_b
|
||||
// CHECK: linalg.batch_matmul_transpose_b
|
||||
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x3x5xf32>, memref<2x7x5xf32>)
|
||||
// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
|
||||
func.func @batchmatmul_transpose_b(%arg0: memref<2x3x5xf32>, %arg1: memref<2x7x5xf32>, %arg2: memref<2x3x7xf32>) {
|
||||
linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) outs(%arg2: memref<2x3x7xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
|
||||
// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
|
||||
// CHECK: #[[$ATTR_2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
|
||||
|
@ -92,38 +92,6 @@ func.func @singleton_batch_vecmat(%arg0 : tensor<1x?xf32>, %arg1 : tensor<1x?x?x
|
||||
|
||||
// -----
|
||||
|
||||
func.func @singleton_batchmatmul_transpose_a(%arg0: memref<1x5x3xf32>, %arg1: memref<1x5x7xf32>, %arg2: memref<1x3x7xf32>) {
|
||||
// CHECK-LABEL: @singleton_batchmatmul_transpose_a
|
||||
// CHECK-SAME: %[[LHS:[a-zA-Z0-9]+]]: memref<1x5x3xf32>
|
||||
// CHECK-SAME: %[[RHS:[a-zA-Z0-9]+]]: memref<1x5x7xf32>
|
||||
// CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: memref<1x3x7xf32>
|
||||
// CHECK-NEXT: %[[COLLAPSED_LHS:.*]] = memref.collapse_shape %[[LHS]] {{\[}}[0, 1], [2]]
|
||||
// CHECK-NEXT: %[[COLLAPSED_RHS:.*]] = memref.collapse_shape %[[RHS]] {{\[}}[0, 1], [2]]
|
||||
// CHECK-NEXT: %[[COLLAPSED_INIT:.*]] = memref.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]]
|
||||
// CHECK-NEXT: linalg.matmul_transpose_a ins(%[[COLLAPSED_LHS]], %[[COLLAPSED_RHS]] : memref<5x3xf32>, memref<5x7xf32>) outs(%[[COLLAPSED_INIT]] : memref<3x7xf32>)
|
||||
// CHECK-NEXT: return
|
||||
linalg.batch_matmul_transpose_a ins(%arg0, %arg1 : memref<1x5x3xf32>, memref<1x5x7xf32>) outs(%arg2: memref<1x3x7xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @singleton_batchmatmul_transpose_b(%arg0: memref<1x3x5xf32>, %arg1: memref<1x7x5xf32>, %arg2: memref<1x3x7xf32>) {
|
||||
// CHECK-LABEL: @singleton_batchmatmul_transpose_b
|
||||
// CHECK-SAME: %[[LHS:[a-zA-Z0-9]+]]: memref<1x3x5xf32>
|
||||
// CHECK-SAME: %[[RHS:[a-zA-Z0-9]+]]: memref<1x7x5xf32>
|
||||
// CHECK-SAME: %[[INIT:[a-zA-Z0-9]+]]: memref<1x3x7xf32>
|
||||
// CHECK-NEXT: %[[COLLAPSED_LHS:.*]] = memref.collapse_shape %[[LHS]] {{\[}}[0, 1], [2]]
|
||||
// CHECK-NEXT: %[[COLLAPSED_RHS:.*]] = memref.collapse_shape %[[RHS]] {{\[}}[0, 1], [2]]
|
||||
// CHECK-NEXT: %[[COLLAPSED_INIT:.*]] = memref.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]]
|
||||
// CHECK-NEXT: linalg.matmul_transpose_b ins(%[[COLLAPSED_LHS]], %[[COLLAPSED_RHS]] : memref<3x5xf32>, memref<7x5xf32>) outs(%[[COLLAPSED_INIT]] : memref<3x7xf32>)
|
||||
// CHECK-NEXT: return
|
||||
linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<1x3x5xf32>, memref<1x7x5xf32>) outs(%arg2: memref<1x3x7xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @matmul_to_matvec_tensor(%arg0: tensor<?x?xf32>, %arg1: tensor<?x1xf32>, %arg2: tensor<?x1xf32>) -> tensor<?x1xf32> {
|
||||
// CHECK-LABEL: @matmul_to_matvec_tensor
|
||||
// CHECK-SAME: %[[LHS:[a-zA-Z0-9]+]]: tensor<?x?xf32>
|
||||
@ -226,59 +194,6 @@ func.func @matvec_to_dot_tensor(%arg0: tensor<1x?xf32>, %arg1: tensor<?xf32>, %a
|
||||
|
||||
// -----
|
||||
|
||||
func.func @matmul_transpose_a_to_vecmat(%arg0: tensor<256x1xf32>, %arg1: tensor<256x512xf32>, %arg2: tensor<1x512xf32>) -> tensor<1x512xf32> {
|
||||
// CHECK-LABEL: @matmul_transpose_a_to_vecmat
|
||||
// CHECK: collapse_shape {{.*}} into tensor<256xf32>
|
||||
// CHECK: collapse_shape {{.*}} into tensor<512xf32>
|
||||
// CHECK: linalg.vecmat
|
||||
// CHECK: expand_shape {{.*}} into tensor<1x512xf32>
|
||||
%0 = linalg.matmul_transpose_a ins(%arg0, %arg1: tensor<256x1xf32>, tensor<256x512xf32>) outs(%arg2: tensor<1x512xf32>) -> tensor<1x512xf32>
|
||||
return %0 : tensor<1x512xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @batch_matmul_transpose_a_to_batch_vecmat(%arg0: tensor<64x256x1xf32>, %arg1: tensor<64x256x512xf32>, %arg2: tensor<64x1x512xf32>) -> tensor<64x1x512xf32> {
|
||||
// CHECK-LABEL: @batch_matmul_transpose_a_to_batch_vecmat
|
||||
// CHECK: collapse_shape {{.*}} into tensor<64x256xf32>
|
||||
// CHECK: collapse_shape {{.*}} into tensor<64x512xf32>
|
||||
// CHECK: linalg.batch_vecmat
|
||||
// CHECK: expand_shape {{.*}} into tensor<64x1x512xf32>
|
||||
%0 = linalg.batch_matmul_transpose_a ins(%arg0, %arg1: tensor<64x256x1xf32>, tensor<64x256x512xf32>) outs(%arg2: tensor<64x1x512xf32>) -> tensor<64x1x512xf32>
|
||||
return %0 : tensor<64x1x512xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @matmul_transpose_b_to_matvec(%arg0: memref<?x?xf32>, %arg1: memref<1x?xf32>, %arg2: memref<?x1xf32>) {
|
||||
// CHECK-LABEL: @matmul_transpose_b_to_matvec
|
||||
// CHECK: linalg.matvec
|
||||
linalg.matmul_transpose_b ins(%arg0, %arg1: memref<?x?xf32>, memref<1x?xf32>) outs(%arg2: memref<?x1xf32>)
|
||||
return
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @batchmatmul_transpose_b_to_batchmatvec_tensor(%arg0: tensor<64x128x256xf32>, %arg1: tensor<64x1x256xf32>, %arg2: tensor<64x128x1xf32>) -> tensor<64x128x1xf32> {
|
||||
// CHECK: collapse_shape {{.*}} into tensor<64x256xf32>
|
||||
// CHECK: collapse_shape {{.*}} into tensor<64x128xf32>
|
||||
// CHECK: linalg.batch_matvec
|
||||
// CHECK: expand_shape {{.*}} into tensor<64x128x1xf32>
|
||||
%0 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1: tensor<64x128x256xf32>, tensor<64x1x256xf32>) outs(%arg2: tensor<64x128x1xf32>) -> tensor<64x128x1xf32>
|
||||
return %0 : tensor<64x128x1xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @batchmatmul_transpose_b_to_to_dot(%arg0: tensor<1x1x?xf32>, %arg1: tensor<1x1x?xf32>, %arg2: tensor<1x1x1xf32>) -> tensor<1x1x1xf32> {
|
||||
// CHECK-LABEL: @batchmatmul_transpose_b_to_to_dot
|
||||
// CHECK: linalg.dot
|
||||
%0 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1: tensor<1x1x?xf32>, tensor<1x1x?xf32>) outs(%arg2: tensor<1x1x1xf32>) -> tensor<1x1x1xf32>
|
||||
return %0 : tensor<1x1x1xf32>
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @nonsingleton_batch_matmul(%arg0 : tensor<2x?x?xf32>, %arg1 : tensor<2x?x?xf32>, %arg2: tensor<2x?x?xf32>) -> tensor<2x?x?xf32> {
|
||||
// CHECK-LABEL: @nonsingleton_batch_matmul
|
||||
// CHECK-NOT: collapse_shape
|
||||
|
@ -504,7 +504,7 @@ func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
|
||||
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
|
||||
%0 = transform.structured.match ops{["linalg.matmul_transpose_b"]} in %arg1 : (!transform.any_op) -> !transform.any_op
|
||||
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
|
||||
%c10 = transform.param.constant 10 : i64 -> !transform.param<i64>
|
||||
%c20 = transform.param.constant 20 : i64 -> !transform.param<i64>
|
||||
%sz = transform.merge_handles %c10, %c20 : !transform.param<i64>
|
||||
|
@ -465,14 +465,14 @@ module attributes {transform.with_named_sequence} {
|
||||
// CHECK: %[[RHS:.*]] = tensor.pad
|
||||
// CHECK: scf.for
|
||||
// CHECK-DAG: tensor.extract_slice %[[LHS]][0, %{{.*}}] [%{{.*}}, 32]
|
||||
// CHECK-DAG: tensor.extract_slice %[[RHS]][0, %{{.*}}] [%{{.*}}, 32]
|
||||
// CHECK-DAG: tensor.extract_slice %[[RHS]][%{{.*}}, 0] [32, %{{.*}}]
|
||||
func.func @dyn_pad_tiling(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
|
||||
%0 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
return %0 : tensor<?x?xf32>
|
||||
}
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
|
||||
%0 = transform.structured.match ops{["linalg.matmul_transpose_b"]} in %arg0 : (!transform.any_op) -> !transform.any_op
|
||||
%0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
|
||||
%padded, %pad, %copy = transform.structured.pad %0 pad_to_multiple_of [32] use_prescribed_tensor_shapes {padding_dimensions = [2], padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
|
||||
%tiled_linalg_op, %loops = transform.structured.tile_using_for %padded tile_sizes [0, 0, 32] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
|
||||
%1 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
|
||||
|
@ -30,66 +30,6 @@ module attributes {transform.with_named_sequence} {
|
||||
|
||||
// -----
|
||||
|
||||
#map = affine_map<(d0, d1, d2) -> (d2, d0)>
|
||||
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
|
||||
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||
func.func @matmul_transpose_a(%arg0: memref<5x3xf32>, %arg1: memref<5x7xf32>, %arg2: memref<3x7xf32>) {
|
||||
linalg.generic
|
||||
{indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
|
||||
ins(%arg0, %arg1 : memref<5x3xf32>, memref<5x7xf32>) outs(%arg2 : memref<3x7xf32>) {
|
||||
^bb0(%in: f32, %in_0: f32, %out: f32):
|
||||
%0 = arith.mulf %in, %in_0 : f32
|
||||
%1 = arith.addf %out, %0 : f32
|
||||
linalg.yield %1 : f32
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @matmul_transpose_a
|
||||
// CHECK-SAME: %[[ARG0:.+]]: memref<5x3xf32>, %[[ARG1:.+]]: memref<5x7xf32>, %[[ARG2:.+]]: memref<3x7xf32>) {
|
||||
// CHECK-NOT: linalg.generic
|
||||
// CHECK: linalg.matmul_transpose_a ins(%[[ARG0]], %[[ARG1]] : memref<5x3xf32>, memref<5x7xf32>) outs(%[[ARG2]] : memref<3x7xf32>)
|
||||
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
|
||||
%0 = transform.structured.match interface{LinalgOp} in %arg0 : (!transform.any_op) -> !transform.any_op
|
||||
%1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op
|
||||
transform.yield
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
|
||||
#map1 = affine_map<(d0, d1, d2) -> (d1, d2)>
|
||||
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||
func.func @matmul_transpose_b(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
|
||||
%0 = linalg.generic
|
||||
{indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
|
||||
ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%arg2 : tensor<?x?xf32>) {
|
||||
^bb0(%in: f32, %in_0: f32, %out: f32):
|
||||
%1 = arith.mulf %in, %in_0 : f32
|
||||
%2 = arith.addf %out, %1 : f32
|
||||
linalg.yield %2 : f32
|
||||
} -> tensor<?x?xf32>
|
||||
return %0 : tensor<?x?xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @matmul_transpose_b
|
||||
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?xf32>, %[[ARG1:.+]]: tensor<?x?xf32>, %[[ARG2:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
// CHECK-NOT: linalg.generic
|
||||
// CHECK: linalg.matmul_transpose_b ins(%[[ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[ARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
|
||||
%0 = transform.structured.match interface{LinalgOp} in %arg0 : (!transform.any_op) -> !transform.any_op
|
||||
%1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op
|
||||
transform.yield
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
|
||||
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
|
||||
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
|
||||
@ -117,32 +57,3 @@ module attributes {transform.with_named_sequence} {
|
||||
transform.yield
|
||||
}
|
||||
}
|
||||
|
||||
// -----
|
||||
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
|
||||
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
|
||||
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
|
||||
func.func @batch_matmul_transpose_b(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
|
||||
%0 = linalg.generic
|
||||
{indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
|
||||
ins(%arg0, %arg1 : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%arg2 : tensor<?x?x?xf32>) {
|
||||
^bb0(%in: f32, %in_0: f32, %out: f32):
|
||||
%1 = arith.mulf %in, %in_0 : f32
|
||||
%2 = arith.addf %out, %1 : f32
|
||||
linalg.yield %2 : f32
|
||||
} -> tensor<?x?x?xf32>
|
||||
return %0 : tensor<?x?x?xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @batch_matmul_transpose_b
|
||||
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>, %[[ARG1:.+]]: tensor<?x?x?xf32>, %[[ARG2:.+]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
|
||||
// CHECK-NOT: linalg.generic
|
||||
// CHECK: linalg.batch_matmul_transpose_b ins(%[[ARG0]], %[[ARG1]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[ARG2]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
|
||||
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
|
||||
%0 = transform.structured.match interface{LinalgOp} in %arg0 : (!transform.any_op) -> !transform.any_op
|
||||
%1 = transform.structured.specialize %0 : (!transform.any_op) -> !transform.any_op
|
||||
transform.yield
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,20 @@
|
||||
// RUN: mlir-opt -transform-preload-library='transform-library-paths=%p/transpose-matmul-a.mlir' -transform-interpreter -split-input-file %s | FileCheck %s --check-prefixes=CHECK,TRANSPOSE-A
|
||||
// RUN: mlir-opt -transform-preload-library='transform-library-paths=%p/transpose-matmul-b.mlir' -transform-interpreter -split-input-file %s | FileCheck %s --check-prefixes=CHECK,TRANSPOSE-B
|
||||
|
||||
// TRANSPOSE-A-DAG: #[[$MA:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
|
||||
// TRANSPOSE-A-DAG: #[[$MB:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
|
||||
// TRANSPOSE-A-DAG: #[[$MC:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||
// TRANSPOSE-A-DAG: #[[$BMA:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>
|
||||
// TRANSPOSE-A-DAG: #[[$BMB:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
|
||||
// TRANSPOSE-A-DAG: #[[$BMC:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
|
||||
|
||||
// TRANSPOSE-B-DAG: #[[$MA:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
|
||||
// TRANSPOSE-B-DAG: #[[$MB:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)>
|
||||
// TRANSPOSE-B-DAG: #[[$MC:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
|
||||
// TRANSPOSE-B-DAG: #[[$BMA:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
|
||||
// TRANSPOSE-B-DAG: #[[$BMB:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
|
||||
// TRANSPOSE-B-DAG: #[[$BMC:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
|
||||
|
||||
// CHECK-LABEL: func.func @matmul_static(
|
||||
// CHECK-SAME: %[[A:.*]]: tensor<16x8xf32>,
|
||||
// CHECK-SAME: %[[B:.*]]: tensor<8x16xf32>) -> tensor<16x16xf32> {
|
||||
@ -9,10 +23,10 @@
|
||||
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<16x16xf32>) -> tensor<16x16xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty() : tensor<8x16xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<16x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<8x16xf32>) permutation = [1, 0]
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<8x16xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<8x16xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<16x8xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<16x8xf32>) permutation = [1, 0]
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<16x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<16x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<16x16xf32>) -> tensor<16x16xf32>
|
||||
// CHECK: return %[[C]] : tensor<16x16xf32>
|
||||
// CHECK: }
|
||||
func.func @matmul_static(%A: tensor<16x8xf32>, %B: tensor<8x16xf32>) -> (tensor<16x16xf32>) {
|
||||
@ -38,11 +52,11 @@ func.func @matmul_static(%A: tensor<16x8xf32>, %B: tensor<8x16xf32>) -> (tensor<
|
||||
// TRANSPOSE-A: %[[A_DIM1:.*]] = tensor.dim %[[A]], %[[C1]] : tensor<?x?xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM1]], %[[A_DIM0]]) : tensor<?x?xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<?x?xf32>) outs(%[[A_TRANSP_INIT]] : tensor<?x?xf32>) permutation = [1, 0]
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_DIM0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty(%[[B_DIM1]], %[[B_DIM0]]) : tensor<?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<?x?xf32>) outs(%[[B_TRANSP_INIT]] : tensor<?x?xf32>) permutation = [1, 0]
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
// CHECK: return %[[C]] : tensor<?x?xf32>
|
||||
// CHECK: }
|
||||
func.func @matmul_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>) -> (tensor<?x?xf32>) {
|
||||
@ -69,10 +83,10 @@ func.func @matmul_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>) -> (tensor<?
|
||||
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<?x16xf32>) -> tensor<?x16xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM0]]) : tensor<8x?xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<?x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<8x?xf32>) permutation = [1, 0]
|
||||
// TRANSPOSE-A: %[[B0:.*]] = linalg.matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<8x?xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
|
||||
// TRANSPOSE-A: %[[B0:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<8x?xf32>, tensor<8x16xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<16x8xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<16x8xf32>) permutation = [1, 0]
|
||||
// TRANSPOSE-B: %[[B0:.*]] = linalg.matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<?x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
|
||||
// TRANSPOSE-B: %[[B0:.*]] = linalg.matmul indexing_maps = [#[[$MA]], #[[$MB]], #[[$MC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<?x8xf32>, tensor<16x8xf32>) outs(%[[C_ZERO]] : tensor<?x16xf32>) -> tensor<?x16xf32>
|
||||
// CHECK: return %[[B0]] : tensor<?x16xf32>
|
||||
// CHECK: }
|
||||
func.func @matmul_mixed(%A: tensor<?x8xf32>, %B: tensor<8x16xf32>) -> (tensor<?x16xf32>) {
|
||||
@ -96,10 +110,10 @@ func.func @matmul_mixed(%A: tensor<?x8xf32>, %B: tensor<8x16xf32>) -> (tensor<?x
|
||||
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty() : tensor<2x8x16xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<2x16x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<2x8x16xf32>) permutation = [0, 2, 1]
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x16xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x16xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<2x16x8xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<2x8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<2x16x8xf32>) permutation = [0, 2, 1]
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
|
||||
// CHECK: return %[[C]] : tensor<2x16x16xf32>
|
||||
// CHECK: }
|
||||
func.func @batch_matmul_static(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>) -> (tensor<2x16x16xf32>) {
|
||||
@ -127,12 +141,12 @@ func.func @batch_matmul_static(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>) -
|
||||
// TRANSPOSE-A: %[[A_DIM2:.*]] = tensor.dim %[[A]], %[[C2]] : tensor<?x?x?xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM0]], %[[A_DIM2]], %[[A_DIM1]]) : tensor<?x?x?xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<?x?x?xf32>) outs(%[[A_TRANSP_INIT]] : tensor<?x?x?xf32>) permutation = [0, 2, 1]
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
|
||||
// TRANSPOSE-A: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_DIM0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_DIM1:.*]] = tensor.dim %[[B]], %[[C1]] : tensor<?x?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty(%[[B_DIM0]], %[[B_DIM2]], %[[B_DIM1]]) : tensor<?x?x?xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<?x?x?xf32>) outs(%[[B_TRANSP_INIT]] : tensor<?x?x?xf32>) permutation = [0, 2, 1]
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
|
||||
// TRANSPOSE-B: %[[C:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<?x?x?xf32>, tensor<?x?x?xf32>) outs(%[[C_ZERO]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
|
||||
// CHECK: return %[[C]] : tensor<?x?x?xf32>
|
||||
// CHECK: }
|
||||
func.func @batch_matmul_dynamic(%A: tensor<?x?x?xf32>, %B: tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>) {
|
||||
@ -161,10 +175,10 @@ func.func @batch_matmul_dynamic(%A: tensor<?x?x?xf32>, %B: tensor<?x?x?xf32>) ->
|
||||
// CHECK: %[[C_ZERO:.*]] = linalg.fill ins(%[[C0_F32]] : f32) outs(%[[C_INIT]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP_INIT:.*]] = tensor.empty(%[[A_DIM1]]) : tensor<2x8x?xf32>
|
||||
// TRANSPOSE-A: %[[A_TRANSP:.*]] = linalg.transpose ins(%[[A]] : tensor<2x?x8xf32>) outs(%[[A_TRANSP_INIT]] : tensor<2x8x?xf32>) permutation = [0, 2, 1]
|
||||
// TRANSPOSE-A: %[[B0:.*]] = linalg.batch_matmul_transpose_a ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x?xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
|
||||
// TRANSPOSE-A: %[[B0:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A_TRANSP]], %[[B]] : tensor<2x8x?xf32>, tensor<2x8x16xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP_INIT:.*]] = tensor.empty() : tensor<2x16x8xf32>
|
||||
// TRANSPOSE-B: %[[B_TRANSP:.*]] = linalg.transpose ins(%[[B]] : tensor<2x8x16xf32>) outs(%[[B_TRANSP_INIT]] : tensor<2x16x8xf32>) permutation = [0, 2, 1]
|
||||
// TRANSPOSE-B: %[[B0:.*]] = linalg.batch_matmul_transpose_b ins(%[[A]], %[[B_TRANSP]] : tensor<2x?x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
|
||||
// TRANSPOSE-B: %[[B0:.*]] = linalg.batch_matmul indexing_maps = [#[[$BMA]], #[[$BMB]], #[[$BMC]]] ins(%[[A]], %[[B_TRANSP]] : tensor<2x?x8xf32>, tensor<2x16x8xf32>) outs(%[[C_ZERO]] : tensor<2x?x16xf32>) -> tensor<2x?x16xf32>
|
||||
// CHECK: return %[[B0]] : tensor<2x?x16xf32>
|
||||
// CHECK: }
|
||||
func.func @batch_matmul_mixed(%A: tensor<2x?x8xf32>, %B: tensor<2x8x16xf32>) -> (tensor<2x?x16xf32>) {
|
||||
|
@ -9,7 +9,12 @@
|
||||
// RUN: FileCheck %s
|
||||
|
||||
func.func @matmul_transpose_a(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>, %C : tensor<?x?xf32>) {
|
||||
%res = linalg.matmul_transpose_a ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
|
||||
%res = linalg.matmul
|
||||
indexing_maps = [
|
||||
affine_map<(d0, d1, d2) -> (d2, d0)>,
|
||||
affine_map<(d0, d1, d2) -> (d2, d1)>,
|
||||
affine_map<(d0, d1, d2) -> (d0, d1)>]
|
||||
ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
|
||||
outs(%C: tensor<?x?xf32>) -> tensor<?x?xf32>
|
||||
%xf = tensor.cast %res : tensor<?x?xf32> to tensor<*xf32>
|
||||
call @printMemrefF32(%xf) : (tensor<*xf32>) -> ()
|
||||
@ -56,7 +61,7 @@ func.func @main() {
|
||||
|
||||
module attributes {transform.with_named_sequence} {
|
||||
transform.named_sequence @__transform_main(%module : !transform.any_op {transform.readonly}) {
|
||||
%matmul_transpose_a = transform.structured.match ops{["linalg.matmul_transpose_a"]} in %module
|
||||
%matmul_transpose_a = transform.structured.match ops{["linalg.matmul"]} in %module
|
||||
: (!transform.any_op) -> !transform.any_op
|
||||
|
||||
// Step 1: Tile for size [4] x [4], which corresponds to SVLs x SVLs, where
|
||||
|
@ -1,7 +1,7 @@
|
||||
# RUN: %PYTHON -m mlir.dialects.linalg.opdsl.dump_oplib .ops.core_named_ops | FileCheck %s
|
||||
|
||||
# Just verify that at least one known op is generated.
|
||||
# CHECK: name: matmul
|
||||
# CHECK: name: copy
|
||||
|
||||
# verify some special cases: negf->NegFOp, powf->PowFOp
|
||||
# CHECK: cpp_class_name: NegFOp
|
||||
|
@ -4,7 +4,6 @@ module.exports = {
|
||||
linalg_dialect : $ => prec.right(choice(
|
||||
seq(choice(
|
||||
'linalg.batch_matmul',
|
||||
'linalg.batch_matmul_transpose_b',
|
||||
'linalg.batch_matvec',
|
||||
'linalg.batch_reduce_matmul', 'linalg.broadcast',
|
||||
'linalg.conv_1d_ncw_fcw', 'linalg.conv_1d_nwc_wcf',
|
||||
@ -27,7 +26,6 @@ module.exports = {
|
||||
'linalg.dot', 'linalg.elemwise_binary',
|
||||
'linalg.elemwise_unary', 'linalg.fill',
|
||||
'linalg.fill_rng_2d', 'linalg.matmul',
|
||||
'linalg.matmul_transpose_b',
|
||||
'linalg.matmul_unsigned', 'linalg.matvec',
|
||||
'linalg.mmt4d', 'linalg.pooling_nchw_max',
|
||||
'linalg.pooling_nchw_sum',
|
||||
|
@ -213,7 +213,6 @@
|
||||
"bufferization.to_tensor"
|
||||
|
||||
"linalg.batch_matmul"
|
||||
"linalg.batch_matmul_transpose_b"
|
||||
"linalg.batch_matvec"
|
||||
"linalg.batch_reduce_matmul"
|
||||
"linalg.broadcast"
|
||||
@ -244,7 +243,6 @@
|
||||
"linalg.fill"
|
||||
"linalg.fill_rng_2d"
|
||||
"linalg.matmul"
|
||||
"linalg.matmul_transpose_b"
|
||||
"linalg.matmul_unsigned"
|
||||
"linalg.matvec"
|
||||
"linalg.mmt4d"
|
||||
|
Loading…
x
Reference in New Issue
Block a user