Rename and restructure tiling-related transform ops from the structured extension to be more homogeneous. In particular, all ops now follow a consistent naming scheme:

- `transform.structured.tile_using_for`;
- `transform.structured.tile_using_forall`;
- `transform.structured.tile_reduction_using_for`;
- `transform.structured.tile_reduction_using_forall`.

This drops the "_op" naming artifact from `tile_to_forall_op` that shouldn't have been included in the first place, consistently specifies the name of the control flow op to be produced for loops (instead of `tile_reduction_using_scf` since `scf.forall` also belongs to `scf`), and opts for the `using` connector to avoid ambiguity.

The loops produced by tiling are now systematically placed as *trailing* results of the transform op. While this required changing 3 out of 4 ops (except for `tile_using_for`), this is the only choice that makes sense when producing multiple `scf.for` ops that can be associated with a variadic number of handles. This choice is also most consistent with *other* transform ops from the structured extension, in particular with fusion ops, that produce the structured op as the leading result and the loop as the trailing result.
41 lines
2.2 KiB
MLIR
41 lines
2.2 KiB
MLIR
// RUN: mlir-opt %s --test-transform-dialect-interpreter -test-transform-dialect-erase-schedule --test-lower-to-llvm --split-input-file | FileCheck %s

// End-to-end test payload: a statically shaped matmul on tensors,
// (2x4) x (4x6) with the 2x6 tensor %arg2 as the output operand. The FileCheck
// directives below require that, after the pipeline from the lit command
// above, no `linalg` text remains after the function label and that an
// `llvm.intr.fmuladd` shows up.
// CHECK-LABEL: llvm.func @matmul_tensors
func.func @matmul_tensors(
  %arg0: tensor<2x4xf32>, %arg1: tensor<4x6xf32>, %arg2: tensor<2x6xf32>)
    -> tensor<2x6xf32> {
// CHECK-NOT: linalg
// CHECK: llvm.intr.fmuladd{{.*}}
  %0 = linalg.matmul ins(%arg0, %arg1: tensor<2x4xf32>, tensor<4x6xf32>)
                     outs(%arg2: tensor<2x6xf32>)
    -> tensor<2x6xf32>
  return %0 : tensor<2x6xf32>
}

// Transform schedule applied to the enclosing module by the transform dialect
// interpreter pass named in the lit command. Failures of any step are
// propagated (`failures(propagate)`).
transform.sequence failures(propagate) {
^bb1(%module_op: !transform.any_op):
  // Obtain a handle to the `linalg.matmul` op(s) in the payload module.
  %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!transform.any_op) -> !transform.any_op

  // Tile with sizes [2, 2, 2]. The tiled op is the leading result (%1) and the
  // three generated loops are the trailing results (%loops:3).
  %1, %loops:3 = transform.structured.tile_using_for %0 [2, 2, 2] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)

  // Walk up from the tiled op to its isolated-from-above ancestor (presumably
  // the enclosing `func.func` -- confirm against the op documentation) and
  // vectorize everything nested under it.
  %2 = transform.get_parent_op %1 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
  transform.structured.vectorize_children_and_apply_patterns %2 : (!transform.any_op) -> !transform.any_op

  // Bufferize the whole module, including function boundaries, using identity
  // layout maps. %b is the handle to the bufferized result.
  %b = transform.bufferization.one_shot_bufferize layout{IdentityLayoutMap}
      %module_op {bufferize_function_boundaries = true}
      : (!transform.any_op) -> !transform.any_op

  // Re-match the function(s) inside the bufferized result; the vector lowering
  // patterns below are applied to this handle.
  %f = transform.structured.match ops{["func.func"]} in %b
    : (!transform.any_op) -> !transform.any_op

  // TODO: group these lower-level controls into various properly named vector
  // lowering TD macros.
  transform.apply_patterns to %f {
    transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
    transform.apply_patterns.vector.transfer_permutation_patterns
    transform.apply_patterns.vector.lower_multi_reduction lowering_strategy = "innerparallel"
    transform.apply_patterns.vector.split_transfer_full_partial split_transfer_strategy = "linalg-copy"
    transform.apply_patterns.vector.transfer_to_scf max_transfer_rank = 1 full_unroll = true
    transform.apply_patterns.vector.lower_transfer max_transfer_rank = 1
    transform.apply_patterns.vector.lower_shape_cast
    transform.apply_patterns.vector.lower_transpose lowering_strategy = "shuffle_1d"
  } : !transform.any_op
}