Matthias Springer 39ec46bd83 [mlir][bufferize] Extract buffer hoisting into separate function
This improves the modularity of the bufferization.

From now on, all ops that do not implement BufferizableOpInterface are considered hoisting barriers. Previously, all ops that do not implement the interface were not considered barriers and such ops had to be marked as barriers explicitly. This was unsafe because we could've hoisted across unknown ops where it was not safe to hoist.

As a side effect, this allows for cleaning up AffineBufferizableOpInterfaceImpl. This build unit is no longer needed and can be deleted.

Differential Revision: https://reviews.llvm.org/D121519
2022-03-15 21:25:03 +09:00

192 lines
7.7 KiB
MLIR

// RUN: mlir-opt -linalg-bufferize -canonicalize -cse -split-input-file %s | FileCheck %s
#map0 = affine_map<(d0) -> (d0)>

// Detailed check of the simplest case. It verifies that:
//   - bufferization.to_memref / bufferization.to_tensor materializations are
//     properly inserted,
//   - the payload is carried over correctly,
//   - the affine maps are carried over correctly.
// The tests further down do not re-check all of these details.

// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @basic(
// CHECK-SAME: %[[INPUT:.*]]: tensor<4xf32>) -> tensor<4xf32> {
// CHECK-DAG: %[[INPUT_BUF:.*]] = bufferization.to_memref %[[INPUT]] : memref<4xf32>
// CHECK-DAG: %[[OUTPUT_BUF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]}
// CHECK-SAME: ins(%[[INPUT_BUF]] : memref<4xf32>)
// CHECK-SAME: outs(%[[OUTPUT_BUF]] : memref<4xf32>) {
// CHECK: ^bb0(%[[ELEM:.*]]: f32, %[[UNUSED:.*]]: f32):
// CHECK: %[[EXP:.*]] = math.exp %[[ELEM]] : f32
// CHECK: linalg.yield %[[EXP]] : f32
// CHECK: }
// CHECK: %[[OUTPUT:.*]] = bufferization.to_tensor %[[OUTPUT_BUF]] : memref<4xf32>
// CHECK: return %[[OUTPUT]] : tensor<4xf32>
func @basic(%arg0: tensor<4xf32>) -> tensor<4xf32> {
  %result = linalg.generic {
      indexing_maps = [#map0, #map0],
      iterator_types = ["parallel"]
    } ins(%arg0 : tensor<4xf32>)
      outs(%arg0 : tensor<4xf32>) {
  ^bb0(%elem: f32, %unused: f32):
    %exp = math.exp %elem : f32
    linalg.yield %exp : f32
  } -> tensor<4xf32>
  return %result : tensor<4xf32>
}
// -----
#map0 = affine_map<(d0) -> (d0)>

// Same element-wise generic op as in @basic, but the output operand is
// produced by a linalg.init_tensor op with a dynamic size.

// CHECK: #map = affine_map<(d0) -> (d0)>
// CHECK-LABEL: func @init_tensor(
// CHECK-SAME: %[[INPUT:.*]]: tensor<?xf32>, %[[LEN:.*]]: index)
// CHECK-DAG: %[[INPUT_BUF:.*]] = bufferization.to_memref %[[INPUT]] : memref<?xf32>
// CHECK-DAG: %[[OUTPUT_BUF:.*]] = memref.alloc(%[[LEN]]) {{.*}} : memref<?xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[INPUT_BUF]] : memref<?xf32>)
// CHECK-SAME: outs(%[[OUTPUT_BUF]] : memref<?xf32>) {
func @init_tensor(%in : tensor<?xf32>, %size: index) -> tensor<?xf32> {
  %empty = linalg.init_tensor [%size] : tensor<?xf32>
  %result = linalg.generic {
      indexing_maps = [#map0, #map0],
      iterator_types = ["parallel"]
    } ins(%in : tensor<?xf32>)
      outs(%empty : tensor<?xf32>) {
  ^bb0(%elem: f32, %unused: f32):
    %exp = math.exp %elem : f32
    linalg.yield %exp : f32
  } -> tensor<?xf32>
  return %result : tensor<?xf32>
}
// -----
#map0 = affine_map<(d0) -> (d0)>

// A generic op with two tensor results: one buffer is allocated per output
// operand. The directives below expect the buffer of the second output to be
// allocated before the buffer of the first one.

// CHECK-LABEL: func @multiple_results
// CHECK: %[[SECOND_BUF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: %[[FIRST_BUF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}} : memref<4xf32>)
// CHECK-SAME: outs(%[[FIRST_BUF]], %[[SECOND_BUF]] : memref<4xf32>, memref<4xf32>)
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
  %first, %second = linalg.generic {
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel"]
    } ins(%arg0 : tensor<4xf32>)
      outs (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) {
  ^bb0(%elem: f32, %unused0: f32, %unused1: f32):
    %exp = math.exp %elem : f32
    linalg.yield %exp, %exp : f32, f32
  } -> (tensor<4xf32>, tensor<4xf32>)
  return %first, %second : tensor<4xf32>, tensor<4xf32>
}
// -----
#map_2d = affine_map<(d0, d1) -> (d0, d1)>

// Check that the buffers allocated for dynamically shaped outputs are sized
// from the dimensions of the tensor operand. All operands use the identity
// map here, so both allocs take the same (dim 0, dim 1) sizes of the argument.

// CHECK-LABEL: func @dynamic_results(
// CHECK-SAME: %[[INPUT:.*]]: tensor<?x?xf32>
// CHECK-DAG: %[[IDX0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[IDX1:.*]] = arith.constant 1 : index
// CHECK: %[[ROWS:.*]] = tensor.dim %[[INPUT]], %[[IDX0]] : tensor<?x?xf32>
// CHECK: %[[COLS:.*]] = tensor.dim %[[INPUT]], %[[IDX1]] : tensor<?x?xf32>
// CHECK: %[[SECOND_BUF:.*]] = memref.alloc(%[[ROWS]], %[[COLS]]) {{.*}} : memref<?x?xf32>
// CHECK: %[[FIRST_BUF:.*]] = memref.alloc(%[[ROWS]], %[[COLS]]) {{.*}} : memref<?x?xf32>
// CHECK: %[[INPUT_BUF:.*]] = bufferization.to_memref %[[INPUT]] : memref<?x?xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[INPUT_BUF]] : memref<?x?xf32>)
// CHECK-SAME: outs(%[[FIRST_BUF]], %[[SECOND_BUF]] : memref<?x?xf32>, memref<?x?xf32>)
func @dynamic_results(%arg0: tensor<?x?xf32>)
    -> (tensor<?x?xf32>, tensor<?x?xf32>) {
  %first, %second = linalg.generic {
      indexing_maps = [#map_2d, #map_2d, #map_2d],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg0 : tensor<?x?xf32>)
      outs (%arg0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>) {
  ^bb0(%elem: f32, %unused0: f32, %unused1: f32):
    %exp = math.exp %elem : f32
    linalg.yield %exp, %exp : f32, f32
  } -> (tensor<?x?xf32>, tensor<?x?xf32>)
  return %first, %second : tensor<?x?xf32>, tensor<?x?xf32>
}
// -----
#accesses = [
  affine_map<(i, j, k) -> (j, i, k)>,
  affine_map<(i, j, k) -> (i, j)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel", "parallel", "reduction"]
}

// Check the bufferization of init tensors: the payload yields the value of
// the output operand, and the directives expect the output tensor to be copied
// into an allocated buffer that the bufferized op then writes to.

// CHECK-LABEL: func @generic_with_init_tensor(
// CHECK-SAME: %[[VEC_TENSOR:.*]]: tensor<2x3x4xvector<3x4xi4>>,
// CHECK-SAME: %[[INIT_TENSOR:.*]]: tensor<3x2xf32>) -> tensor<3x2xf32> {
// CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc() {{.*}} : memref<3x2xf32>
// CHECK-DAG: %[[VEC_BUF:.*]] = bufferization.to_memref %[[VEC_TENSOR]] : memref<2x3x4xvector<3x4xi4>>
// CHECK-DAG: %[[INIT_BUF:.*]] = bufferization.to_memref %[[INIT_TENSOR]] : memref<3x2xf32>
// CHECK: memref.copy %[[INIT_BUF]], %[[OUT_BUF]] : memref<3x2xf32> to memref<3x2xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[VEC_BUF]] : memref<2x3x4xvector<3x4xi4>>)
// CHECK-SAME: outs(%[[OUT_BUF]] : memref<3x2xf32>) {
func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>,
                               %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {
  %result = linalg.generic #trait
      ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
      outs(%arg1 : tensor<3x2xf32>) {
  ^bb(%vec: vector<3x4xi4>, %acc: f32) :
    linalg.yield %acc : f32
  } -> tensor<3x2xf32>
  return %result : tensor<3x2xf32>
}
// -----
// Check that linalg.fill on a tensor becomes a linalg.fill into an allocated
// buffer, and that the function returns the tensor view of that buffer.
// CHECK-LABEL: func @bufferize_fill(
// CHECK-SAME: %[[IN:.*]]: tensor<?xf32>
func @bufferize_fill(%arg0: tensor<?xf32>) -> tensor<?xf32> {
  %zero = arith.constant 0.0 : f32
  // The fill value is captured in a FileCheck variable instead of matching a
  // printer-generated SSA name, so the test does not depend on value naming.
  // The constant and the alloc may be emitted in either order.
  // CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc
  // CHECK: linalg.fill ins(%[[CST]] : f32) outs(%[[ALLOC]] : memref<?xf32>)
  // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<?xf32>
  // CHECK: return %[[TENSOR]]
  %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
// -----
// Check that linalg.dot on tensors becomes linalg.dot on memrefs that writes
// into an allocated buffer, and that the tensor view of that buffer is
// returned.
// CHECK-LABEL: func @bufferize_dot
func @bufferize_dot(%in: tensor<4xf32>, %out: tensor<f32>) -> tensor<f32> {
  %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>)
                    outs(%out : tensor<f32>) -> tensor<f32>
  return %dot : tensor<f32>
  // CHECK: %[[ALLOC:.*]] = memref.alloc
  // TODO: The copy is not necessary.
  // CHECK: memref.copy {{.*}}, %[[ALLOC]]
  // The output operand below must *use* the ALLOC variable; re-defining it
  // there would match any value and leave the operand unchecked.
  // CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
  // CHECK-SAME: outs(%[[ALLOC]] : memref<f32>)
  // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref<f32>
  // CHECK: return %[[OUT_TENSOR]]
}