llvm-project/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
River Riddle 3655069234 [mlir] Move the Builtin FuncOp to the Func dialect
This commit moves FuncOp out of the builtin dialect, and into the Func
dialect. This move has been planned in some capacity from the moment
we made FuncOp an operation (years ago). This commit handles the
functional aspects of the move, but various aspects are left untouched
to ease migration: func::FuncOp is re-exported into mlir to reduce
the actual API churn, the assembly format still accepts the unqualified
`func`. These temporary measures will remain for a little while to
simplify migration before being removed.

Differential Revision: https://reviews.llvm.org/D121266
2022-03-16 17:07:03 -07:00

1789 lines
71 KiB
MLIR

// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
// Run fuzzer with different seeds.
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
//===----------------------------------------------------------------------===//
// Simple cases
//===----------------------------------------------------------------------===//
// -----
// CHECK-LABEL: func @extract_slice_fun(
// Test case: a read-only tensor.extract_slice is taken from a non-inplaceable
// argument (%A) and from an inplaceable argument (%B). The expectations below
// require both slices to be marked in-place and both arguments to be "read".
func @extract_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<4xf32>, tensor<8xf32>)
{
// tensor.extract_slice is not used in a write, it is not compelled to
// bufferize out of place. Let callers decide whether they want to create
// aliasing subviews at all call sites or whether they allocate.
// This is true irrespective of whether the function argument is inplaceable.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>
return %r0, %r1: tensor<4xf32>, tensor<8xf32>
}
// -----
// CHECK-LABEL: func @insert_slice_fun(
// Test case: the same source tensor %C is inserted into a non-inplaceable
// destination (%A) and into an inplaceable destination (%B). Only the second
// result is expected to be equivalent to a function argument (index 1).
func @insert_slice_fun(%A : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read-write"
%C : tensor<4xf32> {linalg.inplaceable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<?xf32>, tensor<?xf32>)
{
// The destination %A is not inplaceable: this insert_slice must bufferize
// out of place (second operand is "false").
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
%r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// The destination %B is inplaceable: this insert_slice bufferizes inplace.
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
%r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [-1, 1]
return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: func @conflict_on_B(
// Test case: three matmuls all use %B as their output operand. In the first
// two %B is also an input operand; in the third only %A is read. Only the
// third matmul is expected to write %B in place.
func @conflict_on_B(%A : tensor<4x4xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<4x4xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "read-write"
-> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
// matmul output operand interferes with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%B: tensor<4x4xf32>)
-> tensor<4x4xf32>
// matmul output operand interferes with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%B: tensor<4x4xf32>)
-> tensor<4x4xf32>
// matmul output operand does not interfere with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%B: tensor<4x4xf32>)
-> tensor<4x4xf32>
// Only %E (third result) is expected to be equivalent to argument 1 (%B).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1]
return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
}
//===----------------------------------------------------------------------===//
// Length-1 producer-consumer cases.
//===----------------------------------------------------------------------===//
// -----
// CHECK-LABEL: func @extract_slice_extract_slice(
// Test case: two chained tensor.extract_slice ops, once starting from an
// inplaceable argument (%A) and once from a non-inplaceable argument (%B).
// All four slices are expected to be marked in-place.
func @extract_slice_extract_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<2xf32>, tensor<2xf32>)
{
// tensor.extract_slice is not used in a write, it is not compelled to
// bufferize out of place. Let callers decide whether they want to create
// aliasing subviews at all call sites or whether they allocate.
// This is true irrespective of whether the function argument is inplaceable.
// CHECK: {__inplace_operands_attr__ = ["true"]}
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>
return %r1, %r3: tensor<2xf32>, tensor<2xf32>
}
// -----
// CHECK-LABEL: func @insert_slice_insert_slice(
// Test case: two chained tensor.insert_slice ops, once with all-inplaceable
// arguments (%A3 into %A2 into %A) and once with all-non-inplaceable arguments
// (%B3 into %B2 into %B).
func @insert_slice_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read-write"
%A2 : tensor<4xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read-write"
%A3 : tensor<2xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%B : tensor<?xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B2 : tensor<4xf32> {linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "read"
%B3 : tensor<2xf32> {linalg.inplaceable = false})
// CHECK-SAME: bufferization.access = "read"
-> (tensor<?xf32>, tensor<?xf32>)
{
// Inplaceable chain: both destinations (%A2, then %A) are expected to be
// written in place.
// CHECK: {__inplace_operands_attr__ = ["true", "true"]}
%r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true"]}
%r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Non-inplaceable chain: both destinations (%B2, then %B) are expected to be
// out of place.
// CHECK: {__inplace_operands_attr__ = ["true", "false"]}
%r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "false"]}
%r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
// Only %r1 is expected to be equivalent to a function argument (index 0, %A).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
// Test case: a slice is extracted at offset 0 and inserted back into the same
// tensor at a dynamic offset %idx, so the extract_slice and insert_slice do not
// match. Exercised for an inplaceable (%A) and a non-inplaceable (%B) tensor.
func @extract_slice_nonmatching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false},
%idx: index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// %r1 bufferizes inplace because %A is inplaceable.
// %r0 is an overlapping tensor.extract_slice that does not match, it must be
// out of place.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// %r1 can bufferize inplace fine.
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
%r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
// %r3 does not bufferize inplace because %B is not inplaceable.
// %r2 is an overlapping tensor.extract_slice that does not match, but does
// not alias with the buffer coming from %r3 so it can actually bufferize
// inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
// %r3 cannot bufferize inplace since %B is not inplaceable.
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]}
%r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: func @extract_slice_matching_insert_slice
// Test case: a slice is extracted and inserted back into the same tensor with
// identical offsets/sizes/strides ([0][4][1]), i.e. the extract_slice and
// insert_slice match. Exercised for an inplaceable (%A) and a non-inplaceable
// (%B) tensor.
func @extract_slice_matching_insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = false})
-> (tensor<?xf32>, tensor<?xf32>)
{
// %r1 bufferizes inplace because %A is inplaceable.
// %r0 is a tensor.extract_slice that matches, it can also be bufferized
// inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
%r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
// %r2 is a tensor.extract_slice that matches %r3, it can be bufferized
// inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>
// tensor.insert_slice cannot bufferize inplace.
// This should have been captured by a canonicalization pattern and it would
// be unproductive to have special logic in bufferization to encode matching
// insert_slice(extract_slice(A), A).
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
%r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: @read_of_matching_insert_slice_source
// Test case: extract_slice / fill / matching insert_slice on inplaceable %A,
// followed by a vector.transfer_read of the filled slice (%1), i.e. the source
// of the insert_slice is read again after the insert_slice. All three tensor
// ops are expected to be in-place.
func @read_of_matching_insert_slice_source(
%A : tensor<?xf32> {linalg.inplaceable = true},
%idx : index,
%idx2 : index)
-> (tensor<?xf32>, vector<5xf32>)
{
%cst = arith.constant 0.0 : f32
%cst2 = arith.constant 1.0 : f32
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
%0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
// Read of the insert_slice source (%1) after the insert_slice itself.
%3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>
// The tensor result is expected to be equivalent to argument 0 (%A); the
// vector result has no equivalent argument (-1).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %2, %3 : tensor<?xf32>, vector<5xf32>
}
// -----
// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
// Test case: two extract_slice / fill / insert_slice sequences on the same
// underlying tensor (%A, then %2), with the vector.transfer_read of the first
// filled slice (%1) placed between the second fill and the second
// insert_slice. The first extract_slice is expected to be out of place.
func @read_of_matching_insert_slice_source_interleaved(
%A : tensor<?xf32> {linalg.inplaceable = true},
%idx : index,
%idx2 : index,
%idx3 : index)
-> (tensor<?xf32>, vector<5xf32>)
{
%cst = arith.constant 0.0 : f32
%cst2 = arith.constant 1.0 : f32
// NOTE(review): presumably out of place because %1 is still read after the
// second fill writes into a slice of %2 -- confirm against the analysis.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
%0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
%4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?xf32>) -> tensor<?xf32>
// Read of the first filled slice (%1), interleaved with the second sequence.
%3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %6, %3 : tensor<?xf32>, vector<5xf32>
}
// -----
// CHECK-LABEL: func @extract_slice_linalg_readonly_use
// Test case: a slice of non-inplaceable %A is only ever used as a matmul input
// operand. The two matmuls differ in their output operand: non-inplaceable %B
// versus inplaceable %C.
func @extract_slice_linalg_readonly_use(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%B : tensor<4x4xf32> {linalg.inplaceable = false},
%C : tensor<4x4xf32> {linalg.inplaceable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// tensor.extract_slice is only used as a read, no interference irrespective
// of user's inplace status.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// matmul output operand is not inplaceable at the function boundary.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%B: tensor<4x4xf32>)
-> tensor<4x4xf32>
// matmul output operand is inplaceable at the function boundary.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%C: tensor<4x4xf32>)
-> tensor<4x4xf32>
// Only %E is expected to be equivalent to a function argument (index 2, %C).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [-1, 2]
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}
// -----
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
// Test case: slices of %B (non-inplaceable) and %C (inplaceable) are used as
// matmul output operands, and the slice of %B (%sB) is additionally read as an
// input of the second matmul (%E). The "Step N" comments are numbered in
// reverse program order (Step 1 is the last op).
func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = false},
%C : tensor<?x?xf32> {linalg.inplaceable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// Step 4. %sB forward propagates to a write in %D but it is not inplace.
// So this is only ever read and can bufferize inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 3. %sB has a read interference in %E, it does not bufferize inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%sB: tensor<4x4xf32>)
-> tensor<4x4xf32>
// Step 2. %sC forward propagates to an inplace write in %E.
// %sC backward propagates to %C which is inplaceable.
// As a consequence this is bufferized inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 1. %sC backprops to the tensor.extract_slice producer which is not
// considered an interference. This bufferizes inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%sC: tensor<4x4xf32>)
-> tensor<4x4xf32>
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}
// -----
// CHECK-LABEL: func @insert_slice_double_extract_slice
// Test case: a dynamic slice of inplaceable %C is used as a matmul output, a
// second slice is extracted from the matmul result, and that slice is inserted
// back into %C at the offsets/sizes of the first extract_slice. Every tensor
// operand is expected to be in-place.
func @insert_slice_double_extract_slice(
%s1: index,
%s2: index,
%s3: index,
%s4: index,
%A: tensor<8x6xf32> {linalg.inplaceable = false},
%B: tensor<6x6xf32> {linalg.inplaceable = false},
%C: tensor<30x20xf32> {linalg.inplaceable = true})
-> tensor<30x20xf32>
{
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]}
%15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor<?x?xf32>
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
%19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]}
%20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<?x?xf32> into tensor<30x20xf32>
// %C is function argument number 6 (after %s1..%s4, %A and %B).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [6]
return %20 : tensor<30x20xf32>
}
//===----------------------------------------------------------------------===//
// Transitive cases
//===----------------------------------------------------------------------===//
// -----
// CHECK-LABEL: func @extract_slice_to_linalg_write_use
// Test case: slices of %B (non-inplaceable) and %C (inplaceable) are each used
// only as a matmul output operand. Unlike the earlier test with the same name,
// the second matmul reads %A twice and does not read %sB. The "Step N" comments
// are numbered in reverse program order (Step 1 is the last op).
func @extract_slice_to_linalg_write_use(
%A : tensor<4x4xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = false},
%C : tensor<?x?xf32> {linalg.inplaceable = true})
-> (tensor<4x4xf32>, tensor<4x4xf32>)
{
// Step 4. %sB forward propagates to an inplace write in %D.
// %sB backward propagates to %B which is not inplaceable.
// As a consequence this is bufferized out of place.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
%sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 3. %sB backprops to the tensor.extract_slice producer which is not
// considered an interference. This bufferizes inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
outs(%sB: tensor<4x4xf32>)
-> tensor<4x4xf32>
// Step 2. %sC forward propagates to an inplace write in %E.
// %sC backward propagates to %C which is inplaceable.
// As a consequence this is bufferized inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
%sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
// Step 1. %sC backprops to the tensor.extract_slice producer which is not
// considered an interference. This bufferizes inplace.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
outs(%sC: tensor<4x4xf32>)
-> tensor<4x4xf32>
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}
// -----
// CHECK-LABEL: func @nested_extract_slice_and_insert
// Test case: nested extract_slice / fill / insert_slice chains on three
// tensors: non-inplaceable %A (2 levels), inplaceable %B (3 levels) and
// inplaceable %C (2 levels, with insert sizes that do not match the extract
// sizes). In each group the expectations are listed before the ops they
// describe and are matched in op order.
func @nested_extract_slice_and_insert(
%A : tensor<?x?xf32> {linalg.inplaceable = false},
%B : tensor<?x?xf32> {linalg.inplaceable = true},
%C : tensor<?x?xf32> {linalg.inplaceable = true},
%idx : index,
%sz1 : index,
%sz2 : index)
-> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
{
%f0 = arith.constant 0.0 : f32
// 2-level matching tensor.extract_slice / tensor.insert_slice into non
// inplaceable %A.
// - %rA is not inplaceable because %A is not inplaceable at function boundary.
// - once %rA is deemed not inplaceable, nothing prevents %rsA from being
// inplaceable
// - this propagates to %FA and %ssA being inplaceable.
// - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
// inplaceable and so %sA is not inplaceable.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK-NEXT: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
%FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// 3-level matching tensor.extract_slice / tensor.insert_slice into
// inplaceable %B.
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK-NEXT: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
%sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
%FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
%rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
%rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
%rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// 2-level matching tensor.extract_slice / tensor.insert_slice into
// inplaceable %C with a twist.
// Throw a wrench in the system: %rsC production sizes do not match %ssC.
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
// The tensor.insert_slice that would be candidate for matching does not actually
// match. That tensor.insert_slice can still be bufferized inplace nonetheless
// but this tensor.extract_slice, which bufferizes to an inplace write, cannot.
// CHECK-NEXT: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]}
// CHECK-NEXT: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
%FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor<?x4xf32>) -> tensor<?x4xf32>
%rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor<?x4xf32> into tensor<?x?xf32>
%rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
// %rB and %rC are expected to be equivalent to arguments 1 and 2; %rA has no
// equivalent argument.
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2]
return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}
//===----------------------------------------------------------------------===//
// Simple loop cases
//===----------------------------------------------------------------------===//
// -----
// CHECK-LABEL: func @scf_for_yield_only
// Test case: two scf.for loops whose bodies only yield their iter_arg
// unchanged. The first loop is initialized with non-inplaceable %A, the second
// with inplaceable %B; the last entry of each loop's attribute is the
// expectation for that init operand.
func @scf_for_yield_only(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%lb : index,
%ub : index,
%step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// Loop over %A: the init operand is expected to be out of place.
// CHECK: scf.for
// CHECK-NEXT: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
%r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
scf.yield %t : tensor<?xf32>
}
// Loop over %B: the init operand is expected to be in place.
// CHECK: scf.for
// CHECK-NEXT: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
%r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %B) -> (tensor<?xf32>) {
scf.yield %t : tensor<?xf32>
}
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [-1, 1]
return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}
// -----
// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
// Test case: a single scf.for carries two iter_args, initialized from
// non-inplaceable %A and inplaceable %B; the body inserts %C into each of them
// and yields the results.
func @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {linalg.inplaceable = false},
%B : tensor<?xf32> {linalg.inplaceable = true},
%C : tensor<4xf32> {linalg.inplaceable = false},
%lb : index,
%ub : index,
%step : index)
-> (tensor<?xf32>, tensor<?xf32>)
{
// CHECK: scf.for
// scf.for bbArgs are always inplaceable seen from ops inside the body:
// 1. Either the matching tensor is not inplaceable and an alloc occurs
// which makes bbArg inplaceable.
// 2. Or it is already inplaceable and so is bbArg.
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
// CHECK-NEXT: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
// CHECK-NEXT: scf.yield {__inplace_operands_attr__ = ["true", "true"]}
// CHECK-NEXT: } {__inplace_operands_attr__ = ["none", "none", "none", "false", "true"]}
%r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
-> (tensor<?xf32>, tensor<?xf32>)
{
%ttA = tensor.insert_slice %C into %tA[0][4][1] : tensor<4xf32> into tensor<?xf32>
%ttB = tensor.insert_slice %C into %tB[0][4][1] : tensor<4xf32> into tensor<?xf32>
scf.yield %ttA, %ttB : tensor<?xf32>, tensor<?xf32>
}
// Only the second loop result is expected to be equivalent to a function
// argument (index 1, %B).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [-1, 1]
return %r0#0, %r0#1: tensor<?xf32>, tensor<?xf32>
}
// -----
// Declaration (no body) of a private function taking one tensor<?xf32> and
// returning nothing; it is called from the loop bodies of @scf_for_deps.
func private @some_use(tensor<?xf32>) -> ()
// CHECK-LABEL: func @scf_for_deps
// Test case: two consecutive scf.for loops are both initialized with %A; each
// body passes its iter_arg to the external @some_use and yields it unchanged.
// NOTE(review): %B is declared but never used in this function, and both loops
// start from %A -- confirm this is intended.
func @scf_for_deps(
%A : tensor<?xf32> {linalg.inplaceable = true},
%B : tensor<?xf32> {linalg.inplaceable = true},
%lb : index,
%ub : index,
%step : index)
-> (tensor<?xf32>)
{
// %r0 must be out of place because one use of %t in the subsequent production
// of %r1 is read.
// CHECK: scf.for
// CHECK-NEXT: call
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
// CHECK-NEXT: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "false"]}
%r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
call @some_use(%t) : (tensor<?xf32>) -> ()
scf.yield %t : tensor<?xf32>
}
// %r1 bufferizes inplace fine.
// CHECK: scf.for
// CHECK-NEXT: call
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
// CHECK-NEXT: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}
%r1 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
call @some_use(%t) : (tensor<?xf32>) -> ()
scf.yield %t : tensor<?xf32>
}
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r1: tensor<?xf32>
}
// -----
//===----------------------------------------------------------------------===//
// Cross function boundary cases.
//===----------------------------------------------------------------------===//
// Declaration (no body) of a private function taking one tensor<64xf32>; it is
// called by @dependence_through_call.
func private @foo(tensor<64xf32>)
// CHECK-LABEL: dependence_through_call
// Test case: two linalg.fill ops write different constants into the same
// inplaceable argument %I, and both results are then passed to the external
// @foo. The numbered comments are in reverse program order (1 is the later
// fill).
func @dependence_through_call(%I : tensor<64xf32> {linalg.inplaceable = true}) {
%f1 = arith.constant 1.000000e+00 : f32
%f2 = arith.constant 2.000000e+00 : f32
// 2. %B already bufferizes inplace, %A would alias and have a different
// value. The calls to `foo` are determined to read conservatively, so %A
// cannot bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
%A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
// 1. Bufferizes inplace: no alias to %A is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
call @foo(%A) : (tensor<64xf32>) -> ()
call @foo(%B) : (tensor<64xf32>) -> ()
return
}
// -----
// Declaration (no body) of a private function taking one tensor<64xf32>; it is
// called by @bar and by @read_dependence_through_scf_and_call.
func private @foo(tensor<64xf32>)
// Private wrapper with a body: forwards its tensor argument to the external
// @foo and returns nothing.
func private @bar(%A : tensor<64xf32>) {
call @foo(%A) : (tensor<64xf32>) -> ()
return
}
// Test case: pairs of linalg.fill ops write different constants into the same
// inplaceable argument (%I, then %I2). The first pair is threaded through an
// scf.for that only yields its iter_args before being passed to the external
// @foo; the second pair is passed to @bar, which itself calls @foo. The
// numbered comments are in reverse program order (1 is the last fill).
// The label below anchors the following expectations to this function's
// output instead of letting them float after the previous label.
// CHECK-LABEL: func @read_dependence_through_scf_and_call(
func @read_dependence_through_scf_and_call(
%I : tensor<64xf32> {linalg.inplaceable = true},
%I2 : tensor<64xf32> {linalg.inplaceable = true}) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%f1 = arith.constant 1.000000e+00 : f32
%f2 = arith.constant 2.000000e+00 : f32
// 5. %B bufferizes inplace, %A would alias and have a different value.
// The calls to `foo` are determined to read conservatively, so %A cannot
// bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
%A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
// 4. Bufferizes inplace: no alias to %A is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
// 3. Does not read or write, bufferizes inplace.
// CHECK: scf.for
// CHECK-NEXT: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
// CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]}
%r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B)
-> (tensor<64xf32>, tensor<64xf32>)
{
scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32>
}
call @foo(%r#0) : (tensor<64xf32>) -> ()
call @foo(%r#1) : (tensor<64xf32>) -> ()
// 2. %B2 already bufferizes inplace, %A2 would alias and have a different
// value. The calls to `foo` are determined to read conservatively, so %A2
// cannot bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
%A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
// 1. Bufferizes inplace: no alias to %A2 is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
call @bar(%A2) : (tensor<64xf32>) -> ()
call @bar(%B2) : (tensor<64xf32>) -> ()
return
}
// -----
//===----------------------------------------------------------------------===//
// Transitive cases through extract_slice.
//===----------------------------------------------------------------------===//
// CHECK-LABEL: func @write_into_constant_via_alias
// Test case: a slice of a constant tensor (%A is an arith.constant here, not a
// function argument) is the destination of a vector.transfer_write. The
// extract_slice is expected to be out of place, while the transfer_write into
// the slice is expected to be in place.
func @write_into_constant_via_alias(%v : vector<5xi32>,
%s1 : index, %s2 : index,
%s3 : index) -> tensor<?xi32> {
%A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
%b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
%r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
return %r : tensor<?xi32>
}
// -----
// Test case: one linalg.init_tensor result (%7) is filled twice with different
// constants; slices of the two fill results feed a matmul that writes into
// inplaceable %arg2. The first fill is expected to be out of place and the
// second in place. %arg0, %arg1 and %c0 are not used in this function.
// The label below anchors the following expectations to this function's
// output instead of letting them float after the previous label.
// CHECK-LABEL: func @matmul_on_tensors(
func.func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant 1.000000e+00 : f32
%7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// Both fills target %7; the expectations are listed in op order (%8, then %11).
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
%8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
// Expectations for %sA, %sB and %r, in that order.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
%sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
%r = linalg.matmul
ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
// The result is expected to be equivalent to argument 2 (%arg2).
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2]
return %r : tensor<256x256xf32>
}
// -----
// Variant of the previous case in which each fill of the shared
// linalg.init_tensor result (%7) is followed by a vector.transfer_write into
// the fill result. Expected: the first fill is out-of-place, the second fill
// and both transfer_writes are inplace; the extract_slices and the matmul
// (writing into the inplaceable %arg2) are inplace, and the returned tensor is
// equivalent to function argument #2.
// The label below anchors the following expectations to this function so they
// cannot accidentally match ops printed for a neighbouring test case.
// CHECK-LABEL: func @matmul_on_tensors
func.func @matmul_on_tensors(
%arg0: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg1: tensor<518x518xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
%arg2: tensor<256x256xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
-> tensor<256x256xf32>
{
%c0 = arith.constant 0 : index
%cst_0 = arith.constant 0.000000e+00 : f32
%cst_1 = arith.constant 1.000000e+00 : f32
%7 = linalg.init_tensor [256, 256] : tensor<256x256xf32>
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
%8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
%11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
%sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
%r = linalg.matmul
ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2]
return %r : tensor<256x256xf32>
}
// -----
//===----------------------------------------------------------------------===//
// Chain of tensor.insert_slice is better traversed in reverse order without
// prioritizing the tensor.insert_slice ops.
//===----------------------------------------------------------------------===//
// Fills %arg2, then twice: extracts a slice, writes a vector into it, and
// inserts the result back at the same offsets/sizes. Every tensor operand is
// expected to be inplace. %arg0 and %arg1 are unused (access "none"), %arg2 is
// written (access "write"), and the returned tensor is expected to be
// equivalent to function argument #4 (%arg2).
// CHECK-LABEL: func @insert_slice_chain(
func @insert_slice_chain(
%v1: vector<32x90xf32>,
%v2: vector<30x90xf32>,
%arg0: tensor<62x126xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "none"
%arg1: tensor<126x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
// CHECK-SAME: bufferization.access = "none"
%arg2: tensor<62x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "write"
-> tensor<62x90xf32> attributes {passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
{
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
%0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>
// First chain: rows [0, 32).
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
%2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
%7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
// Second chain: rows [32, 62), operating on the result of the first insert.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
%10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
%14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [4]
return %15 : tensor<62x90xf32>
}
// -----
//===----------------------------------------------------------------------===//
// Insert point issue cases.
//===----------------------------------------------------------------------===//
// Only test IR validity wrt dominance.
// A loop-carried tensor is sliced twice, written through the inner slice, and
// inserted back into the iter_arg. No per-op inplace annotations are checked
// here; only the return's equivalence to function argument #0 is verified.
// CHECK-LABEL: func @ip
func @ip(%t: tensor<10x20xf32> {linalg.inplaceable = true},
%x: index, %y: index, %v: vector<5x6xf32>)
-> tensor<10x20xf32>
{
%c0 = arith.constant 0 : index
%c256 = arith.constant 256 : index
%c257 = arith.constant 257 : index
%r = scf.for %arg0 = %c0 to %c257 step %c256 iter_args(%arg1 = %t) -> (tensor<10x20xf32>) {
%t1 = tensor.extract_slice %arg1[%x, 0] [5, %y] [1, 1] : tensor<10x20xf32> to tensor<5x?xf32>
%t11 = tensor.extract_slice %t1[0, 0] [5, %y] [1, 1] : tensor<5x?xf32> to tensor<5x?xf32>
%t2 = vector.transfer_write %v, %t11[%c0, %c0] : vector<5x6xf32>, tensor<5x?xf32>
%t3 = tensor.insert_slice %t2 into %arg1[%x, 0] [5, %y] [1, 1] : tensor<5x?xf32> into tensor<10x20xf32>
scf.yield %t3 : tensor<10x20xf32>
}
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r : tensor<10x20xf32>
}
// -----
// A linalg.generic that uses the same tensor (%t2) for both of its outs
// operands. Expected: the input and the first out operand are inplace, the
// second out operand is out-of-place, and only the first result is equivalent
// to a function argument (#1); the second is marked -1.
#accesses = [
affine_map<(i) -> (i)>,
affine_map<(i) -> (i)>,
affine_map<(i) -> (i)>
]
#trait = {
indexing_maps = #accesses,
iterator_types = ["parallel"]
}
// CHECK-LABEL: func @linalg_op_same_out_tensors(
func @linalg_op_same_out_tensors(
%t1: tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%t2: tensor<?xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>){
// CHECK: linalg.generic
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]
%o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
^bb(%0: f32, %1: f32, %2 : f32) :
linalg.yield %0, %0 : f32, f32
} -> (tensor<?xf32>, tensor<?xf32>)
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [1, -1]
return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
}
// -----
// Same as the two-output case, but the linalg.generic uses %t2 for three outs
// operands. Expected: the input and the first out operand are inplace, the
// remaining two out operands are out-of-place, and only the first result is
// equivalent to a function argument (#1).
#accesses = [
affine_map<(i) -> (i)>,
affine_map<(i) -> (i)>,
affine_map<(i) -> (i)>,
affine_map<(i) -> (i)>
]
#trait = {
indexing_maps = #accesses,
iterator_types = ["parallel"]
}
// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func @linalg_op_same_out_tensors_2(
%t1: tensor<?xf32> {linalg.inplaceable = true},
// CHECK-SAME: bufferization.access = "read"
%t2: tensor<?xf32> {linalg.inplaceable = true})
// CHECK-SAME: bufferization.access = "write"
-> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
// CHECK: linalg.generic
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"]
%o:3 = linalg.generic #trait
ins(%t1 : tensor<?xf32>)
outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
linalg.yield %0, %0, %0 : f32, f32, f32
} -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [1, -1, -1]
return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
}
// -----
// Two extract/write/insert chains: one operates on %arg2 directly, the other
// on %e, a dynamically-sized slice of %arg2. Both final tensors are returned.
// Expected: only the extract_slice that produces %e is out-of-place; the first
// returned value is equivalent to function argument #2 and the second has no
// equivalent argument (-1).
// CHECK-LABEL: func @double_insert_slice_into_alias
func @double_insert_slice_into_alias(
%v1: vector<32x90xf32>,
%v2: vector<30x90xf32>,
%arg2: tensor<62x90xf32> {linalg.inplaceable = true},
%s1: index, %s2: index, %s3: index, %s4: index)
-> (tensor<62x90xf32>, tensor<?x?xf32>)
{
%c0 = arith.constant 0 : index
// This extract_slice cannot bufferize inplace because both its operand and
// its result are modified and returned separately.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
%e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
%2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
%7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
%10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
%14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2, -1]
return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
}
// -----
// Extracts two slices of %arg2 (rows [0, 32) and rows [32, 62)) up front and
// then inserts each back at the position it was taken from. Expected: the
// first extract is inplace, the second is out-of-place (see TODO), both
// inserts are inplace, and the result is equivalent to function argument #0.
// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_1
func @interleaved_extract_insert_slice_chain_1(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
%2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
// TODO: This should bufferize inplace once we have a proper range analysis.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]
%10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %15 : tensor<62x90xf32>
}
// -----
// Like the _1 variant, but the second slice starts at row 31, so rows [0, 32)
// and [31, 61) share row 31. Expected: the first extract is inplace, the
// second is out-of-place, both inserts are inplace, and the result is
// equivalent to function argument #0.
// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_2
func @interleaved_extract_insert_slice_chain_2(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
%2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
// The slices are overlapping, so this can never bufferize inplace.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]
%10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %15 : tensor<62x90xf32>
}
// -----
// A single slice of %arg2 is inserted twice: once back at its original offset
// and once at row offset 15. Expected: the extract is out-of-place, both
// inserts are inplace, and the result is equivalent to function argument #0.
// CHECK-LABEL: func @extract_once_insert_twice
func @extract_once_insert_twice(
%arg2: tensor<62x90xf32> {linalg.inplaceable = true})
-> (tensor<62x90xf32>)
{
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false"]
%2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
%15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %15 : tensor<62x90xf32>
}
// -----
// %t1 is written before an scf.for whose body reads %t1 (through the iter_arg
// %t2 and a slice of it). Expected: the transfer_write before the loop is
// out-of-place, while the extract_slice in the loop, the loop's tensor
// iter_arg operand, and the trailing linalg.generic are inplace.
#accesses = [
affine_map<(i) -> (i)>
]
#trait = {
indexing_maps = #accesses,
iterator_types = ["parallel"]
}
// CHECK-LABEL: func @reading_scf_for
func @reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.0 : f32
// Write to %t1.
// CHECK: vector.transfer_write
// CHECK-SAME: __inplace_operands_attr__ = ["none", "false", "none"]
%t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
// Read the old value of %t1 inside the loop via an alias.
// CHECK: scf.for {{.*}} {
%r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: __inplace_operands_attr__ = ["true", "none", "none"]
%e = tensor.extract_slice %t2[%s][%s][1] : tensor<?xf32> to tensor<?xf32>
// Read from %t1 via alias %e.
%v2 = vector.transfer_read %e[%s], %cst : tensor<?xf32>, vector<5xf32>
scf.yield %t2, %v2 : tensor<?xf32>, vector<5xf32>
}
// The attribute on the loop covers its five operands (lower bound, upper
// bound, step, and the two iter_args); only the tensor iter_arg is "true".
// CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "none"]}
// Use %t3 in some way without reading it, so that it does not get DCE'd.
// CHECK: linalg.generic
// CHECK-SAME: __inplace_operands_attr__ = ["true"]
%o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
^bb(%0: f32) :
linalg.yield %cst : f32
} -> (tensor<?xf32>)
return %o, %v3 : tensor<?xf32>, vector<5xf32>
}
// -----
// Counterpart of the reading loop case: the scf.for body overwrites its tensor
// iter_arg before reading it, so the loop does not read the original %t1.
// Expected: the transfer_write before the loop, the linalg.generic inside the
// loop, and the trailing linalg.generic are all inplace; the first returned
// value is equivalent to function argument #0.
#accesses = [
affine_map<(i) -> (i)>
]
#trait = {
indexing_maps = #accesses,
iterator_types = ["parallel"]
}
// CHECK-LABEL: func @non_reading_scf_for
func @non_reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%s: index, %v: vector<5xf32>) -> (tensor<?xf32>, vector<5xf32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%cst = arith.constant 0.0 : f32
// Write to %t1.
// CHECK: vector.transfer_write
// CHECK-SAME: __inplace_operands_attr__ = ["none", "true", "none"]
%t3 = vector.transfer_write %v, %t1[%s] : vector<5xf32>, tensor<?xf32>
// This loop does not read from %t1. It only writes to it.
// CHECK: scf.for
%r, %v3 = scf.for %i = %c0 to %s step %c1 iter_args(%t2 = %t1, %v0 = %v) -> (tensor<?xf32>, vector<5xf32>) {
// Write to %t1 via %t2. (Overwrite %t3.)
// CHECK: linalg.generic
// CHECK-SAME: __inplace_operands_attr__ = ["true"]
%o2 = linalg.generic #trait outs (%t2 : tensor<?xf32>) {
^bb(%0: f32) :
linalg.yield %cst : f32
} -> (tensor<?xf32>)
// Read overwritten value. This is not a read of %t1.
%v2 = vector.transfer_read %o2[%s], %cst : tensor<?xf32>, vector<5xf32>
scf.yield %o2, %v2 : tensor<?xf32>, vector<5xf32>
}
// Use %t3 in some way without reading it, so that it does not get DCE'd.
// CHECK: linalg.generic
// CHECK-SAME: __inplace_operands_attr__ = ["true"]
%o = linalg.generic #trait outs (%t3 : tensor<?xf32>) {
^bb(%0: f32) :
linalg.yield %cst : f32
} -> (tensor<?xf32>)
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %o, %v3 : tensor<?xf32>, vector<5xf32>
}
// -----
//===----------------------------------------------------------------------===//
// scf.if cases
//===----------------------------------------------------------------------===//
// This example passes analysis, but it fails when bufferizing.
// The two branches of the scf.if yield two different function arguments
// (%t1 and %t2); both yields are expected to be marked inplace.
// CHECK-LABEL: func @scf_if_inplace1
func @scf_if_inplace1(%t1: tensor<?xf32> {linalg.inplaceable = true},
%t2: tensor<?xf32> {linalg.inplaceable = true},
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t1 : tensor<?xf32>
} else {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t2 : tensor<?xf32>
}
return %r : tensor<?xf32>
}
// -----
// One branch yields %t1 unchanged, the other yields the result of a
// transfer_write into %t1. Expected: the checked yield and the write are
// inplace, and the result is equivalent to function argument #0.
// CHECK-LABEL: func @scf_if_inplace2
func @scf_if_inplace2(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t1 : tensor<?xf32>
} else {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
scf.yield %t2 : tensor<?xf32>
}
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r : tensor<?xf32>
}
// -----
// %e is a slice of %t1 taken before the scf.if. One branch writes through %e,
// the other writes %t1 directly. Expected: the extract_slice, both writes, and
// both yields are inplace.
// CHECK-LABEL: func @scf_if_inplace3
func @scf_if_inplace3(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t2 = vector.transfer_write %v1, %e[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t2 : tensor<?xf32>
} else {
// Writing the same tensor through an alias. This is OK.
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t3 : tensor<?xf32>
}
return %r : tensor<?xf32>
}
// -----
// Two scf.if ops in sequence: the first yields either %t1 or a write into %t1;
// the second yields the first result (%r) from both branches, and its result
// is then read. Expected: the write and all yields are inplace, and the first
// returned value is equivalent to function argument #0.
// CHECK-LABEL: func @scf_if_in_place4
func @scf_if_in_place4(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t1 : tensor<?xf32>
} else {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t2 : tensor<?xf32>
}
%r_alias = scf.if %cond2 -> (tensor<?xf32>) {
// Reading %r is OK. No conflict.
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %r : tensor<?xf32>
} else {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %r : tensor<?xf32>
}
%v2 = vector.transfer_read %r_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0, -1]
return %r_alias, %v2 : tensor<?xf32>, vector<10xf32>
}
// -----
// Both branches extract the same slice of %t1 (same offset and size), and the
// scf.if result is inserted back into %t1 at that same offset and size.
// Expected: both extracts, both yields, and the insert are inplace; the result
// is equivalent to function argument #0.
// CHECK-LABEL: func @scf_if_inplace5
func @scf_if_inplace5(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %e : tensor<?xf32>
} else {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
%f = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %f : tensor<?xf32>
}
// Inserting into an equivalent tensor at the same offset. This bufferizes
// inplace.
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
%r2 = tensor.insert_slice %r into %t1[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r2 : tensor<?xf32>
}
// -----
// Nested scf.if ops with three leaf branches, each performing a
// transfer_write into %t1. Expected: every write and every yield is inplace,
// and the result is equivalent to function argument #0.
// CHECK-LABEL: func @scf_if_inplace6
func @scf_if_inplace6(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>,
%v3: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> tensor<?xf32> {
// Test nested scf.if ops.
%r = scf.if %cond -> (tensor<?xf32>) {
%t2 = scf.if %cond2 -> (tensor<?xf32>) {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t3 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t3 : tensor<?xf32>
} else {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t4 = vector.transfer_write %v3, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t4 : tensor<?xf32>
}
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t2 : tensor<?xf32>
} else {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t3 : tensor<?xf32>
}
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r : tensor<?xf32>
}
// -----
// Both branches write into %t1, but only the else branch also reads %t1 after
// its write. Expected: the write in the then branch is inplace, the write in
// the else branch is out-of-place, and both yields are inplace for the tensor
// operand.
// CHECK-LABEL: func @scf_if_inplace7
func @scf_if_inplace7(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v1: vector<5xf32>, %v2: vector<5xf32>, %idx: index,
%idx2: index, %cond: i1) -> (tensor<?xf32>, vector<5xf32>) {
%cst = arith.constant 0.0 : f32
%r, %v_r2 = scf.if %cond -> (tensor<?xf32>, vector<5xf32>) {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%t2 = vector.transfer_write %v1, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
scf.yield %t2, %v1 : tensor<?xf32>, vector<5xf32>
} else {
// Writing the same tensor through an alias.
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
%t3 = vector.transfer_write %v2, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// Read the original value of %t1. This requires the write in this branch
// to be out-of-place. But the write in the other branch can still be
// inplace.
%v_r = vector.transfer_read %t1[%idx2], %cst : tensor<?xf32>, vector<5xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
scf.yield %t3, %v_r : tensor<?xf32>, vector<5xf32>
}
return %r, %v_r2 : tensor<?xf32>, vector<5xf32>
}
// -----
// One branch yields a slice of %t1, the other yields %t1 itself; the scf.if
// result is then inserted into %t1 at a different offset (%idx2). Expected:
// the extract and both yields are inplace, and the insert_slice destination
// operand is out-of-place.
// CHECK-LABEL: func @scf_if_out_of_place1a
func @scf_if_out_of_place1a(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %e : tensor<?xf32>
} else {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t1 : tensor<?xf32>
}
// Reading from and writing to the same tensor via different args. This is a
// conflict.
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]
%r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
return %r2 : tensor<?xf32>
}
// -----
// Each branch yields a different slice of %t1 (offsets %idx and %idx2); the
// scf.if result is inserted into %t1 at a third offset (%idx3). Expected: both
// extracts are out-of-place, the yields and the insert are inplace, and the
// result is equivalent to function argument #0.
// CHECK-LABEL: func @scf_if_out_of_place1b
func @scf_if_out_of_place1b(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index, %idx3: index,
%cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %e : tensor<?xf32>
} else {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
%f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %f : tensor<?xf32>
}
// Reading from and writing to the same tensor via different args. This is a
// conflict. In contrast to scf_if_out_of_place1a, the fact that %r aliases
// with %t1 is only detected when analyzing the tensor.extract_slices. That's
// why the tensor.insert_slice is inplace and the two extract_slices are
// out-of-place.
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
%r2 = tensor.insert_slice %r into %t1[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r2 : tensor<?xf32>
}
// -----
// Like scf_if_out_of_place1b, except that the insert_slice uses %idx2, i.e.
// the same offset/size as the slice extracted in the else branch. Expected:
// both extracts are out-of-place (see TODO for the else branch), the yields
// and the insert are inplace, and the result is equivalent to function
// argument #0.
// CHECK-LABEL: func @scf_if_out_of_place1c
func @scf_if_out_of_place1c(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx: index, %idx2: index, %cond: i1) -> tensor<?xf32> {
%r = scf.if %cond -> (tensor<?xf32>) {
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
%e = tensor.extract_slice %t1[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %e : tensor<?xf32>
} else {
// TODO: This one could bufferize inplace, but the analysis is too restrictive.
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]
%f = tensor.extract_slice %t1[%idx2][%idx2][1] : tensor<?xf32> to tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %f : tensor<?xf32>
}
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]
%r2 = tensor.insert_slice %r into %t1[%idx2][%idx2][1] : tensor<?xf32> into tensor<?xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %r2 : tensor<?xf32>
}
// -----
// One branch yields %t1, the other yields a write into %t1; %t1 is then read
// again after the scf.if. Expected: the transfer_write is out-of-place and the
// yield that follows it is inplace.
// CHECK-LABEL: func @scf_if_out_of_place2
func @scf_if_out_of_place2(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
} else {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
%t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t2 : tensor<?xf32>
}
// Read the old value of %t1. Forces the transfer_write to bufferize
// out-of-place.
%v2 = vector.transfer_read %t1[%idx], %cst : tensor<?xf32>, vector<10xf32>
return %r, %v2 : tensor<?xf32>, vector<10xf32>
}
// -----
// Like scf_if_out_of_place2, but the later read of %t1 goes through a second
// scf.if that yields %t1 from both branches. Expected: the transfer_write in
// the first scf.if is out-of-place; all checked yields are inplace.
// CHECK-LABEL: func @scf_if_out_of_place3
func @scf_if_out_of_place3(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1, %cond2: i1) -> (tensor<?xf32>, vector<10xf32>) {
%cst = arith.constant 0.0 : f32
%r = scf.if %cond -> (tensor<?xf32>) {
scf.yield %t1 : tensor<?xf32>
} else {
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
%t2 = vector.transfer_write %v, %t1[%idx] : vector<5xf32>, tensor<?xf32>
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t2 : tensor<?xf32>
}
%t1_alias = scf.if %cond2 -> (tensor<?xf32>) {
// scf.yield bufferizes to a read. That is a conflict in this example.
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t1 : tensor<?xf32>
} else {
// CHECK: scf.yield
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
scf.yield %t1 : tensor<?xf32>
}
%v2 = vector.transfer_read %t1_alias[%idx], %cst : tensor<?xf32>, vector<10xf32>
return %r, %v2 : tensor<?xf32>, vector<10xf32>
}
// -----
// Callee for @main_func below: writes %v into %A at index 0 and returns the
// result. The write is expected to be inplace.
// CHECK-LABEL: func @some_use
func @some_use(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
%idx = arith.constant 0 : index
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
%0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
return %0 : tensor<?xf32>
}
// Forwards its arguments to @some_use. The tensor operand of the call is
// expected to be inplace; the vector operand is not a tensor ("none").
// CHECK-LABEL: func @main_func
func @main_func(%A : tensor<?xf32> {linalg.inplaceable = true},
%v : vector<5xf32>) -> (tensor<?xf32>) {
// CHECK: call
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
%0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
return %0 : tensor<?xf32>
}
// -----
// Writes into a tensor obtained from a memref via bufferization.to_tensor and
// then reads the written tensor. Expected: the transfer_write is out-of-place.
// CHECK-LABEL: func @to_tensor_op_not_writable
func @to_tensor_op_not_writable(%m: memref<?xf32>, %v: vector<5xf32>,
%idx1: index, %idx2: index)
-> vector<10xf32> {
%0 = bufferization.to_tensor %m : memref<?xf32>
// Write to the tensor. Cannot be inplace due to the bufferization.to_tensor
// op above.
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
%w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>
// Read from the tensor and return result.
%cst = arith.constant 0.0 : f32
%r = vector.transfer_read %w[%idx2], %cst : tensor<?xf32>, vector<10xf32>
return %r : vector<10xf32>
}
// -----
// %t1 is written as a tensor and is also converted to a memref with
// bufferization.to_memref, which is then written and read directly. Expected:
// the tensor transfer_write is out-of-place.
// CHECK-LABEL: func @to_memref_op_is_reading
func @to_memref_op_is_reading(%t1: tensor<?xf32> {linalg.inplaceable = true},
%idx1: index, %idx2: index, %idx3: index,
%v1: vector<5xf32>)
-> (vector<5xf32>, vector<5xf32>) {
// Write + read to/from tensor.
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
%1 = vector.transfer_write %v1, %t1[%idx2] : vector<5xf32>, tensor<?xf32>
%cst = arith.constant 0.0 : f32
%r1 = vector.transfer_read %1[%idx3], %cst : tensor<?xf32>, vector<5xf32>
// Write + read to/from same memref.
%0 = bufferization.to_memref %t1 : memref<?xf32>
vector.transfer_write %v1, %0[%idx1] : vector<5xf32>, memref<?xf32>
%r2 = vector.transfer_read %0[%idx3], %cst : memref<?xf32>, vector<5xf32>
return %r1, %r2 : vector<5xf32>, vector<5xf32>
}
// -----
// Callee that returns its tensor argument unchanged; the returned value is
// expected to be equivalent to function argument #0.
// CHECK-LABEL: func @inner_func
func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %t : tensor<?xf32>
}
// Threads %t0 through an scf.for as iter_arg %t1; each iteration passes %t1 to
// @inner_func and yields the call result.
func @equivalent_func_arg(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
// This test does not check IR. It just asserts there is no failure due to
// non-equivalent scf.for yield values.
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
%3 = call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
scf.yield %3 : tensor<?xf32>
}
return %1: tensor<?xf32>
}
// -----
// Callee that inserts the constant 1.0 at index 0 of its tensor argument and
// returns the resulting tensor.
// CHECK-LABEL: func @inner_func_2
func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
%0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
// The returned value is the insert result rather than %t itself; it is still
// expected to be reported as equivalent to argument #0.
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [0]
return %0 : tensor<?xf32>
}
// Same loop structure as the @inner_func variant of this test, but the callee
// is @inner_func_2, which writes into its argument before returning it.
func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
// This test does not check IR. It just asserts there is no failure due to
// non-equivalent scf.for yield values.
%1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
%3 = call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
scf.yield %3 : tensor<?xf32>
}
return %1: tensor<?xf32>
}
// -----
// Selects between two inplaceable tensors, writes into the selected tensor,
// and afterwards reads from only one of the select inputs (%t1).
// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func @write_after_select_read_one(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{
%cst = arith.constant 0.0 : f32
%idx = arith.constant 0 : index
// %t1 is read again after the insert below, %t2 is not. Expected result: the
// select's %t1 operand is out of place and its %t2 operand is in place.
// CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "true"]}
%s = arith.select %c, %t1, %t2 : tensor<?xf32>
// The write into the select result itself is expected to be in place.
// CHECK: tensor.insert
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
%w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
// Read of the original %t1 after the write.
// CHECK: tensor.extract
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
%f = tensor.extract %t1[%idx] : tensor<?xf32>
return %f, %w : f32, tensor<?xf32>
}
// -----
// Selects between two inplaceable tensors, writes into the selected tensor,
// and afterwards reads from both select inputs (%t1 and %t2).
// CHECK-LABEL: func @write_after_select_read_both
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func @write_after_select_read_both(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%c : i1)
-> (f32, f32, tensor<?xf32>)
{
%cst = arith.constant 0.0 : f32
%idx = arith.constant 0 : index
// Both %t1 and %t2 are read again after the insert below. Expected result:
// both tensor operands of the select are out of place.
// CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "false"]}
%s = arith.select %c, %t1, %t2 : tensor<?xf32>
// The write into the select result itself is expected to be in place.
// CHECK: tensor.insert
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
%w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
// Read of the original %t1 after the write.
// CHECK: tensor.extract
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
%f = tensor.extract %t1[%idx] : tensor<?xf32>
// Read of the original %t2 after the write.
// CHECK: tensor.extract
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
%f2 = tensor.extract %t2[%idx] : tensor<?xf32>
return %f, %f2, %w : f32, f32, tensor<?xf32>
}
// -----
// Selects between two inplaceable tensors, writes into the selected tensor,
// and afterwards reads only from the written tensor %w, never from the
// original %t1 or %t2.
// CHECK-LABEL: func @write_after_select_no_conflict
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func @write_after_select_no_conflict(
%t1 : tensor<?xf32> {linalg.inplaceable = true},
%t2 : tensor<?xf32> {linalg.inplaceable = true},
%c : i1)
-> (f32, tensor<?xf32>)
{
%cst = arith.constant 0.0 : f32
%idx = arith.constant 0 : index
// Neither %t1 nor %t2 is read after the insert below. Expected result: both
// tensor operands of the select are in place.
// CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "true"]}
%s = arith.select %c, %t1, %t2 : tensor<?xf32>
// The write into the select result is expected to be in place.
// CHECK: tensor.insert
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
%w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
// The read uses the insert result %w.
// CHECK: tensor.extract
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
%f = tensor.extract %w[%idx] : tensor<?xf32>
return %f, %w : f32, tensor<?xf32>
}