Alex Zinenko 519847fefc [mlir] materialize strided memref layout as attribute
Introduce a new attribute to represent the strided memref layout. Strided
layouts are omnipresent in code generation flows and are the only kind of
layouts produced and supported by a half of operation in the memref dialect
(view-related, shape-related). However, they are internally represented as
affine maps that require a somewhat fragile extraction of the strides from the
linear form that also comes with an overhead. Furthermore, textual
representation of strided layouts as affine maps is difficult to read: compare
`affine_map<(d0, d1, d2)[s0, s1] -> (d0*32 + d1*s0 + s1 + d2)>` with
`strides: [32, ?, 1], offset: ?`. While a rudimentary support for parsing a
syntactically sugared version of the strided layout has existed in the codebase
for a long time, it does not go as far as this commit to make the strided
layout a first-class attribute in the IR.

This introduces the attribute and updates the tests that using the pre-existing
sugared form to use the new attribute instead. Most memref created
programmatically, e.g., in passes, still use the affine form with further
extraction of strides and will be updated separately.

Update and clean-up the memref type documentation that has gotten stale and has
been referring to the details of affine map composition that are long gone.

See https://discourse.llvm.org/t/rfc-materialize-strided-memref-layout-as-an-attribute/64211.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D132864
2022-08-30 17:19:58 +02:00

1317 lines
44 KiB
MLIR

// RUN: mlir-opt -verify-diagnostics -buffer-deallocation -split-input-file %s | FileCheck %s
// This file checks the behaviour of BufferDeallocation pass for moving and
// inserting missing DeallocOps in their correct positions. Furthermore,
// copies and their corresponding AllocOps are inserted.
// Test Case:
// bb0
// / \
// bb1 bb2 <- Initial position of AllocOp
// \ /
// bb3
// BufferDeallocation expected behavior: bb2 contains an AllocOp which is
// passed to bb3. In the latter block, there should be an deallocation.
// Since bb1 does not contain an adequate alloc and the alloc in bb2 is not
// moved to bb0, we need to insert allocs and copies.
// CHECK-LABEL: func @condBranch
func.func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC2]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: return
// -----
// Test Case:
// bb0
// / \
// bb1 bb2 <- Initial position of AllocOp
// \ /
// bb3
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed
// to bb3 via the block argument %2, it is currently required to allocate a
// temporary buffer for %2 that gets copies of %arg0 and %1 with their
// appropriate shape dimensions. The copy buffer deallocation will be applied
// to %2 in block bb3.
// CHECK-LABEL: func @condBranchDynamicType
func.func @condBranchDynamicType(
%arg0: i1,
%arg1: memref<?xf32>,
%arg2: memref<?xf32>,
%arg3: index) {
cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
cf.br ^bb3(%arg1 : memref<?xf32>)
^bb2(%0: index):
%1 = memref.alloc(%0) : memref<?xf32>
test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
cf.br ^bb3(%1 : memref<?xf32>)
^bb3(%2: memref<?xf32>):
test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>)
return
}
// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC3]],
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: return
// -----
// Test case: See above.
// CHECK-LABEL: func @condBranchUnrankedType
func.func @condBranchUnrankedType(
%arg0: i1,
%arg1: memref<*xf32>,
%arg2: memref<*xf32>,
%arg3: index) {
cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
cf.br ^bb3(%arg1 : memref<*xf32>)
^bb2(%0: index):
%1 = memref.alloc(%0) : memref<?xf32>
%2 = memref.cast %1 : memref<?xf32> to memref<*xf32>
test.buffer_based in(%arg1: memref<*xf32>) out(%2: memref<*xf32>)
cf.br ^bb3(%2 : memref<*xf32>)
^bb3(%3: memref<*xf32>):
test.copy(%3, %arg2) : (memref<*xf32>, memref<*xf32>)
return
}
// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC3]],
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: return
// -----
// Test Case:
// bb0
// / \
// bb1 bb2 <- Initial position of AllocOp
// | / \
// | bb3 bb4
// | \ /
// \ bb5
// \ /
// bb6
// |
// bb7
// BufferDeallocation expected behavior: The existing AllocOp has a dynamic
// dependency to block argument %0 in bb2. Since the dynamic type is passed to
// bb5 via the block argument %2 and to bb6 via block argument %3, it is
// currently required to allocate temporary buffers for %2 and %3 that gets
// copies of %1 and %arg0 1 with their appropriate shape dimensions. The copy
// buffer deallocations will be applied to %2 in block bb5 and to %3 in block
// bb6. Furthermore, there should be no copy inserted for %4.
// CHECK-LABEL: func @condBranchDynamicTypeNested
func.func @condBranchDynamicTypeNested(
%arg0: i1,
%arg1: memref<?xf32>,
%arg2: memref<?xf32>,
%arg3: index) {
cf.cond_br %arg0, ^bb1, ^bb2(%arg3: index)
^bb1:
cf.br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
%1 = memref.alloc(%0) : memref<?xf32>
test.buffer_based in(%arg1: memref<?xf32>) out(%1: memref<?xf32>)
cf.cond_br %arg0, ^bb3, ^bb4
^bb3:
cf.br ^bb5(%1 : memref<?xf32>)
^bb4:
cf.br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
cf.br ^bb6(%2 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
cf.br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>)
return
}
// CHECK-NEXT: cf.cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.br ^bb6(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
// CHECK: cf.cond_br
// CHECK: ^bb3:
// CHECK-NEXT: cf.br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK: ^bb4:
// CHECK-NEXT: cf.br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: cf.br ^bb6(%[[ALLOC3]]{{.*}})
// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
// CHECK-NEXT: cf.br ^bb7(%[[ALLOC4]]{{.*}})
// CHECK-NEXT: ^bb7(%[[ALLOC5:.*]]:{{.*}})
// CHECK: test.copy(%[[ALLOC5]],
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: return
// -----
// Test Case: Existing AllocOp with no users.
// BufferDeallocation expected behavior: It should insert a DeallocOp right
// before ReturnOp.
// CHECK-LABEL: func @emptyUsesValue
func.func @emptyUsesValue(%arg0: memref<4xf32>) {
%0 = memref.alloc() : memref<4xf32>
return
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return
// -----
// Test Case:
// bb0
// / \
// | bb1 <- Initial position of AllocOp
// \ /
// bb2
// BufferDeallocation expected behavior: It should insert a DeallocOp at the
// exit block after CopyOp since %1 is an alias for %0 and %arg1. Furthermore,
// we have to insert a copy and an alloc in the beginning of the function.
// CHECK-LABEL: func @criticalEdge
func.func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[ALLOC0:.*]] = bufferization.clone
// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// CHECK-NEXT: return
// -----
// Test Case:
// bb0 <- Initial position of AllocOp
// / \
// | bb1
// \ /
// bb2
// BufferDeallocation expected behavior: It only inserts a DeallocOp at the
// exit block after CopyOp since %1 is an alias for %0 and %arg1.
// CHECK-LABEL: func @invCriticalEdge
func.func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
^bb1:
cf.br ^bb2(%0 : memref<2xf32>)
^bb2(%1: memref<2xf32>):
test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK: dealloc
// CHECK-NEXT: return
// -----
// Test Case:
// bb0 <- Initial position of the first AllocOp
// / \
// bb1 bb2
// \ /
// bb3 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: It only inserts two missing
// DeallocOps in the exit block. %5 is an alias for %0. Therefore, the
// DeallocOp for %0 should occur after the last BufferBasedOp. The Dealloc for
// %7 should happen after CopyOp.
// CHECK-LABEL: func @ifElse
func.func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.cond_br %arg0,
^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
%7 = memref.alloc() : memref<2xf32>
test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[FIRST_ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]]
// CHECK-NEXT: return
// -----
// Test Case: No users for buffer in if-else CFG
// bb0 <- Initial position of AllocOp
// / \
// bb1 bb2
// \ /
// bb3
// BufferDeallocation expected behavior: It only inserts a missing DeallocOp
// in the exit block since %5 or %6 are the latest aliases of %0.
// CHECK-LABEL: func @ifElseNoUsers
func.func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.cond_br %arg0,
^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
test.copy(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[FIRST_ALLOC]]
// CHECK-NEXT: return
// -----
// Test Case:
// bb0 <- Initial position of the first AllocOp
// / \
// bb1 bb2
// | / \
// | bb3 bb4
// \ \ /
// \ /
// bb5 <- Initial position of the second AllocOp
// BufferDeallocation expected behavior: Two missing DeallocOps should be
// inserted in the exit block.
// CHECK-LABEL: func @ifElseNested
func.func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.cond_br %arg0,
^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
%9 = memref.alloc() : memref<2xf32>
test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[FIRST_ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[SECOND_ALLOC]]
// CHECK-NEXT: return
// -----
// Test Case: Dead operations in a single block.
// BufferDeallocation expected behavior: It only inserts the two missing
// DeallocOps after the last BufferBasedOp.
// CHECK-LABEL: func @redundantOperations
func.func @redundantOperations(%arg0: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
%1 = memref.alloc() : memref<2xf32>
test.buffer_based in(%0: memref<2xf32>) out(%1: memref<2xf32>)
return
}
// CHECK: (%[[ARG0:.*]]: {{.*}})
// CHECK-NEXT: %[[FIRST_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[ARG0]]{{.*}}out(%[[FIRST_ALLOC]]
// CHECK: %[[SECOND_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[FIRST_ALLOC]]{{.*}}out(%[[SECOND_ALLOC]]
// CHECK: dealloc
// CHECK-NEXT: dealloc
// CHECK-NEXT: return
// -----
// Test Case:
// bb0
// / \
// Initial pos of the 1st AllocOp -> bb1 bb2 <- Initial pos of the 2nd AllocOp
// \ /
// bb3
// BufferDeallocation expected behavior: We need to introduce a copy for each
// buffer since the buffers are passed to bb3. The both missing DeallocOps are
// inserted in the respective block of the allocs. The copy is freed in the exit
// block.
// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
func.func @moving_alloc_and_inserting_missing_dealloc(
%cond: i1,
%arg0: memref<2xf32>,
%arg1: memref<2xf32>) {
cf.cond_br %cond, ^bb1, ^bb2
^bb1:
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
cf.br ^exit(%0 : memref<2xf32>)
^bb2:
%1 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
cf.br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: cf.cond_br{{.*}}
// CHECK-NEXT: ^bb1
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC1]]
// CHECK-NEXT: ^bb2
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK-NEXT: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: cf.br ^bb3(%[[ALLOC3]]
// CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}})
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: return
// -----
// Test Case: Invalid position of the DeallocOp. There is a user after
// deallocation.
// bb0
// / \
// bb1 bb2 <- Initial position of AllocOp
// \ /
// bb3
// BufferDeallocation expected behavior: The existing DeallocOp should be
// moved to exit block.
// CHECK-LABEL: func @moving_invalid_dealloc_op_complex
func.func @moving_invalid_dealloc_op_complex(
%cond: i1,
%arg0: memref<2xf32>,
%arg1: memref<2xf32>) {
%1 = memref.alloc() : memref<2xf32>
cf.cond_br %cond, ^bb1, ^bb2
^bb1:
cf.br ^exit(%arg0 : memref<2xf32>)
^bb2:
test.buffer_based in(%arg0: memref<2xf32>) out(%1: memref<2xf32>)
memref.dealloc %1 : memref<2xf32>
cf.br ^exit(%1 : memref<2xf32>)
^exit(%arg2: memref<2xf32>):
test.copy(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: cf.cond_br
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return
// -----
// Test Case: Inserting missing DeallocOp in a single block.
// CHECK-LABEL: func @inserting_missing_dealloc_simple
func.func @inserting_missing_dealloc_simple(
%arg0 : memref<2xf32>,
%arg1: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// -----
// Test Case: Moving invalid DeallocOp (there is a user after deallocation) in a
// single block.
// CHECK-LABEL: func @moving_invalid_dealloc_op
func.func @moving_invalid_dealloc_op(%arg0 : memref<2xf32>, %arg1: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg0: memref<2xf32>) out(%0: memref<2xf32>)
memref.dealloc %0 : memref<2xf32>
test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// -----
// Test Case: Nested regions - This test defines a BufferBasedOp inside the
// region of a RegionBufferBasedOp.
// BufferDeallocation expected behavior: The AllocOp for the BufferBasedOp
// should remain inside the region of the RegionBufferBasedOp and it should insert
// the missing DeallocOp in the same region. The missing DeallocOp should be
// inserted after CopyOp.
// CHECK-LABEL: func @nested_regions_and_cond_branch
func.func @nested_regions_and_cond_branch(
%arg0: i1,
%arg1: memref<2xf32>,
%arg2: memref<2xf32>) {
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
%0 = memref.alloc() : memref<2xf32>
test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
%1 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
%tmp1 = math.exp %gen1_arg0 : f32
test.region_yield %tmp1 : f32
}
cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cf.cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: %[[ALLOC0:.*]] = bufferization.clone %[[ARG1]]
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: %{{.*}} = math.exp
// CHECK: %[[ALLOC3:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// -----
// Test Case: buffer deallocation escaping
// BufferDeallocation expected behavior: It must not dealloc %arg1 and %x
// since they are operands of return operation and should escape from
// deallocating. It should dealloc %y after CopyOp.
// CHECK-LABEL: func @memref_in_function_results
func.func @memref_in_function_results(
%arg0: memref<5xf32>,
%arg1: memref<10xf32>,
%arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
%x = memref.alloc() : memref<15xf32>
%y = memref.alloc() : memref<5xf32>
test.buffer_based in(%arg0: memref<5xf32>) out(%y: memref<5xf32>)
test.copy(%y, %arg2) : (memref<5xf32>, memref<5xf32>)
return %arg1, %x : memref<10xf32>, memref<15xf32>
}
// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
// CHECK: %[[X:.*]] = memref.alloc()
// CHECK: %[[Y:.*]] = memref.alloc()
// CHECK: test.copy
// CHECK: memref.dealloc %[[Y]]
// CHECK: return %[[ARG1]], %[[X]]
// -----
// Test Case: nested region control flow
// The alloc %1 flows through both if branches until it is finally returned.
// Hence, it does not require a specific dealloc operation. However, %3
// requires a dealloc.
// CHECK-LABEL: func @nested_region_control_flow
func.func @nested_region_control_flow(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
%0 = arith.cmpi eq, %arg0, %arg1 : index
%1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32>
} else {
%3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
scf.yield %1 : memref<?x?xf32>
}
return %2 : memref<?x?xf32>
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOC2:.*]] = memref.alloc(%arg0, %arg1)
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]
// -----
// Test Case: nested region control flow with a nested buffer allocation in a
// divergent branch.
// Buffer deallocation places a copy for both %1 and %3, since they are
// returned in the end.
// CHECK-LABEL: func @nested_region_control_flow_div
func.func @nested_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
%0 = arith.cmpi eq, %arg0, %arg1 : index
%1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32>
} else {
%3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
scf.yield %3 : memref<?x?xf32>
}
return %2 : memref<?x?xf32>
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc(%arg0, %arg1)
// CHECK-NEXT: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return %[[ALLOC1]]
// -----
// Test Case: nested region control flow within a region interface.
// No copies are required in this case since the allocation finally escapes
// the method.
// CHECK-LABEL: func @inner_region_control_flow
func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
%0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = test.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>) then {
^bb0(%arg1 : memref<?x?xf32>):
test.region_if_yield %arg1 : memref<?x?xf32>
} else {
^bb0(%arg1 : memref<?x?xf32>):
test.region_if_yield %arg1 : memref<?x?xf32>
} join {
^bb0(%arg1 : memref<?x?xf32>):
test.region_if_yield %arg1 : memref<?x?xf32>
}
return %1 : memref<?x?xf32>
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if
// CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC2]]
// CHECK: ^bb0(%[[ALLOC3:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC3]]
// CHECK: ^bb0(%[[ALLOC4:.*]]:{{.*}}):
// CHECK-NEXT: test.region_if_yield %[[ALLOC4]]
// CHECK: return %[[ALLOC1]]
// -----
// CHECK-LABEL: func @subview
func.func @subview(%arg0 : index, %arg1 : index, %arg2 : memref<?x?xf32>) {
%0 = memref.alloc() : memref<64x4xf32, strided<[4, 1], offset: 0>>
%1 = memref.subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] :
memref<64x4xf32, strided<[4, 1], offset: 0>>
to memref<?x?xf32, strided<[?, ?], offset: ?>>
test.copy(%1, %arg2) :
(memref<?x?xf32, strided<[?, ?], offset: ?>>, memref<?x?xf32>)
return
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: memref.subview
// CHECK-NEXT: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return
// -----
// Test Case: In the presence of AllocaOps only the AllocOps has top be freed.
// Therefore, all allocas are not handled.
// CHECK-LABEL: func @condBranchAlloca
func.func @condBranchAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
%0 = memref.alloca() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: cf.cond_br
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK: cf.br ^bb3(%[[ALLOCA:.*]])
// CHECK-NEXT: ^bb3
// CHECK-NEXT: test.copy
// CHECK-NEXT: return
// -----
// Test Case: In the presence of AllocaOps only the AllocOps has top be freed.
// Therefore, all allocas are not handled. In this case, only alloc %0 has a
// dealloc.
// CHECK-LABEL: func @ifElseAlloca
func.func @ifElseAlloca(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.cond_br %arg0,
^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
cf.br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
cf.br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
%7 = memref.alloca() : memref<2xf32>
test.buffer_based in(%5: memref<2xf32>) out(%7: memref<2xf32>)
test.copy(%7, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: memref.dealloc %[[ALLOC]]
// CHECK: test.copy
// CHECK-NEXT: return
// -----
// CHECK-LABEL: func @ifElseNestedAlloca
func.func @ifElseNestedAlloca(
%arg0: i1,
%arg1: memref<2xf32>,
%arg2: memref<2xf32>) {
%0 = memref.alloca() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>)
cf.cond_br %arg0,
^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
cf.br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
cf.cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
^bb3(%5: memref<2xf32>):
cf.br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
^bb4(%6: memref<2xf32>):
cf.br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
%9 = memref.alloc() : memref<2xf32>
test.buffer_based in(%7: memref<2xf32>) out(%9: memref<2xf32>)
test.copy(%9, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-NEXT: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based
// CHECK: %[[ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return
// -----
// CHECK-LABEL: func @nestedRegionsAndCondBranchAlloca
func.func @nestedRegionsAndCondBranchAlloca(
%arg0: i1,
%arg1: memref<2xf32>,
%arg2: memref<2xf32>) {
cf.cond_br %arg0, ^bb1, ^bb2
^bb1:
cf.br ^bb3(%arg1 : memref<2xf32>)
^bb2:
%0 = memref.alloc() : memref<2xf32>
test.region_buffer_based in(%arg1: memref<2xf32>) out(%0: memref<2xf32>) {
^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
%1 = memref.alloca() : memref<2xf32>
test.buffer_based in(%arg1: memref<2xf32>) out(%1: memref<2xf32>)
%tmp1 = math.exp %gen1_arg0 : f32
test.region_yield %tmp1 : f32
}
cf.br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cf.cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: ^[[BB1]]:
// CHECK: %[[ALLOC0:.*]] = bufferization.clone
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]]
// CHECK: %{{.*}} = math.exp
// CHECK: %[[ALLOC2:.*]] = bufferization.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
// -----
// CHECK-LABEL: func @nestedRegionControlFlowAlloca
func.func @nestedRegionControlFlowAlloca(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
%0 = arith.cmpi eq, %arg0, %arg1 : index
%1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32>
} else {
%3 = memref.alloca(%arg0, %arg1) : memref<?x?xf32>
scf.yield %1 : memref<?x?xf32>
}
return %2 : memref<?x?xf32>
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca(%arg0, %arg1)
// CHECK-NEXT: scf.yield %[[ALLOC0]]
// CHECK: return %[[ALLOC1]]
// -----
// Test Case: structured control-flow loop using a nested alloc.
// The iteration argument %iterBuf has to be freed before yielding %3 to avoid
// memory leaks.
// CHECK-LABEL: func @loop_alloc
func.func @loop_alloc(
%lb: index,
%ub: index,
%step: index,
%buf: memref<2xf32>,
%res: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
%1 = scf.for %i = %lb to %ub step %step
iter_args(%iterBuf = %buf) -> memref<2xf32> {
%2 = arith.cmpi eq, %i, %ub : index
%3 = memref.alloc() : memref<2xf32>
scf.yield %3 : memref<2xf32>
}
test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: arith.cmpi
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc()
// CHECK: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: }
// CHECK: test.copy(%[[ALLOC2]], %arg4)
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// -----
// Test Case: structured control-flow loop with a nested if operation.
// The loop yields buffers that have been defined outside of the loop and the
// backedges only use the iteration arguments (or one of its aliases).
// Therefore, we do not have to (and are not allowed to) free any buffers
// that are passed via the backedges.
// CHECK-LABEL: func @loop_nested_if_no_alloc
func.func @loop_nested_if_no_alloc(
%lb: index,
%ub: index,
%step: index,
%buf: memref<2xf32>,
%res: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
%1 = scf.for %i = %lb to %ub step %step
iter_args(%iterBuf = %buf) -> memref<2xf32> {
%2 = arith.cmpi eq, %i, %ub : index
%3 = scf.if %2 -> (memref<2xf32>) {
scf.yield %0 : memref<2xf32>
} else {
scf.yield %iterBuf : memref<2xf32>
}
scf.yield %3 : memref<2xf32>
}
test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] =
// CHECK: %[[ALLOC2:.*]] = scf.if
// CHECK: scf.yield %[[ALLOC0]]
// CHECK: scf.yield %[[IALLOC]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: test.copy(%[[ALLOC1]], %arg4)
// CHECK: memref.dealloc %[[ALLOC0]]
// -----
// Test Case: structured control-flow loop with a nested if operation using
// a deeply nested buffer allocation.
// Since the innermost allocation happens in a divergent branch, we have to
// introduce additional copies for the nested if operation. Since the loop's
// yield operation "returns" %3, it will return a newly allocated buffer.
// Therefore, we have to free the iteration argument %iterBuf before
// "returning" %3.
// CHECK-LABEL: func @loop_nested_if_alloc
func.func @loop_nested_if_alloc(
%lb: index,
%ub: index,
%step: index,
%buf: memref<2xf32>) -> memref<2xf32> {
%0 = memref.alloc() : memref<2xf32>
%1 = scf.for %i = %lb to %ub step %step
iter_args(%iterBuf = %buf) -> memref<2xf32> {
%2 = arith.cmpi eq, %i, %ub : index
%3 = scf.if %2 -> (memref<2xf32>) {
%4 = memref.alloc() : memref<2xf32>
scf.yield %4 : memref<2xf32>
} else {
scf.yield %0 : memref<2xf32>
}
scf.yield %3 : memref<2xf32>
}
return %1 : memref<2xf32>
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = scf.if
// CHECK: %[[ALLOC4:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC5:.*]] = bufferization.clone %[[ALLOC4]]
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: scf.yield %[[ALLOC5]]
// CHECK: %[[ALLOC6:.*]] = bufferization.clone %[[ALLOC0]]
// CHECK-NEXT: scf.yield %[[ALLOC6]]
// CHECK: %[[ALLOC7:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
// CHECK: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: return %[[ALLOC2]]
// -----
// Test Case: several nested structured control-flow loops with a deeply nested
// buffer allocation inside an if operation.
// Same behavior is an loop_nested_if_alloc: we have to insert deallocations
// before each yield in all loops recursively.
// CHECK-LABEL: func @loop_nested_alloc
func.func @loop_nested_alloc(
%lb: index,
%ub: index,
%step: index,
%buf: memref<2xf32>,
%res: memref<2xf32>) {
%0 = memref.alloc() : memref<2xf32>
%1 = scf.for %i = %lb to %ub step %step
iter_args(%iterBuf = %buf) -> memref<2xf32> {
%2 = scf.for %i2 = %lb to %ub step %step
iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> {
%3 = scf.for %i3 = %lb to %ub step %step
iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> {
%4 = memref.alloc() : memref<2xf32>
%5 = arith.cmpi eq, %i, %ub : index
%6 = scf.if %5 -> (memref<2xf32>) {
%7 = memref.alloc() : memref<2xf32>
scf.yield %7 : memref<2xf32>
} else {
scf.yield %iterBuf3 : memref<2xf32>
}
scf.yield %6 : memref<2xf32>
}
scf.yield %3 : memref<2xf32>
}
scf.yield %2 : memref<2xf32>
}
test.copy(%1, %res) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: %[[ALLOC1:.*]] = bufferization.clone %arg3
// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]])
// CHECK-NEXT: %[[ALLOC2:.*]] = bufferization.clone %[[IALLOC0]]
// CHECK-NEXT: memref.dealloc %[[IALLOC0]]
// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]])
// CHECK-NEXT: %[[ALLOC5:.*]] = bufferization.clone %[[IALLOC1]]
// CHECK-NEXT: memref.dealloc %[[IALLOC1]]
// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC2:.*]] = %[[ALLOC5]])
// CHECK: %[[ALLOC8:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC8]]
// CHECK: %[[ALLOC9:.*]] = scf.if
// CHECK: %[[ALLOC11:.*]] = memref.alloc()
// CHECK-NEXT: %[[ALLOC12:.*]] = bufferization.clone %[[ALLOC11]]
// CHECK-NEXT: memref.dealloc %[[ALLOC11]]
// CHECK-NEXT: scf.yield %[[ALLOC12]]
// CHECK: %[[ALLOC13:.*]] = bufferization.clone %[[IALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC13]]
// CHECK: memref.dealloc %[[IALLOC2]]
// CHECK-NEXT: %[[ALLOC10:.*]] = bufferization.clone %[[ALLOC9]]
// CHECK-NEXT: memref.dealloc %[[ALLOC9]]
// CHECK-NEXT: scf.yield %[[ALLOC10]]
// CHECK: %[[ALLOC7:.*]] = bufferization.clone %[[ALLOC6]]
// CHECK-NEXT: memref.dealloc %[[ALLOC6]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
// CHECK: %[[ALLOC4:.*]] = bufferization.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC4]]
// CHECK: test.copy(%[[VAL_7]], %arg4)
// CHECK-NEXT: memref.dealloc %[[VAL_7]]
// -----
// CHECK-LABEL: func @affine_loop
func.func @affine_loop() {
%buffer = memref.alloc() : memref<1024xf32>
%sum_init_0 = arith.constant 0.0 : f32
%res = affine.for %i = 0 to 10 step 2 iter_args(%sum_iter = %sum_init_0) -> f32 {
%t = affine.load %buffer[%i] : memref<1024xf32>
%sum_next = arith.addf %sum_iter, %t : f32
affine.yield %sum_next : f32
}
// CHECK: %[[M:.*]] = memref.alloc
// CHECK: affine.for
// CHECK: }
// CHECK-NEXT: memref.dealloc %[[M]]
return
}
// -----
// Test Case: explicit control-flow loop with a dynamically allocated buffer.
// The BufferDeallocation transformation should fail on this explicit
// control-flow loop since they are not supported.
// expected-error@+1 {{Only structured control-flow loops are supported}}
func.func @loop_dynalloc(
%arg0 : i32,
%arg1 : i32,
%arg2: memref<?xf32>,
%arg3: memref<?xf32>) {
%const0 = arith.constant 0 : i32
cf.br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
^loopHeader(%i : i32, %buff : memref<?xf32>):
%lessThan = arith.cmpi slt, %i, %arg1 : i32
cf.cond_br %lessThan,
^loopBody(%i, %buff : i32, memref<?xf32>),
^exit(%buff : memref<?xf32>)
^loopBody(%val : i32, %buff2: memref<?xf32>):
%const1 = arith.constant 1 : i32
%inc = arith.addi %val, %const1 : i32
%size = arith.index_cast %inc : i32 to index
%alloc1 = memref.alloc(%size) : memref<?xf32>
cf.br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
^exit(%buff3 : memref<?xf32>):
test.copy(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>)
return
}
// -----
// Test Case: explicit control-flow loop with a dynamically allocated buffer.
// The BufferDeallocation transformation should fail on this explicit
// control-flow loop since they are not supported.
// expected-error@+1 {{Only structured control-flow loops are supported}}
func.func @do_loop_alloc(
%arg0 : i32,
%arg1 : i32,
%arg2: memref<2xf32>,
%arg3: memref<2xf32>) {
%const0 = arith.constant 0 : i32
cf.br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
^loopBody(%val : i32, %buff2: memref<2xf32>):
%const1 = arith.constant 1 : i32
%inc = arith.addi %val, %const1 : i32
%alloc1 = memref.alloc() : memref<2xf32>
cf.br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
^loopHeader(%i : i32, %buff : memref<2xf32>):
%lessThan = arith.cmpi slt, %i, %arg1 : i32
cf.cond_br %lessThan,
^loopBody(%i, %buff : i32, memref<2xf32>),
^exit(%buff : memref<2xf32>)
^exit(%buff3 : memref<2xf32>):
test.copy(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>)
return
}
// -----
// CHECK-LABEL: func @assumingOp(
func.func @assumingOp(
%arg0: !shape.witness,
%arg2: memref<2xf32>,
%arg3: memref<2xf32>) {
// Confirm the alloc will be dealloc'ed in the block.
%1 = shape.assuming %arg0 -> memref<2xf32> {
%0 = memref.alloc() : memref<2xf32>
shape.assuming_yield %arg2 : memref<2xf32>
}
// Confirm the alloc will be returned and dealloc'ed after its use.
%3 = shape.assuming %arg0 -> memref<2xf32> {
%2 = memref.alloc() : memref<2xf32>
shape.assuming_yield %2 : memref<2xf32>
}
test.copy(%3, %arg3) : (memref<2xf32>, memref<2xf32>)
return
}
// CHECK-SAME: %[[ARG0:.*]]: !shape.witness,
// CHECK-SAME: %[[ARG1:.*]]: {{.*}},
// CHECK-SAME: %[[ARG2:.*]]: {{.*}}
// CHECK: %[[UNUSED_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: shape.assuming_yield %[[ARG1]]
// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[TMP_ALLOC:.*]] = memref.alloc()
// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = bufferization.clone %[[TMP_ALLOC]]
// CHECK-NEXT: memref.dealloc %[[TMP_ALLOC]]
// CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]]
// CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]])
// CHECK-NEXT: memref.dealloc %[[ASSUMING_RESULT]]
// -----
// Test Case: The op "test.bar" does not implement the RegionBranchOpInterface.
// This is not allowed in buffer deallocation.
func.func @noRegionBranchOpInterface() {
// expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
%0 = "test.bar"() ({
// expected-error@+1 {{All operations with attached regions need to implement the RegionBranchOpInterface.}}
%1 = "test.bar"() ({
"test.yield"() : () -> ()
}) : () -> (i32)
"test.yield"() : () -> ()
}) : () -> (i32)
"test.terminator"() : () -> ()
}
// -----
// CHECK-LABEL: func @dealloc_existing_clones
// CHECK: (%[[ARG0:.*]]: memref<?x?xf64>, %[[ARG1:.*]]: memref<?x?xf64>)
// CHECK: %[[RES0:.*]] = bufferization.clone %[[ARG0]]
// CHECK: %[[RES1:.*]] = bufferization.clone %[[ARG1]]
// CHECK-NOT: memref.dealloc %[[RES0]]
// CHECK: memref.dealloc %[[RES1]]
// CHECK: return %[[RES0]]
func.func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64>) -> memref<?x?xf64> {
%0 = bufferization.clone %arg0 : memref<?x?xf64> to memref<?x?xf64>
%1 = bufferization.clone %arg1 : memref<?x?xf64> to memref<?x?xf64>
return %0 : memref<?x?xf64>
}
// -----
// CHECK-LABEL: func @while_two_arg
func.func @while_two_arg(%arg0: index) {
%a = memref.alloc(%arg0) : memref<?xf32>
// CHECK: %[[WHILE:.*]]:2 = scf.while (%[[ARG1:.*]] = %[[ALLOC:.*]], %[[ARG2:.*]] = %[[CLONE:.*]])
scf.while (%arg1 = %a, %arg2 = %a) : (memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>) {
// CHECK-NEXT: make_condition
%0 = "test.make_condition"() : () -> i1
// CHECK-NEXT: bufferization.clone %[[ARG2]]
// CHECK-NEXT: memref.dealloc %[[ARG2]]
scf.condition(%0) %arg1, %arg2 : memref<?xf32>, memref<?xf32>
} do {
^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>):
// CHECK: %[[ALLOC2:.*]] = memref.alloc
%b = memref.alloc(%arg0) : memref<?xf32>
// CHECK: memref.dealloc %[[ARG2]]
// CHECK: %[[CLONE2:.*]] = bufferization.clone %[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
scf.yield %arg1, %b : memref<?xf32>, memref<?xf32>
}
// CHECK: }
// CHECK-NEXT: memref.dealloc %[[WHILE]]#1
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: return
return
}
// -----
func.func @while_three_arg(%arg0: index) {
// CHECK: %[[ALLOC:.*]] = memref.alloc
%a = memref.alloc(%arg0) : memref<?xf32>
// CHECK-NEXT: %[[CLONE1:.*]] = bufferization.clone %[[ALLOC]]
// CHECK-NEXT: %[[CLONE2:.*]] = bufferization.clone %[[ALLOC]]
// CHECK-NEXT: %[[CLONE3:.*]] = bufferization.clone %[[ALLOC]]
// CHECK-NEXT: memref.dealloc %[[ALLOC]]
// CHECK-NEXT: %[[WHILE:.*]]:3 = scf.while
// FIXME: This is non-deterministic
// CHECK-SAME-DAG: [[CLONE1]]
// CHECK-SAME-DAG: [[CLONE2]]
// CHECK-SAME-DAG: [[CLONE3]]
scf.while (%arg1 = %a, %arg2 = %a, %arg3 = %a) : (memref<?xf32>, memref<?xf32>, memref<?xf32>) -> (memref<?xf32>, memref<?xf32>, memref<?xf32>) {
%0 = "test.make_condition"() : () -> i1
scf.condition(%0) %arg1, %arg2, %arg3 : memref<?xf32>, memref<?xf32>, memref<?xf32>
} do {
^bb0(%arg1: memref<?xf32>, %arg2: memref<?xf32>, %arg3: memref<?xf32>):
%b = memref.alloc(%arg0) : memref<?xf32>
%q = memref.alloc(%arg0) : memref<?xf32>
scf.yield %q, %b, %arg2: memref<?xf32>, memref<?xf32>, memref<?xf32>
}
// CHECK-DAG: memref.dealloc %[[WHILE]]#0
// CHECK-DAG: memref.dealloc %[[WHILE]]#1
// CHECK-DAG: memref.dealloc %[[WHILE]]#2
// CHECK-NEXT: return
return
}
// -----
func.func @select_aliases(%arg0: index, %arg1: memref<?xi8>, %arg2: i1) {
// CHECK: memref.alloc
// CHECK: memref.alloc
// CHECK: arith.select
// CHECK: test.copy
// CHECK: memref.dealloc
// CHECK: memref.dealloc
%0 = memref.alloc(%arg0) : memref<?xi8>
%1 = memref.alloc(%arg0) : memref<?xi8>
%2 = arith.select %arg2, %0, %1 : memref<?xi8>
test.copy(%2, %arg1) : (memref<?xi8>, memref<?xi8>)
return
}