llvm-project/mlir/test/Transforms/remove-dead-values.mlir
Matthias Springer c4750d0575
[mlir] Consolidate patterns into RegionBranchOpInterface patterns (#174094)
Instead of op-specific cleanup patterns for region branch ops to remove
unused results / block arguments, etc., add a set of patterns that can
handle all `RegionBranchOpInterface` ops. These patterns are enabled
only for selected SCF dialect ops at the moment:
* `scf.execute_region`
* `scf.for`
* `scf.if`
* `scf.index_switch`
* `scf.while`

It is currently not possible to register canoncalization patterns for op
interfaces and some ops have incorrect interface implementations. In
follow-up PRs, the set of ops will be gradually extended within the SCF
dialect (`scf.forall`) and across other dialects
(`gpu.warp_execute_on_lane0`, (maybe) various affine dialect ops, ...),
and maybe eventually to apply to all `RegionBranchOpInterface` ops.

This commit removes many similar canonicalization patterns from the SCF
dialect. The newly added canonicalization patterns allow users to get
the same canonicalizations for free for their own ops. And even a few
additional new canonicalizations
([example](https://github.com/llvm/llvm-project/pull/174094/files#diff-54318cd685386d5519c42be49818e388b09d934edcbe4280548baa3601802977R2241),
[example](https://github.com/llvm/llvm-project/pull/174094/files#diff-54318cd685386d5519c42be49818e388b09d934edcbe4280548baa3601802977R1101),
...).

Implementation outline: This commit adds 3 canonicalization patterns.
* `MakeRegionBranchOpSuccessorInputsDead`: Remove uses of successor
inputs, by swapping them for successor operand values.
* `RemoveDuplicateSuccessorInputUses`: Remove uses of successor inputs
that are duplicates. (Similar to `WhileRemoveDuplicatedResults` in the
SCF dialect.)
* `RemoveDeadRegionBranchOpSuccessorInputs`: Remove dead successor
inputs if all of their "tied" successor inputs are also dead. (Similar
to `WhileUnusedResult` in the SCF dialect.)
2026-01-13 07:22:09 +00:00

799 lines
30 KiB
MLIR

// RUN: mlir-opt %s -remove-dead-values="canonicalize=0" -split-input-file | FileCheck %s
// RUN: mlir-opt %s -remove-dead-values="canonicalize=1" -split-input-file | FileCheck %s --check-prefix=CHECK-CANONICALIZE
// The IR is updated regardless of memref.global private constant
//
// The `memref.global` symbol itself is expected to stay in the output, while
// the unused `tensor.empty` and `memref.get_global` results inside @main are
// expected to be gone.
module {
// CHECK: memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64}
memref.global "private" constant @__constant_4xi32 : memref<4xi32> = dense<[1, 2, 3, 4]> {alignment = 16 : i64}
func.func @main(%arg0: i32) -> i32 {
// %0 and %1 have no users; only %arg0 reaches the return.
%0 = tensor.empty() : tensor<10xbf16>
// CHECK-NOT: memref.get_global
%1 = memref.get_global @__constant_4xi32 : memref<4xi32>
// CHECK-NOT: tensor.empty
return %arg0 : i32
}
}
// -----
// Dead values are removed from the IR even if the module has a name
//
// Same shape as the unnamed-module case: the only dead value is an unused
// `tensor.empty`, and the enclosing module carries a symbol name.
module @named_module_acceptable {
func.func @main(%arg0: tensor<10xf32>) -> tensor<10xf32> {
// %0 has no users.
%0 = tensor.empty() : tensor<10xbf16>
// CHECK-NOT: tensor.empty
return %arg0 : tensor<10xf32>
}
}
// -----
// The IR contains both conditional and unconditional branches with a loop
// in which the last cf.cond_br is referencing the first cf.br
//
// Every block argument below only feeds other branch operands and never a
// returned or side-effecting value, so the expectations require all branches
// and block headers to be printed without any operands/arguments.
func.func @acceptable_ir_has_cleanable_loop_of_conditional_and_branch_op(%arg0: i1) {
%non_live = arith.constant 0 : i32
// CHECK-NOT: arith.constant
cf.br ^bb1(%non_live : i32)
// CHECK: cf.br ^[[BB1:bb[0-9]+]]
^bb1(%non_live_1 : i32):
// CHECK: ^[[BB1]]:
%non_live_5 = arith.constant 1 : i32
cf.br ^bb3(%non_live_1, %non_live_5 : i32, i32)
// CHECK: cf.br ^[[BB3:bb[0-9]+]]
// CHECK-NOT: i32
^bb3(%non_live_2 : i32, %non_live_6 : i32):
// CHECK: ^[[BB3]]:
// Back edge to ^bb1 closes the loop; ^bb4 is the exit.
cf.cond_br %arg0, ^bb1(%non_live_2 : i32), ^bb4(%non_live_2 : i32)
// CHECK: cf.cond_br %arg0, ^[[BB1]], ^[[BB4:bb[0-9]+]]
^bb4(%non_live_4 : i32):
// CHECK: ^[[BB4]]:
return
}
// -----
// Checking that iter_args are properly handled
//
// The second iter_arg (%non_live_arg) is only forwarded to itself and to the
// unused second loop result, so the expected output is a single-result loop
// that carries only the live accumulator.
//
// CHECK-CANONICALIZE-LABEL: func @cleanable_loop_iter_args_value
func.func @cleanable_loop_iter_args_value(%arg0: index) -> index {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%non_live = arith.constant 0 : index
// CHECK-CANONICALIZE: [[RESULT:%.+]] = scf.for [[ARG_1:%.*]] = %c0 to %c10 step %c1 iter_args([[ARG_2:%.*]] = %arg0) -> (index) {
%result, %result_non_live = scf.for %i = %c0 to %c10 step %c1 iter_args(%live_arg = %arg0, %non_live_arg = %non_live) -> (index, index) {
// CHECK-CANONICALIZE: [[SUM:%.+]] = arith.addi [[ARG_2]], [[ARG_1]] : index
%new_live = arith.addi %live_arg, %i : index
// Use the previously captured SUM here instead of capturing it again, so the
// yielded value is verified to be the result of the addition above.
// CHECK-CANONICALIZE: scf.yield [[SUM]]
scf.yield %new_live, %non_live_arg : index, index
}
// CHECK-CANONICALIZE: return [[RESULT]] : index
return %result : index
}
// -----
// Checking that the arguments of linalg.generic are properly handled
// All code below is removed as a result of the pass
//
// The linalg.generic result %1 is never used, so the op, its region body and
// the constants / tensor.empty feeding it are all expected to be absent.
#map = affine_map<(d0, d1, d2) -> (0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
module {
// CHECK-LABEL: @dead_linalg_generic
func.func @dead_linalg_generic() {
%cst_3 = arith.constant dense<54> : tensor<1x25x13xi32>
%cst_7 = arith.constant dense<11> : tensor<1x25x13xi32>
// CHECK-NOT: arith.constant
%0 = tensor.empty() : tensor<1x25x13xi32>
// CHECK-NOT: tensor
%1 = linalg.generic {indexing_maps = [#map, #map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst_3, %cst_7 : tensor<1x25x13xi32>, tensor<1x25x13xi32>) outs(%0 : tensor<1x25x13xi32>) {
// CHECK-NOT: linalg.generic
^bb0(%in: i32, %in_15: i32, %out: i32):
%29 = arith.xori %in, %in_15 : i32
// CHECK-NOT: arith.xori
linalg.yield %29 : i32
// CHECK-NOT: linalg.yield
} -> tensor<1x25x13xi32>
return
}
}
// -----
// The private callee just returns its argument, and the single call site in
// @main ignores the result. The expectations require both the callee's
// argument and its return value to be dropped, together with the matching
// call operand and call result.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// CHECK-LABEL: func.func private @clean_func_op_remove_argument_and_return_value() {
// CHECK-NEXT: return
// CHECK-NEXT: }
// CHECK: func.func @main(%[[arg0:.*]]: i32) {
// CHECK-NEXT: call @clean_func_op_remove_argument_and_return_value() : () -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func.func private @clean_func_op_remove_argument_and_return_value(%arg0: i32) -> (i32) {
return %arg0 : i32
}
func.func @main(%arg0 : i32) {
// The call result is never used.
%non_live = func.call @clean_func_op_remove_argument_and_return_value(%arg0) : (i32) -> (i32)
return
}
// -----
// A private function with no visible caller: the expectations require its
// argument, its return value and the `vector.print` that consumes the argument
// to all be removed, leaving an empty function that only returns.
//
// CHECK-LABEL: func.func private @clean_func_op_remove_side_effecting_op() {
// CHECK-NEXT: return
// CHECK-NEXT: }
func.func private @clean_func_op_remove_side_effecting_op(%arg0: i32) -> (i32) {
// vector.print has a side effect but the op is dead.
vector.print %arg0 : i32
return %arg0 : i32
}
// -----
// %arg0 is not live because it is never used. %arg1 is not live because its
// user `arith.addi` doesn't have any uses and the value that it is forwarded to
// (%non_live_0) also doesn't have any uses.
//
// The call site uses `test.call_on_device`, which takes an extra %device
// operand in addition to the callee arguments; the expectations require that
// operand to survive while the two callee arguments are dropped.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// CHECK-LABEL: func.func private @clean_func_op_remove_arguments() -> i32 {
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0
// CHECK-NEXT: return %[[c0]]
// CHECK-NEXT: }
// CHECK: func.func @main(%[[arg2:.*]]: memref<i32>, %[[arg3:.*]]: i32, %[[DEVICE:.*]]: i32) -> (i32, memref<i32>) {
// CHECK-NEXT: %[[live:.*]] = test.call_on_device @clean_func_op_remove_arguments(), %[[DEVICE]] : (i32) -> i32
// CHECK-NEXT: return %[[live]], %[[arg2]]
// CHECK-NEXT: }
func.func private @clean_func_op_remove_arguments(%arg0 : memref<i32>, %arg1 : i32) -> (i32, i32) {
%c0 = arith.constant 0 : i32
%non_live = arith.addi %arg1, %arg1 : i32
return %c0, %arg1 : i32, i32
}
func.func @main(%arg2 : memref<i32>, %arg3 : i32, %device : i32) -> (i32, memref<i32>) {
// Only the first call result (%live) is returned; %non_live_0 is unused.
%live, %non_live_0 = test.call_on_device @clean_func_op_remove_arguments(%arg2, %arg3), %device : (memref<i32>, i32, i32) -> (i32, i32)
return %live, %arg2 : i32, memref<i32>
}
// -----
// Even though %non_live_0 is not live, the first return value of
// @clean_func_op_remove_return_values isn't removed because %live is live
// (liveness is checked across all callers).
//
// Also, the second return value of @clean_func_op_remove_return_values is
// removed despite %c0 being live because neither %non_live nor %non_live_1 were
// live (removal doesn't depend on the liveness of the operand itself but on the
// liveness of where it is forwarded).
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// The constant is captured here (`c0:`) rather than merely referenced, so this
// test case does not depend on a capture left over from an earlier test case
// in this file.
//
// CHECK: func.func private @clean_func_op_remove_return_values(%[[arg0:.*]]: memref<i32>) -> i32 {
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0
// CHECK-NEXT: memref.store %[[c0]], %[[arg0]][]
// CHECK-NEXT: return %[[c0]]
// CHECK-NEXT: }
// CHECK: func.func @main(%[[arg1:.*]]: memref<i32>) -> i32 {
// CHECK-NEXT: %[[live:.*]] = call @clean_func_op_remove_return_values(%[[arg1]]) : (memref<i32>) -> i32
// CHECK-NEXT: %[[non_live_0:.*]] = call @clean_func_op_remove_return_values(%[[arg1]]) : (memref<i32>) -> i32
// CHECK-NEXT: return %[[live]] : i32
// CHECK-NEXT: }
func.func private @clean_func_op_remove_return_values(%arg0 : memref<i32>) -> (i32, i32) {
%c0 = arith.constant 0 : i32
// The store keeps %c0 (and the function body) alive.
memref.store %c0, %arg0[] : memref<i32>
return %c0, %c0 : i32, i32
}
func.func @main(%arg1 : memref<i32>) -> (i32) {
// Two call sites: only %live from the first call is used.
%live, %non_live = func.call @clean_func_op_remove_return_values(%arg1) : (memref<i32>) -> (i32, i32)
%non_live_0, %non_live_1 = func.call @clean_func_op_remove_return_values(%arg1) : (memref<i32>) -> (i32, i32)
return %live : i32
}
// -----
// None of the return values of @clean_func_op_dont_remove_return_values can be
// removed because the first one is forwarded to a live value %live and the
// second one is forwarded to a live value %live_0.
//
// Each call site uses a different one of the two results, so the expectations
// require both calls to keep their full two-result form.
//
// CHECK-LABEL: func.func private @clean_func_op_dont_remove_return_values() -> (i32, i32) {
// CHECK-NEXT: %[[c0:.*]] = arith.constant 0 : i32
// CHECK-NEXT: return %[[c0]], %[[c0]] : i32, i32
// CHECK-NEXT: }
// CHECK-LABEL: func.func @main() -> (i32, i32) {
// CHECK-NEXT: %[[live_and_non_live:.*]]:2 = call @clean_func_op_dont_remove_return_values() : () -> (i32, i32)
// CHECK-NEXT: %[[non_live_0_and_live_0:.*]]:2 = call @clean_func_op_dont_remove_return_values() : () -> (i32, i32)
// CHECK-NEXT: return %[[live_and_non_live]]#0, %[[non_live_0_and_live_0]]#1 : i32, i32
// CHECK-NEXT: }
func.func private @clean_func_op_dont_remove_return_values() -> (i32, i32) {
%c0 = arith.constant 0 : i32
return %c0, %c0 : i32, i32
}
func.func @main() -> (i32, i32) {
// First call: result #0 is used. Second call: result #1 is used.
%live, %non_live = func.call @clean_func_op_dont_remove_return_values() : () -> (i32, i32)
%non_live_0, %live_0 = func.call @clean_func_op_dont_remove_return_values() : () -> (i32, i32)
return %live, %live_0 : i32, i32
}
// -----
// Values kept:
// (1) %non_live is not live. Yet, it is kept because %arg4 in `scf.condition`
// forwards to it, which has to be kept. %arg4 in `scf.condition` has to be
// kept because it forwards to %arg6 which is live.
//
// (2) %arg5 is not live. Yet, it is kept because %live_0 forwards to it, which
// also forwards to %live, which is live.
//
// Values not kept:
// (1) %arg1 is not kept as an operand of `scf.while` because it only forwards
// to %arg3, which is not kept. %arg3 is not kept because %arg3 is not live and
// only %arg1 and %arg7 forward to it, such that neither of them forward
// anywhere else. Thus, %arg7 is also not kept in the `scf.yield` op.
//
// Two sets of expectations follow. Without canonicalization the loop keeps its
// original arity and the dead positions are filled with `ub.poison` values.
// With canonicalization the dead init operand and the third result / block
// argument are expected to be removed entirely.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
// CHECK-LABEL: func.func @clean_region_branch_op_dont_remove_first_2_results_but_remove_first_operand(
// CHECK-SAME: %[[arg0:.*]]: i1, %[[arg1:.*]]: i32, %[[arg2:.*]]: i32) -> i32 {
// CHECK-NEXT: %[[p0:.*]] = ub.poison : i32
// CHECK-NEXT: %[[while:.*]]:3 = scf.while (%{{.*}} = %[[p0]], %[[arg4:.*]] = %[[arg2]]) : (i32, i32) -> (i32, i32, i32) {
// CHECK-NEXT: %[[add1:.*]] = arith.addi %[[arg4]], %[[arg4]] : i32
// CHECK-NEXT: %[[p1:.*]] = ub.poison : i32
// CHECK-NEXT: scf.condition(%[[arg0]]) %[[add1]], %[[arg4]], %[[p1]] : i32, i32, i32
// CHECK-NEXT: } do {
// CHECK-NEXT: ^bb0(%{{.*}}: i32, %[[arg6:.*]]: i32, %{{.*}}: i32):
// CHECK-NEXT: %[[add2:.*]] = arith.addi %[[arg6]], %[[arg6]] : i32
// CHECK-NEXT: %[[p2:.*]] = ub.poison : i32
// CHECK-NEXT: scf.yield %[[p2]], %[[add2]] : i32, i32
// CHECK-NEXT: }
// CHECK-NEXT: return %[[while]]#0 : i32
// CHECK-NEXT: }
// CHECK-CANONICALIZE: func.func @clean_region_branch_op_dont_remove_first_2_results_but_remove_first_operand(%[[arg0:.*]]: i1, %[[arg1:.*]]: i32, %[[arg2:.*]]: i32) -> i32 {
// CHECK-CANONICALIZE: %[[live_and_non_live:.*]]:2 = scf.while (%[[arg4:.*]] = %[[arg2]]) : (i32) -> (i32, i32) {
// CHECK-CANONICALIZE-NEXT: %[[live_0:.*]] = arith.addi %[[arg4]], %[[arg4]]
// CHECK-CANONICALIZE: scf.condition(%arg0) %[[live_0]], %[[arg4]] : i32, i32
// CHECK-CANONICALIZE-NEXT: } do {
// CHECK-CANONICALIZE-NEXT: ^bb0(%[[arg5:.*]]: i32, %[[arg6:.*]]: i32):
// CHECK-CANONICALIZE-NEXT: %[[live_1:.*]] = arith.addi %[[arg6]], %[[arg6]]
// CHECK-CANONICALIZE: scf.yield %[[live_1]] : i32
// CHECK-CANONICALIZE-NEXT: }
// CHECK-CANONICALIZE-NEXT: return %[[live_and_non_live]]#0
// CHECK-CANONICALIZE-NEXT: }
func.func @clean_region_branch_op_dont_remove_first_2_results_but_remove_first_operand(%arg0: i1, %arg1: i32, %arg2: i32) -> (i32) {
// Only the first of the three loop results is returned.
%live, %non_live, %non_live_0 = scf.while (%arg3 = %arg1, %arg4 = %arg2) : (i32, i32) -> (i32, i32, i32) {
%live_0 = arith.addi %arg4, %arg4 : i32
%non_live_1 = arith.addi %arg3, %arg3 : i32
scf.condition(%arg0) %live_0, %arg4, %non_live_1 : i32, i32, i32
} do {
^bb0(%arg5: i32, %arg6: i32, %arg7: i32):
%live_1 = arith.addi %arg6, %arg6 : i32
scf.yield %arg7, %live_1 : i32, i32
}
return %live : i32
}
// -----
// Values kept:
// (1) %live is kept because it is live.
//
// (2) %non_live is not live. Yet, it is kept because %arg3 in `scf.condition`
// forwards to it and this %arg3 has to be kept. This %arg3 in `scf.condition`
// has to be kept because it forwards to %arg6, which forwards to %arg4, which
// forwards to %live, which is live.
//
// Values not kept:
// (1) %non_live_0 is not kept because %non_live_2 in `scf.condition` forwards
// to it, which forwards to only %non_live_0 and %arg7, where both these are
// not live and have no other value forwarding to them.
//
// (2) %non_live_1 is not kept because %non_live_3 in `scf.condition` forwards
// to it, which forwards to only %non_live_1 and %arg8, where both these are
// not live and have no other value forwarding to them.
//
// (3) %c2 is not kept because it only forwards to %arg10, which is not kept.
//
// (4) %arg10 is not kept because only %c2 and %non_live_4 forward to it, none
// of them forward anywhere else, and %arg10 is not live.
//
// (5) %arg7 and %arg8 are not kept because they are not live, %non_live_2 and
// %non_live_3 forward to them, and both only otherwise forward to %non_live_0
// and %non_live_1 which are not live and have no other predecessors.
//
// Only the canonicalizing configuration has expectations for this test case.
// They also cover the private @identity callee, whose argument and result are
// expected to be dropped while the call itself remains.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// CHECK-CANONICALIZE: func.func @clean_region_branch_op_remove_last_2_results_last_2_arguments_and_last_operand(%[[arg2:.*]]: i1) -> i32 {
// CHECK-CANONICALIZE-NEXT: %[[c0:.*]] = arith.constant 0
// CHECK-CANONICALIZE-NEXT: %[[c1:.*]] = arith.constant 1
// CHECK-CANONICALIZE: %[[live_and_non_live:.*]]:2 = scf.while (%[[arg3:.*]] = %[[c0]], %[[arg4:.*]] = %[[c1]]) : (i32, i32) -> (i32, i32) {
// CHECK-CANONICALIZE-NEXT: func.call @identity() : () -> ()
// CHECK-CANONICALIZE-NEXT: scf.condition(%[[arg2]]) %[[arg3]], %[[arg4]] : i32, i32
// CHECK-CANONICALIZE-NEXT: } do {
// CHECK-CANONICALIZE-NEXT: ^bb0(%[[arg5:.*]]: i32, %[[arg6:.*]]: i32):
// CHECK-CANONICALIZE-NEXT: scf.yield %[[arg6]], %[[arg5]] : i32, i32
// CHECK-CANONICALIZE-NEXT: }
// CHECK-CANONICALIZE-NEXT: return %[[live_and_non_live]]#1 : i32
// CHECK-CANONICALIZE-NEXT: }
// CHECK-CANONICALIZE: func.func private @identity() {
// CHECK-CANONICALIZE-NEXT: return
// CHECK-CANONICALIZE-NEXT: }
func.func @clean_region_branch_op_remove_last_2_results_last_2_arguments_and_last_operand(%arg2: i1) -> (i32) {
%c0 = arith.constant 0 : i32
%c1 = arith.constant 1 : i32
%c2 = arith.constant 2 : i32
// Only the first of the four loop results is returned.
%live, %non_live, %non_live_0, %non_live_1 = scf.while (%arg3 = %c0, %arg4 = %c1, %arg10 = %c2) : (i32, i32, i32) -> (i32, i32, i32, i32) {
%non_live_2 = arith.addi %arg10, %arg10 : i32
%non_live_3 = func.call @identity(%arg10) : (i32) -> (i32)
// Note that %arg4 and %arg3 are forwarded in swapped order.
scf.condition(%arg2) %arg4, %arg3, %non_live_2, %non_live_3 : i32, i32, i32, i32
} do {
^bb0(%arg5: i32, %arg6: i32, %arg7: i32, %arg8: i32):
%non_live_4 = arith.addi %arg7, %arg8 :i32
scf.yield %arg5, %arg6, %non_live_4 : i32, i32, i32
}
return %live : i32
}
func.func private @identity(%arg1 : i32) -> (i32) {
return %arg1 : i32
}
// -----
// The op isn't erased because it has memory effects but its unnecessary result
// is removed.
//
// Only `case 1` contains a `memref.store`; the `default` region merely yields
// a constant. The expectations require the switch to become result-less, with
// the store still present in `case 1`.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// CHECK-CANONICALIZE: func.func @clean_region_branch_op_remove_result(%[[arg0:.*]]: index, %[[arg1:.*]]: memref<i32>) {
// CHECK-CANONICALIZE-NEXT: scf.index_switch %[[arg0]]
// CHECK-CANONICALIZE-NEXT: case 1 {
// CHECK-CANONICALIZE-NEXT: %[[c10:.*]] = arith.constant 10
// CHECK-CANONICALIZE-NEXT: memref.store %[[c10]], %[[arg1]][]
// CHECK-CANONICALIZE: scf.yield
// CHECK-CANONICALIZE-NEXT: }
// CHECK-CANONICALIZE-NEXT: default {
// CHECK-CANONICALIZE: }
// CHECK-CANONICALIZE-NEXT: return
// CHECK-CANONICALIZE-NEXT: }
func.func @clean_region_branch_op_remove_result(%arg0 : index, %arg1 : memref<i32>) {
// %non_live has no users.
%non_live = scf.index_switch %arg0 -> i32
case 1 {
%c10 = arith.constant 10 : i32
memref.store %c10, %arg1[] : memref<i32>
scf.yield %c10 : i32
}
default {
%c11 = arith.constant 11 : i32
scf.yield %c11 : i32
}
return
}
// -----
// The simple ops which don't have memory effects or live results get removed.
// %arg5 doesn't get removed from the @main even though it isn't live because
// the signature of a public function is always left untouched.
//
// In the private callee, only the first of the four return values is used by
// the caller; the stored value (%live_2) is kept alive by the `memref.store`.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// CHECK: func.func private @clean_simple_ops(%[[arg0:.*]]: i32, %[[arg1:.*]]: memref<i32>)
// CHECK-NEXT: %[[live_0:.*]] = arith.addi %[[arg0]], %[[arg0]]
// CHECK-NEXT: %[[c2:.*]] = arith.constant 2
// CHECK-NEXT: %[[live_1:.*]] = arith.muli %[[live_0]], %[[c2]]
// CHECK-NEXT: %[[c3:.*]] = arith.constant 3
// CHECK-NEXT: %[[live_2:.*]] = arith.addi %[[arg0]], %[[c3]]
// CHECK-NEXT: memref.store %[[live_2]], %[[arg1]][]
// CHECK-NEXT: return %[[live_1]]
// CHECK-NEXT: }
// CHECK: func.func @main(%[[arg3:.*]]: i32, %[[arg4:.*]]: memref<i32>, %[[arg5:.*]]
// CHECK-NEXT: %[[live:.*]] = call @clean_simple_ops(%[[arg3]], %[[arg4]])
// CHECK-NEXT: return %[[live]]
// CHECK-NEXT: }
func.func private @clean_simple_ops(%arg0 : i32, %arg1 : memref<i32>, %arg2 : i32) -> (i32, i32, i32, i32) {
%live_0 = arith.addi %arg0, %arg0 : i32
%c2 = arith.constant 2 : i32
%live_1 = arith.muli %live_0, %c2 : i32
// The three %non_live_* values only reach return positions 1-3, which the
// caller never uses.
%non_live_1 = arith.addi %live_1, %live_0 : i32
%non_live_2 = arith.constant 7 : i32
%non_live_3 = arith.subi %arg0, %non_live_1 : i32
%c3 = arith.constant 3 : i32
%live_2 = arith.addi %arg0, %c3 : i32
memref.store %live_2, %arg1[] : memref<i32>
return %live_1, %non_live_1, %non_live_2, %non_live_3 : i32, i32, i32, i32
}
func.func @main(%arg3 : i32, %arg4 : memref<i32>, %arg5 : i32) -> (i32) {
%live, %non_live_1, %non_live_2, %non_live_3 = func.call @clean_simple_ops(%arg3, %arg4, %arg5) : (i32, memref<i32>, i32) -> (i32, i32, i32, i32)
return %live : i32
}
// -----
// The scf.while op has no memory effects and its result isn't live.
//
// The loop result is returned from the private callee, but the only caller
// ignores it. The expectations require the whole loop, both callee arguments
// and the return value to be gone, leaving an argument-less call.
//
// Note that this cleanup cannot be done by the `canonicalize` pass.
//
// CHECK-LABEL: func.func private @clean_region_branch_op_erase_it() {
// CHECK-NEXT: return
// CHECK-NEXT: }
// CHECK: func.func @main(%[[arg3:.*]]: i32, %[[arg4:.*]]: i1) {
// CHECK-NEXT: call @clean_region_branch_op_erase_it() : () -> ()
// CHECK-NEXT: return
// CHECK-NEXT: }
func.func private @clean_region_branch_op_erase_it(%arg0 : i32, %arg1 : i1) -> (i32) {
// Note: the name %arg2 is used independently in both regions of the loop.
%non_live = scf.while (%arg2 = %arg0) : (i32) -> (i32) {
scf.condition(%arg1) %arg2 : i32
} do {
^bb0(%arg2: i32):
scf.yield %arg2 : i32
}
return %non_live : i32
}
func.func @main(%arg3 : i32, %arg4 : i1) {
// The call result is never used.
%non_live_0 = func.call @clean_region_branch_op_erase_it(%arg3, %arg4) : (i32, i1) -> (i32)
return
}
// -----
// The scf.if operation represents an if-then-else construct for conditionally
// executing two regions of code. The 'then' region has exactly 1 block, and
// the 'else' region may have 0 or 1 block. This case is to ensure that an
// 'else' region with 0 blocks does not cause a crash.
// CHECK-LABEL: func.func @clean_region_branch_op_with_empty_region
func.func @clean_region_branch_op_with_empty_region(%arg0: i1, %arg1: memref<f32>) {
%cst = arith.constant 1.000000e+00 : f32
// No `else` branch is written, and the op has no results.
scf.if %arg0 {
memref.store %cst, %arg1[] : memref<f32>
}
return
}
// -----
// A `gpu.launch` whose body stores to the function's memref argument. The
// expectations (placed after the function) require the launch op, the store
// and the terminator to still be present in the output.
#map = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
func.func @kernel(%arg0: memref<18xf32>) {
%c1 = arith.constant 1 : index
%c18 = arith.constant 18 : index
gpu.launch blocks(%arg3, %arg4, %arg5) in (%arg9 = %c18, %arg10 = %c18, %arg11 = %c18) threads(%arg6, %arg7, %arg8) in (%arg12 = %c1, %arg13 = %c1, %arg14 = %c1) {
%c1_0 = arith.constant 1 : index
%c0_1 = arith.constant 0 : index
%cst_2 = arith.constant 25.4669495 : f32
// The store index is derived from the first block id (%arg3).
%6 = affine.apply #map(%arg3)[%c1_0, %c0_1]
memref.store %cst_2, %arg0[%6] : memref<18xf32>
gpu.terminator
} {SCFToGPU_visited}
return
}
// CHECK-LABEL: func.func @kernel(%arg0: memref<18xf32>) {
// CHECK: gpu.launch blocks
// CHECK: memref.store
// CHECK-NEXT: gpu.terminator
// -----
// A private function whose body consists solely of `llvm.unreachable` (it has
// no return op), called from another private function. The expectations only
// require that the module, both functions and the `llvm.return` are still
// printed.
// CHECK-LABEL: llvm_unreachable
// CHECK-LABEL: @fn_with_llvm_unreachable
// CHECK-LABEL: @main
// CHECK: llvm.return
module @llvm_unreachable {
func.func private @fn_with_llvm_unreachable(%arg0: tensor<4x4xf32>) -> tensor<4x4xi1> {
llvm.unreachable
}
func.func private @main(%arg0: tensor<4x4xf32>) {
%0 = call @fn_with_llvm_unreachable(%arg0) : (tensor<4x4xf32>) -> tensor<4x4xi1>
llvm.return
}
}
// A body-less private function declaration in the same test case; it is
// expected to be printed unchanged.
// CHECK: func.func private @no_block_func_declaration()
func.func private @no_block_func_declaration() -> ()
// -----
// A body-less `llvm.func` declaration marked private through an explicit
// `sym_visibility` attribute; it is expected to still be present in the output.
// CHECK: llvm.func @no_block_external_func()
llvm.func @no_block_external_func() attributes {sym_visibility = "private"}
// -----
// Check that yielded values aren't incorrectly removed in gpu regions
//
// The reduction body of `gpu.all_reduce` yields %xor through a `gpu.yield`
// written in generic form. The expectations (placed after the module) require
// both the `arith.xori` and the yield of its result to be kept.
gpu.module @test_module_3 {
gpu.func @gpu_all_reduce_region() {
%arg0 = arith.constant 1 : i32
%result = gpu.all_reduce %arg0 uniform {
^bb(%lhs : i32, %rhs : i32):
%xor = arith.xori %lhs, %rhs : i32
"gpu.yield"(%xor) : (i32) -> ()
} : (i32) -> (i32)
gpu.return
}
}
// CHECK-LABEL: func @gpu_all_reduce_region()
// CHECK: %[[yield:.*]] = arith.xori %{{.*}}, %{{.*}} : i32
// CHECK: gpu.yield %[[yield]] : i32
// -----
// Check that yielded values aren't incorrectly removed in linalg regions
//
// The linalg.generic result is returned, so the yielded %1 must be kept. The
// unused %2 in the same region body is still expected to be removed.
module {
func.func @linalg_red_add(%arg0: tensor<?xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> {
%0 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (0)>],
iterator_types = ["reduction"]
} ins(%arg0 : tensor<?xf32>) outs(%arg1 : tensor<1xf32>) {
^bb0(%in: f32, %out: f32):
%1 = arith.addf %in, %out : f32
%2 = arith.subf %1, %out : f32 // this should still be removed
linalg.yield %1 : f32
} -> tensor<1xf32>
return %0 : tensor<1xf32>
}
}
// CHECK-LABEL: func @linalg_red_add
// CHECK: %[[yield:.*]] = arith.addf %{{.*}}, %{{.*}} : f32
// CHECK: linalg.yield %[[yield]] : f32
// CHECK-NOT: arith.subf
// -----
// check that ops with zero operands are correctly handled
//
// `memref.alloca_scope` takes no operands and its result is unused here, but
// its body contains a store. The expectations apply to the canonicalizing
// configuration only: the store must remain, and no
// `memref.alloca_scope.return` may follow it.
module {
func.func @test_zero_operands(%I: memref<10xindex>, %I2: memref<10xf32>) {
%v0 = arith.constant 0 : index
%result = memref.alloca_scope -> index {
%c = arith.addi %v0, %v0 : index
memref.store %c, %I[%v0] : memref<10xindex>
memref.alloca_scope.return %c: index
}
func.return
}
}
// CHECK-CANONICALIZE-LABEL: func @test_zero_operands
// CHECK-CANONICALIZE-NEXT: %[[c0:.*]] = arith.constant 0
// CHECK-CANONICALIZE-NEXT: memref.store %[[c0]]
// CHECK-CANONICALIZE-NOT: memref.alloca_scope.return
// -----
// The result %x of `memref.generic_atomic_rmw` is unused, but the expectations
// require the op, the constant in its body and the `memref.atomic_yield` of
// that constant to all be kept.
// CHECK-LABEL: func.func @test_atomic_yield
func.func @test_atomic_yield(%I: memref<10xf32>, %idx : index) {
// CHECK: memref.generic_atomic_rmw
%x = memref.generic_atomic_rmw %I[%idx] : memref<10xf32> {
^bb0(%current_value : f32):
// CHECK: arith.constant
%c1 = arith.constant 1.0 : f32
// CHECK: memref.atomic_yield
memref.atomic_yield %c1 : f32
}
func.return
}
// -----
// A private function returning nothing, with one used argument (%arg0) and
// one unused argument (%arg1). The expectations require the unused argument
// to be dropped from the callee signature and from the call site, while the
// public @main keeps its own signature.
// CHECK-LABEL: module @return_void_with_unused_argument
module @return_void_with_unused_argument {
// CHECK-LABEL: func.func private @fn_return_void_with_unused_argument
// CHECK-SAME: (%[[ARG0_FN:.*]]: i32)
func.func private @fn_return_void_with_unused_argument(%arg0: i32, %arg1: memref<4xi32>) -> () {
%sum = arith.addi %arg0, %arg0 : i32
%c0 = arith.constant 0 : index
%buf = memref.alloc() : memref<1xi32>
// The store makes %sum (and therefore %arg0) live.
memref.store %sum, %buf[%c0] : memref<1xi32>
return
}
// CHECK-LABEL: func.func @main
// CHECK-SAME: (%[[ARG0_MAIN:.*]]: i32)
// CHECK: call @fn_return_void_with_unused_argument(%[[ARG0_MAIN]]) : (i32) -> ()
func.func @main(%arg0: i32) -> memref<4xi32> {
// %unused is passed to the callee (which ignores it) and is also returned.
%unused = memref.alloc() : memref<4xi32>
call @fn_return_void_with_unused_argument(%arg0, %unused) : (i32, memref<4xi32>) -> ()
return %unused : memref<4xi32>
}
}
// -----
// Two modules share this test case. The first has a block that can only be
// reached through a branch whose condition is a constant `false`; the second
// has a private function whose last block is not a returning block.
// CHECK-LABEL: module @dynamically_unreachable
module @dynamically_unreachable {
func.func @dynamically_unreachable() {
// This value is used by an operation in a dynamically unreachable block.
%zero = arith.constant 0 : i64
// Dataflow analysis knows from the constant condition that
// ^bb1 is unreachable
%false = arith.constant false
cf.cond_br %false, ^bb1, ^bb4
^bb1:
// This unreachable operation should be removed.
// CHECK-NOT: arith.cmpi
%3 = arith.cmpi eq, %zero, %zero : i64
cf.br ^bb1
^bb4:
return
}
}
// CHECK-LABEL: module @last_block_not_exit
module @last_block_not_exit {
// return value can be removed because it's private.
func.func private @terminated_with_condbr(%arg0: i1, %arg1: i1) -> i1 {
%true = arith.constant true
%false = arith.constant false
cf.cond_br %arg0, ^bb1(%false : i1), ^bb2
^bb1(%1: i1): // 2 preds: ^bb0, ^bb2
return %1 : i1
^bb2: // pred: ^bb0
cf.cond_br %arg1, ^bb1(%false : i1), ^bb1(%true : i1)
}
func.func public @call_private_but_not_use() {
%i0 = arith.constant 0: i1
%i1 = arith.constant 1: i1
// The call result is discarded.
call @terminated_with_condbr(%i0, %i1) : (i1, i1) -> i1
func.return
}
// CHECK-LABEL: @call_private_but_not_use
// CHECK: call @terminated_with_condbr(%false, %true) : (i1, i1)
}
// -----
// Test the elimination of function arguments.
//
// Three private functions are called (directly or transitively) from the
// public @callee. The expectations require unused arguments of the private
// functions to be dropped from both signatures and call sites, while the
// public function keeps all three of its arguments.
// CHECK-LABEL: func private @single_parameter
// CHECK-SAME: () {
func.func private @single_parameter(%arg0: index) {
return
}
// Only the middle argument (%arg1) is returned.
// CHECK-LABEL: func.func private @mutl_parameter(
// CHECK-SAME: %[[ARG0:.*]]: index)
// CHECK: return %[[ARG0]]
func.func private @mutl_parameter(%arg0: index, %arg1: index, %arg2: index) -> index {
return %arg1 : index
}
// %arg0 is only forwarded to @single_parameter, which does not use it;
// %arg1 is not used at all.
// CHECK-LABEL: func private @eliminate_parameter
// CHECK-SAME: () {
func.func private @eliminate_parameter(%arg0: index, %arg1: index) {
call @single_parameter(%arg0) : (index) -> ()
return
}
// CHECK-LABEL: func @callee
// CHECK-SAME: (%[[ARG0:.*]]: index, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index)
func.func @callee(%arg0: index, %arg1: index, %arg2: index) -> index {
// CHECK: call @eliminate_parameter() : () -> ()
call @eliminate_parameter(%arg0, %arg1) : (index, index) -> ()
// CHECK: call @mutl_parameter(%[[ARG1]]) : (index) -> index
%res = call @mutl_parameter(%arg0, %arg1, %arg2) : (index, index, index) -> (index)
return %res : index
}
// -----
// This test verifies that the induction variables in loops are not deleted, the loop has results.
//
// The induction variable %iv is unused in the body, but the loop result is
// returned and the body contains a `cf.assert`, so the loop must survive.
// The label below uses the full function name so it cannot match the sibling
// `..._no_result` function, and the loop and the assert are pinned explicitly.
// CHECK-LABEL: func @dead_value_loop_ivs_has_result
// CHECK: scf.for
// CHECK: cf.assert
func.func @dead_value_loop_ivs_has_result(%lb: index, %ub: index, %step: index, %b: i1) -> i1 {
%loop_ret = scf.for %iv = %lb to %ub step %step iter_args(%iter = %b) -> (i1) {
cf.assert %b, "loop not dead"
scf.yield %b : i1
}
return %loop_ret : i1
}
// -----
// This test verifies that the induction variables in loops are not deleted, the loop has no results.
//
// The induction variable %iv is unused; the loop body only performs a store
// to a function argument. Only the function label is verified here.
// CHECK-LABEL: func @dead_value_loop_ivs_no_result
func.func @dead_value_loop_ivs_no_result(%lb: index, %ub: index, %step: index, %input: memref<?xf32>, %value: f32, %pos: index) {
scf.for %iv = %lb to %ub step %step {
memref.store %value, %input[%pos] : memref<?xf32>
}
return
}
// -----
// A multi-block `scf.execute_region` whose inner block argument %0 is never
// used and which yields nothing. The expectation requires `return` to be the
// first op after the function header, i.e. the whole region op is gone.
// CHECK-LABEL: func @op_block_have_dead_arg
func.func @op_block_have_dead_arg(%arg0: index, %arg1: index, %arg2: i1) {
scf.execute_region {
cf.cond_br %arg2, ^bb1(%arg0 : index), ^bb1(%arg1 : index)
^bb1(%0: index):
scf.yield
}
// CHECK-NEXT: return
return
}
// -----
// A private function with no call site in this test case. The expectations
// require its three arguments and its result to be dropped, the conditional
// branch to be replaced by `ub.unreachable`, and both successor blocks to
// remain with operand-less returns.
// CHECK-LABEL: func private @remove_dead_branch_op()
// CHECK-NEXT: ub.unreachable
// CHECK-NEXT: ^{{.*}}:
// CHECK-NEXT: return
// CHECK-NEXT: ^{{.*}}:
// CHECK-NEXT: return
func.func private @remove_dead_branch_op(%c: i1, %arg0: i64, %arg1: i64) -> (i64) {
cf.cond_br %c, ^bb1, ^bb2
^bb1:
return %arg0 : i64
^bb2:
return %arg1 : i64
}
// -----
// An `affine.for` whose induction variable is unused but whose body stores to
// a locally allocated buffer. The expectations require the constant, the
// allocation, the loop and the store to all be kept.
// CHECK-LABEL: func @affine_loop_no_use_iv_has_side_effect_op
func.func @affine_loop_no_use_iv_has_side_effect_op() {
%c1 = arith.constant 1 : index
%alloc = memref.alloc() : memref<10xindex>
affine.for %arg0 = 0 to 79 {
memref.store %c1, %alloc[%c1] : memref<10xindex>
}
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<10xindex>
// CHECK: affine.for %[[VAL_0:.*]] = 0 to 79 {
// CHECK: memref.store %[[C1]], %[[ALLOC]]{{\[}}%[[C1]]] : memref<10xindex>
// CHECK: }
return
}
// -----
// An `scf.while` with two results of which only the first is returned. The
// second value forwarded by `scf.condition` reaches %arg2, which is only
// routed through an `scf.execute_region` back into the loop-carried value.
//
// Without canonicalization, the expectations keep the loop arity and fill the
// dead positions with `ub.poison`. With canonicalization, the second result
// and block argument are expected to be removed, leaving a single-result loop.
// CHECK-LABEL: func @scf_while_dead_iter_args()
// CHECK: %[[c5:.*]] = arith.constant 5 : i32
// CHECK: %[[while:.*]]:2 = scf.while (%[[arg0:.*]] = %[[c5]]) : (i32) -> (i32, i32) {
// CHECK: vector.print %[[arg0]]
// CHECK: %[[cmpi:.*]] = arith.cmpi
// CHECK: %[[p0:.*]] = ub.poison : i32
// CHECK: scf.condition(%[[cmpi]]) %[[arg0]], %[[p0]]
// CHECK: } do {
// CHECK: ^bb0(%[[arg1:.*]]: i32, %[[arg2:.*]]: i32):
// CHECK: %[[p1:.*]] = ub.poison : i32
// CHECK: scf.yield %[[p1]]
// CHECK: }
// CHECK: return %[[while]]#0
// CHECK-CANONICALIZE-LABEL: func @scf_while_dead_iter_args()
// CHECK-CANONICALIZE: %[[c5:.*]] = arith.constant 5 : i32
// CHECK-CANONICALIZE: %[[while:.*]] = scf.while (%[[arg0:.*]] = %[[c5]]) : (i32) -> i32 {
// CHECK-CANONICALIZE: vector.print %[[arg0]]
// CHECK-CANONICALIZE: %[[cmpi:.*]] = arith.cmpi
// CHECK-CANONICALIZE: scf.condition(%[[cmpi]]) %[[arg0]]
// CHECK-CANONICALIZE: } do {
// CHECK-CANONICALIZE: ^bb0(%[[arg1:.*]]: i32):
// CHECK-CANONICALIZE: %[[p0:.*]] = ub.poison : i32
// CHECK-CANONICALIZE: scf.yield %[[p0]]
// CHECK-CANONICALIZE: }
// CHECK-CANONICALIZE: return %[[while]]
func.func @scf_while_dead_iter_args() -> i32 {
%c5 = arith.constant 5 : i32
%result:2 = scf.while (%arg0 = %c5) : (i32) -> (i32, i32) {
// The print keeps the loop-carried value %arg0 in use.
vector.print %arg0 : i32
// Note: This condition is always "false". (And the liveness analysis
// can figure that out.)
%cmp2 = arith.cmpi slt, %arg0, %c5 : i32
scf.condition(%cmp2) %arg0, %arg0 : i32, i32
} do {
^bb0(%arg1: i32, %arg2: i32):
// %arg2 is passed through a single-block region before being yielded.
%x = scf.execute_region -> i32 {
scf.yield %arg2 : i32
}
scf.yield %x : i32
}
return %result#0 : i32
}