llvm-project/mlir/test/Transforms/loop-invariant-code-motion.mlir
Sanjoy Das adabce4118 Correctly model undefined behavior in {tensor|memref}.dim
These operations have undefined behavior if the index is not less than the rank of the source tensor / memref, so they cannot be freely speculated like they were before this patch.  After this patch we speculate them only if we can prove that they don't have UB.

Depends on D135505.

Reviewed By: mravishankar

Differential Revision: https://reviews.llvm.org/D135748
2022-10-12 17:30:13 -07:00

610 lines
16 KiB
MLIR

// RUN: mlir-opt %s -split-input-file -loop-invariant-code-motion | FileCheck %s
// Two nested affine loops, each holding an addf whose operands are defined
// outside of that loop. The label below pins the expectations to this
// function's output, matching the convention used by later tests in this file.
// CHECK-LABEL: func @nested_loops_both_having_invariant_code(
func.func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    // Operands are the two constants defined above the loop nest.
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      // Operands are %v0 (outer loop body) and a constant.
      %v1 = arith.addf %v0, %cf8 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // Expected output: both addf ops sit directly after the constants and ahead
  // of the loop nest, while the store is still inside the inner loop.
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32
  // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store
  return
}
// -----
// A single addf in the innermost of two affine loops; its operands are
// constants defined outside both loops. The label pins the expectations to
// this function's output.
// CHECK-LABEL: func @nested_loops_code_invariant_to_both(
func.func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // Expected output: the addf immediately follows the constants, i.e. no loop
  // header is printed between them.
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf
  return
}
// -----
// One affine loop in which every op depends, directly or transitively, on the
// induction variable. The label pins the expectations to this function's
// output.
// CHECK-LABEL: func @single_loop_nothing_invariant(
func.func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>

  affine.for %arg0 = 0 to 10 {
    // Both loads are indexed by the induction variable.
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<10xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // Expected output: the loop body is unchanged.
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: affine.load
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  return
}
// -----
// An addf with loop-independent operands nested in an affine.if whose
// condition operands depend on the induction variable. The label pins the
// expectations to this function's output.
// CHECK-LABEL: func @invariant_code_inside_affine_if(
func.func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
      %cf9 = arith.addf %cf8, %cf8 : f32
      affine.store %cf9, %m[%arg0] : memref<10xf32>
    }
  }

  // Expected output: the addf is still printed inside the affine.if, which is
  // itself still inside the loop.
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.apply
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  return
}
// -----
// An affine.if in the inner loop whose condition only uses the outer
// induction variable and whose body has no stores. The trailing "(" in the
// label keeps it from also matching @invariant_affine_if2.
// CHECK-LABEL: func @invariant_affine_if(
func.func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
      }
    }
  }

  // Expected output: an empty affine.for, followed by a second affine.for
  // that directly contains the affine.if (with the addf still inside it).
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if #set(%[[ARG]], %[[ARG]]) {
  // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: }
  return
}
// -----
// Same shape as @invariant_affine_if, but the affine.if body additionally
// stores to %m at the inner induction variable. The label pins the
// expectations to this function's output.
// CHECK-LABEL: func @invariant_affine_if2(
func.func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        // The store index is the inner induction variable.
        affine.store %cf9, %m[%arg1] : memref<10xf32>
      }
    }
  }

  // Expected output: the loop nest and the affine.if are printed in their
  // original nesting.
  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  return
}
// -----
// Two nested affine.if ops in the inner loop; both conditions use only the
// outer induction variable and neither body stores. The trailing "(" in the
// label keeps it from also matching @invariant_affine_nested_if_else.
// CHECK-LABEL: func @invariant_affine_nested_if(
func.func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        }
      }
    }
  }

  // Expected output: an empty affine.for, followed by a second affine.for
  // that directly contains the nested affine.if ops.
  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  return
}
// -----
// Nested affine.if/else in the inner loop where the else branch stores at the
// inner induction variable. The label pins the expectations to this
// function's output.
// CHECK-LABEL: func @invariant_affine_nested_if_else(
func.func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        } else {
          // The store index is the inner induction variable.
          affine.store %cf9, %m[%arg1] : memref<10xf32>
        }
      }
    }
  }

  // Expected output: the whole structure is printed in its original nesting.
  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  return
}
// -----
// scf.for variant of the "invariant to both loops" case: an addf on two
// constants inside a two-deep scf.for nest. The label pins the expectations to
// this function's output.
// CHECK-LABEL: func @invariant_loop_dialect(
func.func @invariant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // Expected output: the addf immediately follows the float constants, i.e.
  // no loop header is printed between them.
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: arith.addf
  return
}
// -----
// scf.for nest whose only body op uses both induction variables. The label
// pins the expectations to this function's output.
// CHECK-LABEL: func @variant_loop_dialect(
func.func @variant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>

  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addi %arg0, %arg1 : index
    }
  }

  // Expected output: the addi is still inside the inner loop.
  // CHECK: memref.alloc() : memref<10xf32>
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: arith.addi
  return
}
// -----
// scf.parallel body with one addi on constants and one addi on the two
// induction variables.
func.func @parallel_loop_with_invariant() {
  %lo = arith.constant 0 : index
  %hi = arith.constant 10 : index
  %stride = arith.constant 1 : index
  %seven = arith.constant 7 : i32
  %eight = arith.constant 8 : i32

  scf.parallel (%i, %j) = (%lo, %lo) to (%hi, %hi) step (%stride, %stride) {
    // Operands are constants defined outside the loop.
    %const_sum = arith.addi %seven, %eight : i32
    // Operands are the induction variables.
    %iv_sum = arith.addi %i, %j : index
  }

  // Expected output: one addi precedes scf.parallel; the addi on the
  // induction variable is the only op left ahead of the terminator.
  // CHECK-LABEL: func @parallel_loop_with_invariant
  // CHECK: arith.constant 0 : index
  // CHECK-NEXT: arith.constant 10 : index
  // CHECK-NEXT: arith.constant 1 : index
  // CHECK-NEXT: arith.constant 7 : i32
  // CHECK-NEXT: arith.constant 8 : i32
  // CHECK-NEXT: arith.addi
  // CHECK-NEXT: scf.parallel (%[[A:.*]],{{.*}}) =
  // CHECK-NEXT: arith.addi %[[A]]
  // CHECK-NEXT: yield
  // CHECK-NEXT: }
  // CHECK-NEXT: return
  return
}
// -----
// External declaration used as the source of loop-defined values below.
func.func private @make_val() -> (index)

// CHECK-LABEL: func @nested_uses_inside
func.func @nested_uses_inside(%lb: index, %ub: index, %step: index) {
  %cond = arith.constant true
  // Check that ops whose nested regions use values not defined outside the
  // loop remain in the loop.
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: call @
  // CHECK-NEXT: call @
  // CHECK-NEXT: scf.if
  // CHECK-NEXT: scf.yield
  // CHECK-NEXT: else
  // CHECK-NEXT: scf.yield
  scf.for %iv = %lb to %ub step %step {
    %first = func.call @make_val() : () -> (index)
    %second = func.call @make_val() : () -> (index)
    // The condition is defined outside the loop, but both branches yield
    // values produced by the calls above.
    %picked = scf.if %cond -> (index) {
      scf.yield %first: index
    } else {
      scf.yield %second: index
    }
  }
  return
}
// -----
// Two chained ops (the second consumes the first) must both leave the loop,
// and in an order that keeps the definition ahead of its use, since the
// function body is not a graph region.
// CHECK-LABEL: func @invariant_subgraph
// CHECK-SAME: %{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %[[ARG:.*]]: i32
func.func @invariant_subgraph(%lb: index, %ub: index, %step: index, %arg: i32) {
  // CHECK: %[[V0:.*]] = arith.addi %[[ARG]], %[[ARG]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[ARG]], %[[V0]]
  // CHECK-NEXT: scf.for
  scf.for %iv = %lb to %ub step %step {
    // CHECK-NEXT: "test.sink"(%[[V1]])
    %doubled = arith.addi %arg, %arg : i32
    %tripled = arith.addi %arg, %doubled : i32
    // Only the consumer of the chain is expected to remain in the loop.
    "test.sink"(%tripled) : (i32) -> ()
  }
  return
}
// -----
// A nested test.graph_loop that uses nothing from its parent loop is expected
// to be hoisted as a whole, alongside the two addi ops.
// CHECK-LABEL: func @test_invariant_nested_loop
func.func @test_invariant_nested_loop() {
  // CHECK: %[[C:.*]] = arith.constant
  %five = arith.constant 5 : i32
  // CHECK: %[[V0:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[V0]], %[[C]]
  // CHECK-NEXT: test.graph_loop
  // CHECK-NEXT: ^bb0(%[[ARG0:.*]]: i32)
  // CHECK-NEXT: %[[V2:.*]] = arith.subi %[[ARG0]], %[[ARG0]]
  // CHECK-NEXT: test.region_yield %[[V2]]
  // CHECK: test.graph_loop
  // CHECK-NEXT: test.region_yield %[[V1]]
  test.graph_loop {
    %sum0 = arith.addi %five, %five : i32
    %sum1 = arith.addi %sum0, %five : i32
    // The inner loop only reads its own block argument.
    test.graph_loop {
    ^bb0(%inner_arg: i32):
      %diff = arith.subi %inner_arg, %inner_arg : i32
      test.region_yield %diff : i32
    } : () -> ()
    test.region_yield %sum1 : i32
  } : () -> ()
  return
}
// -----
// Ops inside a graph-region loop are written with uses ahead of their
// definitions; they are expected to be hoisted into the enclosing
// test.single_no_terminator_op region.
// CHECK-LABEL: func @test_invariants_in_graph_region
func.func @test_invariants_in_graph_region() {
  // CHECK: test.single_no_terminator_op
  test.single_no_terminator_op : {
    // CHECK-NEXT: %[[C:.*]] = arith.constant
    // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
    // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
    test.graph_loop {
      // Intentionally listed in use-before-def order.
      %outer_sum = arith.addi %five, %inner_sum : i32
      %inner_sum = arith.addi %five, %five : i32
      %five = arith.constant 5 : i32
      test.region_yield %outer_sum : i32
    } : () -> ()
  }
  return
}
// -----
// Ops hoisted from a graph region into a non-graph region must come out in
// topological order so that every definition dominates its uses.
// CHECK-LABEL: func @test_invariant_backedge
func.func @test_invariant_backedge() {
  // CHECK-NEXT: %[[C:.*]] = arith.constant
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
  // CHECK-NEXT: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: test.region_yield %[[V0]]
    // Intentionally listed in use-before-def order.
    %outer_sum = arith.addi %five, %inner_sum : i32
    %inner_sum = arith.addi %five, %five : i32
    %five = arith.constant 5 : i32
    test.region_yield %outer_sum : i32
  } : () -> ()
  return
}
// -----
// Two ops that use each other's results form a cycle; such a cycle is
// expected to stay inside the graph region rather than move to the
// non-graph function body.
// CHECK-LABEL: func @test_invariant_cycle_not_hoisted
func.func @test_invariant_cycle_not_hoisted() {
  // CHECK: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: %[[A:.*]] = "test.a"(%[[B:.*]]) :
    // CHECK-NEXT: %[[B]] = "test.b"(%[[A]]) :
    // CHECK-NEXT: test.region_yield %[[A]]
    %x = "test.a"(%y) : (i32) -> i32
    %y = "test.b"(%x) : (i32) -> i32
    test.region_yield %x : i32
  } : () -> ()
  return
}
// -----
// CHECK-LABEL: test_always_speculatable_op
func.func @test_always_speculatable_op(%lb: index, %ub: index, %step: index) {
  // Expected output: the test op is printed directly ahead of the loop.
  // CHECK: test.always_speculatable_op
  // CHECK-NEXT: scf.for
  scf.for %iv = %lb to %ub step %step {
    %res = "test.always_speculatable_op"() : () -> i32
  }
  return
}
// CHECK-LABEL: test_never_speculatable_op
func.func @test_never_speculatable_op(%lb: index, %ub: index, %step: index) {
  // Expected output: the test op is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: test.never_speculatable_op
  scf.for %iv = %lb to %ub step %step {
    %res = "test.never_speculatable_op"() : () -> i32
  }
  return
}
// CHECK-LABEL: test_conditionally_speculatable_op_success
func.func @test_conditionally_speculatable_op_success(%lb: index, %ub: index, %step: index) {
  // Expected output: the test op is printed directly ahead of the loop.
  // NOTE(review): presumably the op is speculatable because its operand is a
  // constant; confirm against the test dialect definition.
  // CHECK: test.conditionally_speculatable_op
  // CHECK-NEXT: scf.for
  scf.for %iv = %lb to %ub step %step {
    %five = arith.constant 5 : i32
    %res = "test.conditionally_speculatable_op"(%five) : (i32) -> i32
  }
  return
}
// CHECK-LABEL: test_conditionally_speculatable_op_failure
func.func @test_conditionally_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) {
  // Expected output: the test op is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: test.conditionally_speculatable_op
  %five = arith.constant 5 : i32
  // The operand fed to the test op depends on a function argument, so it is
  // not a constant.
  %dynamic = arith.addi %arg, %five : i32
  scf.for %iv = %lb to %ub step %step {
    %res = "test.conditionally_speculatable_op"(%dynamic) : (i32) -> i32
  }
  return
}
// CHECK-LABEL: test_recursively_speculatable_op_success
func.func @test_recursively_speculatable_op_success(%lb: index, %ub: index, %step: index, %arg: i32) {
  // Expected output: the region-carrying test op is printed ahead of the
  // loop. Its region only holds an addi on a function argument.
  // CHECK: test.recursively_speculatable_op
  // CHECK: scf.for
  scf.for %iv = %lb to %ub step %step {
    %res = "test.recursively_speculatable_op"()({
      %sum = arith.addi %arg, %arg : i32
      test.region_yield %sum : i32
    }) : () -> i32
  }
  return
}
// CHECK-LABEL: test_recursively_speculatable_op_failure
func.func @test_recursively_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) {
  // Expected output: the region-carrying test op is still the first op inside
  // the loop. Its region holds a test.never_speculatable_op.
  // CHECK: scf.for
  // CHECK-NEXT: test.recursively_speculatable_op
  scf.for %iv = %lb to %ub step %step {
    %res = "test.recursively_speculatable_op"()({
      %inner = "test.never_speculatable_op"() : () -> i32
      test.region_yield %inner : i32
    }) : () -> i32
  }
  return
}
// -----
// Unranked tensor, dimension index supplied as a function argument.
// CHECK-LABEL: @speculate_tensor_dim_unknown_rank_unknown_dim
func.func @speculate_tensor_dim_unknown_rank_unknown_dim(
    %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // Expected output: tensor.dim is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %iv = %lb to %ub step %step {
    %extent = tensor.dim %t, %dim_idx : tensor<*xf32>
  }
  return
}
// Rank-4 tensor, dimension index supplied as a function argument.
// CHECK-LABEL: @speculate_tensor_dim_known_rank_unknown_dim
func.func @speculate_tensor_dim_known_rank_unknown_dim(
    %t: tensor<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // Expected output: tensor.dim is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %iv = %lb to %ub step %step {
    %extent = tensor.dim %t, %dim_idx : tensor<?x?x?x?xf32>
  }
  return
}
// Unranked tensor, dimension index is the constant 0.
// CHECK-LABEL: @speculate_tensor_dim_unknown_rank_known_dim
func.func @speculate_tensor_dim_unknown_rank_known_dim(
    %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %zero = arith.constant 0 : index
  // Expected output: tensor.dim is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: tensor.dim
  scf.for %iv = %lb to %ub step %step {
    %extent = tensor.dim %t, %zero : tensor<*xf32>
  }
  return
}
// Rank-4 tensor, dimension index is the constant 1 (below the rank).
// CHECK-LABEL: @speculate_tensor_dim_known_rank_known_dim_inbounds
func.func @speculate_tensor_dim_known_rank_known_dim_inbounds(
    %t: tensor<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %one = arith.constant 1 : index
  // Expected output: tensor.dim is printed directly ahead of the loop.
  // CHECK: tensor.dim
  // CHECK-NEXT: scf.for
  scf.for %iv = %lb to %ub step %step {
    %extent = tensor.dim %t, %one : tensor<?x?x?x?xf32>
  }
  return
}
// -----
// Unranked memref, dimension index supplied as a function argument.
// CHECK-LABEL: @speculate_memref_dim_unknown_rank_unknown_dim
func.func @speculate_memref_dim_unknown_rank_unknown_dim(
    %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // Expected output: memref.dim is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %iv = %lb to %ub step %step {
    %extent = memref.dim %t, %dim_idx : memref<*xf32>
  }
  return
}
// Rank-4 memref, dimension index supplied as a function argument.
// CHECK-LABEL: @speculate_memref_dim_known_rank_unknown_dim
func.func @speculate_memref_dim_known_rank_unknown_dim(
    %t: memref<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  // Expected output: memref.dim is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %iv = %lb to %ub step %step {
    %extent = memref.dim %t, %dim_idx : memref<?x?x?x?xf32>
  }
  return
}
// Unranked memref, dimension index is the constant 0.
// CHECK-LABEL: @speculate_memref_dim_unknown_rank_known_dim
func.func @speculate_memref_dim_unknown_rank_known_dim(
    %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %zero = arith.constant 0 : index
  // Expected output: memref.dim is still the first op inside the loop.
  // CHECK: scf.for
  // CHECK-NEXT: memref.dim
  scf.for %iv = %lb to %ub step %step {
    %extent = memref.dim %t, %zero : memref<*xf32>
  }
  return
}
// Rank-4 memref, dimension index is the constant 1 (below the rank).
// CHECK-LABEL: @speculate_memref_dim_known_rank_known_dim_inbounds
func.func @speculate_memref_dim_known_rank_known_dim_inbounds(
    %t: memref<?x?x?x?xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) {
  %one = arith.constant 1 : index
  // Expected output: memref.dim is printed directly ahead of the loop.
  // CHECK: memref.dim
  // CHECK-NEXT: scf.for
  scf.for %iv = %lb to %ub step %step {
    %extent = memref.dim %t, %one : memref<?x?x?x?xf32>
  }
  return
}