diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp index 6c754f226e26..3c4da62eda93 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp @@ -268,7 +268,7 @@ public: // Move alloca operations into the alloca-block, and all other // operations - right before fir.do_concurrent. - for (mlir::Operation *op : llvm::reverse(opsToMove)) + for (mlir::Operation *op : opsToMove) if (mlir::isa(op)) rewriter.moveOpBefore(op, allocIt, allocIt->begin()); else diff --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir index eb04e7e58bec..e5cd9c1192db 100644 --- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir +++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir @@ -58,8 +58,8 @@ func.func @dc_2d(%i_lb: index, %i_ub: index, %i_st: index, // CHECK-SAME: %[[J_UB:[^[:space:]]+]]: index, // CHECK-SAME: %[[J_ST:[^[:space:]]+]]: index) { -// CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: %[[J:.*]] = fir.alloca i32 +// CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: llvm.br ^bb1 // CHECK: ^bb1: @@ -105,8 +105,8 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, // CHECK-SAME: %[[J_UB:[^[:space:]]+]]: index, // CHECK-SAME: %[[J_ST:[^[:space:]]+]]: index) { -// CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: %[[J:.*]] = fir.alloca i32 +// CHECK: %[[I:.*]] = fir.alloca i32 // CHECK: %[[SUM:.*]] = fir.alloca i32 // CHECK: fir.do_loop %[[I_IV:.*]] = %[[I_LB]] to %[[I_UB]] step %[[I_ST]] unordered reduce({{.*}}] -> %[[SUM]] : !fir.ref) { @@ -225,3 +225,44 @@ func.func @after_licm(%i_lb: index, %i_ub: index, %i_st: index, %val : i32) { // CHECK: } // CHECK: return // CHECK: } + +// ----- + +// Check that a chain of dependent operations (e.g. subi/index_cast below) +// is moved in proper order. 
+func.func @after_licm2(%i_lb: index, %i_ub: index, %i_st: index, %val : i32) {
+  %c1_i32 = arith.constant 1 : i32
+  %c7_i32 = arith.constant 7 : i32
+  cf.br ^bb1
+^bb1:
+  %newval1 = arith.muli %val, %c7_i32 : i32  // defined outside fir.do_concurrent, in ^bb1
+  fir.do_concurrent {
+    %i = fir.alloca index  // alloca: expected in the entry block, ahead of the cf.br
+    %newval2 = arith.subi %newval1, %c1_i32 : i32  // first link of the chain: uses %newval1
+    %newval3 = arith.index_cast %newval2 : i32 to index  // second link: uses %newval2, so it must follow the subi
+    fir.do_concurrent.loop (%i_iv) = (%i_lb) to (%i_ub) step (%i_st) {
+      %1 = arith.addi %newval3, %i_iv : index
+      fir.store %1 to %i : !fir.ref<index>
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @after_licm2(
+// CHECK-SAME: %[[ARG0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME: %[[ARG1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME: %[[ARG2:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME: %[[ARG3:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: i32) {
+// CHECK: %[[CONSTANT_0:.*]] = arith.constant 7 : i32
+// CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : i32
+// CHECK: %[[ALLOCA_0:.*]] = fir.alloca index
+// CHECK: cf.br ^bb1
+// CHECK: ^bb1:
+// CHECK: %[[MULI_0:.*]] = arith.muli %[[ARG3]], %[[CONSTANT_0]] : i32
+// CHECK: %[[SUBI_0:.*]] = arith.subi %[[MULI_0]], %[[CONSTANT_1]] : i32
+// CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[SUBI_0]] : i32 to index
+// CHECK: fir.do_loop %[[VAL_0:.*]] = %[[ARG0]] to %[[ARG1]] step %[[ARG2]] unordered {
+// CHECK: %[[ADDI_0:.*]] = arith.addi %[[INDEX_CAST_0]], %[[VAL_0]] : index
+// CHECK: fir.store %[[ADDI_0]] to %[[ALLOCA_0]] : !fir.ref<index>
+// CHECK: }
+// CHECK: return
+// CHECK: }