diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
index 6c754f226e26..3c4da62eda93 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp
@@ -268,7 +268,7 @@ public:
 
     // Move alloca operations into the alloca-block, and all other
     // operations - right before fir.do_concurrent.
-    for (mlir::Operation *op : llvm::reverse(opsToMove))
+    for (mlir::Operation *op : opsToMove)
       if (mlir::isa<fir::AllocaOp>(op))
         rewriter.moveOpBefore(op, allocIt, allocIt->begin());
       else
diff --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
index eb04e7e58bec..e5cd9c1192db 100644
--- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
+++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir
@@ -58,8 +58,8 @@ func.func @dc_2d(%i_lb: index, %i_ub: index, %i_st: index,
 // CHECK-SAME:                     %[[J_UB:[^[:space:]]+]]: index,
 // CHECK-SAME:                     %[[J_ST:[^[:space:]]+]]: index) {
 
-// CHECK:           %[[I:.*]] = fir.alloca i32
 // CHECK:           %[[J:.*]] = fir.alloca i32
+// CHECK:           %[[I:.*]] = fir.alloca i32
 // CHECK:           llvm.br ^bb1
 
 // CHECK:         ^bb1:
@@ -105,8 +105,8 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index,
 // CHECK-SAME:                     %[[J_UB:[^[:space:]]+]]: index,
 // CHECK-SAME:                     %[[J_ST:[^[:space:]]+]]: index) {
 
-// CHECK:           %[[I:.*]] = fir.alloca i32
 // CHECK:           %[[J:.*]] = fir.alloca i32
+// CHECK:           %[[I:.*]] = fir.alloca i32
 // CHECK:           %[[SUM:.*]] = fir.alloca i32
 
 // CHECK:           fir.do_loop %[[I_IV:.*]] = %[[I_LB]] to %[[I_UB]] step %[[I_ST]] unordered reduce({{.*}}<add>] -> %[[SUM]] : !fir.ref<i32>) {
@@ -225,3 +225,44 @@ func.func @after_licm(%i_lb: index, %i_ub: index, %i_st: index, %val : i32) {
 // CHECK:           }
 // CHECK:           return
 // CHECK:         }
+
+// -----
+
+// Check that a chain of dependent operations (e.g. subi -> index_cast below)
+// is moved in the proper order, i.e. each definition still precedes its uses.
+func.func @after_licm2(%i_lb: index, %i_ub: index, %i_st: index, %val : i32) {
+  %c1_i32 = arith.constant 1 : i32
+  %c7_i32 = arith.constant 7 : i32
+  cf.br ^bb1
+^bb1:
+  %newval1 = arith.muli %val, %c7_i32 : i32  // Defined outside fir.do_concurrent; feeds the chain below.
+  fir.do_concurrent {
+    %i = fir.alloca index  // The expectations below want this alloca ahead of the cf.br.
+    %newval2 = arith.subi %newval1, %c1_i32 : i32  // Chain link 1: depends on %newval1.
+    %newval3 = arith.index_cast %newval2 : i32 to index  // Chain link 2: depends on %newval2.
+    fir.do_concurrent.loop (%i_iv) = (%i_lb) to (%i_ub) step (%i_st) {
+      %1 = arith.addi %newval3, %i_iv : index  // The loop body consumes the end of the chain.
+      fir.store %1 to %i : !fir.ref<index>
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @after_licm2(
+// CHECK-SAME:      %[[ARG0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME:      %[[ARG1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME:      %[[ARG2:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: index,
+// CHECK-SAME:      %[[ARG3:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: i32) {
+// CHECK:           %[[CONSTANT_0:.*]] = arith.constant 7 : i32
+// CHECK:           %[[CONSTANT_1:.*]] = arith.constant 1 : i32
+// CHECK:           %[[ALLOCA_0:.*]] = fir.alloca index
+// CHECK:           cf.br ^bb1
+// CHECK:         ^bb1:
+// CHECK:           %[[MULI_0:.*]] = arith.muli %[[ARG3]], %[[CONSTANT_0]] : i32
+// CHECK:           %[[SUBI_0:.*]] = arith.subi %[[MULI_0]], %[[CONSTANT_1]] : i32
+// CHECK:           %[[INDEX_CAST_0:.*]] = arith.index_cast %[[SUBI_0]] : i32 to index
+// CHECK:           fir.do_loop %[[VAL_0:.*]] = %[[ARG0]] to %[[ARG1]] step %[[ARG2]] unordered {
+// CHECK:             %[[ADDI_0:.*]] = arith.addi %[[INDEX_CAST_0]], %[[VAL_0]] : index
+// CHECK:             fir.store %[[ADDI_0]] to %[[ALLOCA_0]] : !fir.ref<index>
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }