From ab4f66d6f3213051f5ba27d74f8f4c7c5cc58bd5 Mon Sep 17 00:00:00 2001
From: Kareem Ergawy <kareem.ergawy@amd.com>
Date: Wed, 21 Jan 2026 13:22:45 +0100
Subject: [PATCH] [OpenMP][flang] Move `todo` for checking reduction support
 status on the GPU (#175172)

Moves a `todo` to check for the current level of support for by-ref
reductions to the `FunctionFiltering` pass. This guarantees that the
check does not trigger when the same module is compiled twice: on the
CPU and on the GPU.
---
 .../lib/Lower/Support/ReductionProcessor.cpp  | 20 ----------
 .../Optimizer/OpenMP/FunctionFiltering.cpp    | 39 +++++++++++++++++++
 .../omp-function-filtering-todo.mlir          | 33 ++++++++++++++++
 3 files changed, 72 insertions(+), 20 deletions(-)
 create mode 100644 flang/test/Transforms/omp-function-filtering-todo.mlir

diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp
index db8ad909b1d2..0e01268dd74f 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -598,26 +598,6 @@ DeclareRedType ReductionProcessor::createDeclareReductionHelper(
   genCombinerCB(builder, loc, type, op1, op2, isByRef);
 
   if (isByRef && fir::isa_box_type(valTy)) {
-    bool isBoxReductionSupported = [&]() {
-      auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
-          *builder.getModule());
-
-      // This check tests the implementation status on the GPU. Box reductions
-      // are fully supported on the CPU.
-      if (!offloadMod.getIsGPU())
-        return true;
-
-      auto seqTy = mlir::dyn_cast<fir::SequenceType>(boxedTy);
-
-      // Dynamically-shaped arrays are not supported yet on the GPU.
-      return !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
-    }();
-
-    if (!isBoxReductionSupported) {
-      TODO(loc, "Reduction of dynamically-shaped arrays are not supported yet "
-                "on the GPU.");
-    }
-
     mlir::Region &dataPtrPtrRegion = decl.getDataPtrPtrRegion();
     mlir::Block &dataAddrBlock = *builder.createBlock(
         &dataPtrPtrRegion, dataPtrPtrRegion.end(), {type}, {loc});
diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
index 3031bb5da691..e58d5b7e7a38 100644
--- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
+++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/OpenMP/Passes.h"
@@ -28,6 +29,42 @@ namespace flangomp {
 
 using namespace mlir;
 
+/// This function triggers TODO errors and halts compilation if it detects
+/// patterns representing unimplemented features.
+///
+/// It exclusively checks situations that cannot be detected after all of the
+/// MLIR pipeline has ran (i.e. at the MLIR to LLVM IR translation stage, where
+/// the preferred location for these types of checks is), and it only checks for
+/// features that have not been implemented for target offload, but are
+/// supported on host execution.
+void checkDeviceImplementationStatus(
+    omp::OffloadModuleInterface offloadModule) {
+  if (!offloadModule.getIsGPU())
+    return;
+
+  offloadModule->walk<WalkOrder::PreOrder>([&](omp::DeclareReductionOp redOp) {
+    if (redOp.symbolKnownUseEmpty(offloadModule))
+      return WalkResult::advance();
+
+    if (!redOp.getByrefElementType())
+      return WalkResult::advance();
+
+    auto seqTy =
+        mlir::dyn_cast<fir::SequenceType>(*redOp.getByrefElementType());
+
+    bool isByRefReductionSupported =
+        !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
+
+    if (!isByRefReductionSupported) {
+      TODO(redOp.getLoc(),
+           "Reduction of dynamically-shaped arrays are not supported yet "
+           "on the GPU.");
+    }
+
+    return WalkResult::advance();
+  });
+}
+
 namespace {
 class FunctionFilteringPass
     : public flangomp::impl::FunctionFilteringPassBase<FunctionFilteringPass> {
@@ -101,6 +138,8 @@ public:
       }
       return WalkResult::advance();
     });
+
+    checkDeviceImplementationStatus(op);
   }
 };
 } // namespace
diff --git a/flang/test/Transforms/omp-function-filtering-todo.mlir b/flang/test/Transforms/omp-function-filtering-todo.mlir
new file mode 100644
index 000000000000..c5640bb9757f
--- /dev/null
+++ b/flang/test/Transforms/omp-function-filtering-todo.mlir
@@ -0,0 +1,33 @@
+// RUN: not fir-opt --omp-function-filtering -o - %s 2>&1 | FileCheck %s
+
+module attributes {omp.is_gpu = true, omp.is_target_device = true} {
+  // CHECK: not yet implemented: Reduction of dynamically-shaped arrays are not supported yet on the GPU.
+  omp.declare_reduction @add_reduction_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> attributes {byref_element_type = !fir.array<?xi32>} alloc {
+    %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+    omp.yield(%0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  } init {
+  ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+    omp.yield(%arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  } combiner {
+  ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+    omp.yield(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  }
+
+  func.func @foo(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+    %ia.map = omp.map.info var_ptr(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, implicit, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "ia"}
+
+    omp.target map_entries(%ia.map -> %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+      omp.parallel {
+        %c1_i32 = arith.constant 1 : i32
+        omp.wsloop reduction(byref @add_reduction_byref_box_heap_Uxi32 %arg0 -> %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+          omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c1_i32) inclusive step (%c1_i32) {
+            omp.yield
+          }
+        }
+        omp.terminator
+      }
+      omp.terminator
+    }
+    return
+  }
+}