From ab4f66d6f3213051f5ba27d74f8f4c7c5cc58bd5 Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Wed, 21 Jan 2026 13:22:45 +0100 Subject: [PATCH] [OpenMP][flang] Move `todo` for checking reduction support status on the GPU (#175172) Moves a `todo` to check for the current level of support for by-ref reductions to the `FunctionFiltering` pass. This guarantees that the check does not trigger when the same module is compiled twice: on the CPU and on the GPU. --- .../lib/Lower/Support/ReductionProcessor.cpp | 20 ---------- .../Optimizer/OpenMP/FunctionFiltering.cpp | 39 +++++++++++++++++++ .../omp-function-filtering-todo.mlir | 33 ++++++++++++++++ 3 files changed, 72 insertions(+), 20 deletions(-) create mode 100644 flang/test/Transforms/omp-function-filtering-todo.mlir diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp index db8ad909b1d2..0e01268dd74f 100644 --- a/flang/lib/Lower/Support/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -598,26 +598,6 @@ DeclareRedType ReductionProcessor::createDeclareReductionHelper( genCombinerCB(builder, loc, type, op1, op2, isByRef); if (isByRef && fir::isa_box_type(valTy)) { - bool isBoxReductionSupported = [&]() { - auto offloadMod = llvm::dyn_cast( - *builder.getModule()); - - // This check tests the implementation status on the GPU. Box reductions - // are fully supported on the CPU. - if (!offloadMod.getIsGPU()) - return true; - - auto seqTy = mlir::dyn_cast(boxedTy); - - // Dynamically-shaped arrays are not supported yet on the GPU. 
- return !seqTy || !fir::sequenceWithNonConstantShape(seqTy); - }(); - - if (!isBoxReductionSupported) { - TODO(loc, "Reduction of dynamically-shaped arrays are not supported yet " - "on the GPU."); - } - mlir::Region &dataPtrPtrRegion = decl.getDataPtrPtrRegion(); mlir::Block &dataAddrBlock = *builder.createBlock( &dataPtrPtrRegion, dataPtrPtrRegion.end(), {type}, {loc}); diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp index 3031bb5da691..e58d5b7e7a38 100644 --- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp +++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/OpenMP/Passes.h" @@ -28,6 +29,42 @@ namespace flangomp { using namespace mlir; +/// This function triggers TODO errors and halts compilation if it detects +/// patterns representing unimplemented features. +/// +/// It exclusively checks situations that cannot be detected after all of the +/// MLIR pipeline has run (i.e. at the MLIR to LLVM IR translation stage, where +/// the preferred location for these types of checks is), and it only checks for +/// features that have not been implemented for target offload, but are +/// supported on host execution. 
+void checkDeviceImplementationStatus(
+    omp::OffloadModuleInterface offloadModule) {
+  if (!offloadModule.getIsGPU())
+    return;
+
+  offloadModule->walk([&](omp::DeclareReductionOp redOp) {
+    if (redOp.symbolKnownUseEmpty(offloadModule))
+      return WalkResult::advance();
+
+    if (!redOp.getByrefElementType())
+      return WalkResult::advance();
+
+    auto seqTy =
+        mlir::dyn_cast<fir::SequenceType>(*redOp.getByrefElementType());
+
+    bool isByRefReductionSupported =
+        !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
+
+    if (!isByRefReductionSupported) {
+      TODO(redOp.getLoc(),
+           "Reduction of dynamically-shaped arrays are not supported yet "
+           "on the GPU.");
+    }
+
+    return WalkResult::advance();
+  });
+}
+
 namespace {
 class FunctionFilteringPass
     : public flangomp::impl::FunctionFilteringPassBase<FunctionFilteringPass> {
@@ -101,6 +138,8 @@ public:
       }
       return WalkResult::advance();
     });
+
+    checkDeviceImplementationStatus(op);
   }
 };
 } // namespace
diff --git a/flang/test/Transforms/omp-function-filtering-todo.mlir b/flang/test/Transforms/omp-function-filtering-todo.mlir
new file mode 100644
index 000000000000..c5640bb9757f
--- /dev/null
+++ b/flang/test/Transforms/omp-function-filtering-todo.mlir
@@ -0,0 +1,33 @@
+// RUN: not fir-opt --omp-function-filtering -o - %s 2>&1 | FileCheck %s
+
+module attributes {omp.is_gpu = true, omp.is_target_device = true} {
+  // CHECK: not yet implemented: Reduction of dynamically-shaped arrays are not supported yet on the GPU.
+  omp.declare_reduction @add_reduction_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> attributes {byref_element_type = !fir.array<?xi32>} alloc {
+    %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+    omp.yield(%0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  } init {
+  ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+    omp.yield(%arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  } combiner {
+  ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+    omp.yield(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  }
+
+  func.func @foo(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+    %ia.map = omp.map.info var_ptr(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, implicit, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "ia"}
+
+    omp.target map_entries(%ia.map -> %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+      omp.parallel {
+        %c1_i32 = arith.constant 1 : i32
+        omp.wsloop reduction(byref @add_reduction_byref_box_heap_Uxi32 %arg0 -> %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+          omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c1_i32) inclusive step (%c1_i32) {
+            omp.yield
+          }
+        }
+        omp.terminator
+      }
+      omp.terminator
+    }
+    return
+  }
+}