[OpenMP][flang] Move todo for checking reduction support status on the GPU (#175172)
Moves a `todo` to check for the current level of support for by-ref reductions to the `FunctionFiltering` pass. This guarantees that the check does not trigger when the same module is compiled twice: on the CPU and on the GPU.
This commit is contained in:
parent
3de4d32a72
commit
ab4f66d6f3
@ -598,26 +598,6 @@ DeclareRedType ReductionProcessor::createDeclareReductionHelper(
|
||||
genCombinerCB(builder, loc, type, op1, op2, isByRef);
|
||||
|
||||
if (isByRef && fir::isa_box_type(valTy)) {
|
||||
bool isBoxReductionSupported = [&]() {
|
||||
auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(
|
||||
*builder.getModule());
|
||||
|
||||
// This check tests the implementation status on the GPU. Box reductions
|
||||
// are fully supported on the CPU.
|
||||
if (!offloadMod.getIsGPU())
|
||||
return true;
|
||||
|
||||
auto seqTy = mlir::dyn_cast<fir::SequenceType>(boxedTy);
|
||||
|
||||
// Dynamically-shaped arrays are not supported yet on the GPU.
|
||||
return !seqTy || !fir::sequenceWithNonConstantShape(seqTy);
|
||||
}();
|
||||
|
||||
if (!isBoxReductionSupported) {
|
||||
TODO(loc, "Reduction of dynamically-shaped arrays are not supported yet "
|
||||
"on the GPU.");
|
||||
}
|
||||
|
||||
mlir::Region &dataPtrPtrRegion = decl.getDataPtrPtrRegion();
|
||||
mlir::Block &dataAddrBlock = *builder.createBlock(
|
||||
&dataPtrPtrRegion, dataPtrPtrRegion.end(), {type}, {loc});
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "flang/Optimizer/Builder/Todo.h"
|
||||
#include "flang/Optimizer/Dialect/FIRDialect.h"
|
||||
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
|
||||
#include "flang/Optimizer/OpenMP/Passes.h"
|
||||
@ -28,6 +29,42 @@ namespace flangomp {
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
/// This function triggers TODO errors and halts compilation if it detects
|
||||
/// patterns representing unimplemented features.
|
||||
///
|
||||
/// It exclusively checks situations that cannot be detected after all of the
|
||||
/// MLIR pipeline has ran (i.e. at the MLIR to LLVM IR translation stage, where
|
||||
/// the preferred location for these types of checks is), and it only checks for
|
||||
/// features that have not been implemented for target offload, but are
|
||||
/// supported on host execution.
|
||||
void checkDeviceImplementationStatus(
|
||||
omp::OffloadModuleInterface offloadModule) {
|
||||
if (!offloadModule.getIsGPU())
|
||||
return;
|
||||
|
||||
offloadModule->walk<WalkOrder::PreOrder>([&](omp::DeclareReductionOp redOp) {
|
||||
if (redOp.symbolKnownUseEmpty(offloadModule))
|
||||
return WalkResult::advance();
|
||||
|
||||
if (!redOp.getByrefElementType())
|
||||
return WalkResult::advance();
|
||||
|
||||
auto seqTy =
|
||||
mlir::dyn_cast<fir::SequenceType>(*redOp.getByrefElementType());
|
||||
|
||||
bool isByRefReductionSupported =
|
||||
!seqTy || !fir::sequenceWithNonConstantShape(seqTy);
|
||||
|
||||
if (!isByRefReductionSupported) {
|
||||
TODO(redOp.getLoc(),
|
||||
"Reduction of dynamically-shaped arrays are not supported yet "
|
||||
"on the GPU.");
|
||||
}
|
||||
|
||||
return WalkResult::advance();
|
||||
});
|
||||
}
|
||||
|
||||
namespace {
|
||||
class FunctionFilteringPass
|
||||
: public flangomp::impl::FunctionFilteringPassBase<FunctionFilteringPass> {
|
||||
@ -101,6 +138,8 @@ public:
|
||||
}
|
||||
return WalkResult::advance();
|
||||
});
|
||||
|
||||
checkDeviceImplementationStatus(op);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
33
flang/test/Transforms/omp-function-filtering-todo.mlir
Normal file
33
flang/test/Transforms/omp-function-filtering-todo.mlir
Normal file
@ -0,0 +1,33 @@
|
||||
// RUN: not fir-opt --omp-function-filtering -o - %s 2>&1 | FileCheck %s
|
||||
|
||||
// Negative test input: a GPU target-device module for which the
// function-filtering pass is expected to stop with a "not yet implemented"
// diagnostic.
module attributes {omp.is_gpu = true, omp.is_target_device = true} {
|
||||
// CHECK: not yet implemented: Reduction of dynamically-shaped arrays are not supported yet on the GPU.
|
||||
// By-ref reduction declaration whose byref_element_type is an array with a
// dynamic extent (!fir.array<?xi32>); the region bodies are minimal
// placeholders.
omp.declare_reduction @add_reduction_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> attributes {byref_element_type = !fir.array<?xi32>} alloc {
|
||||
%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
|
||||
omp.yield(%0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
|
||||
} init {
|
||||
^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
|
||||
omp.yield(%arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
|
||||
} combiner {
|
||||
^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
|
||||
omp.yield(%arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
|
||||
}
|
||||
|
||||
// @foo references the reduction symbol from an omp.wsloop nested in
// omp.target / omp.parallel, so the declaration above has a symbol use inside
// the module.
func.func @foo(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
|
||||
%ia.map = omp.map.info var_ptr(%ia : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, implicit, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "ia"}
|
||||
|
||||
omp.target map_entries(%ia.map -> %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
|
||||
omp.parallel {
|
||||
%c1_i32 = arith.constant 1 : i32
|
||||
// The by-ref reduction clause below is the use of the declared reduction.
omp.wsloop reduction(byref @add_reduction_byref_box_heap_Uxi32 %arg0 -> %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
|
||||
omp.loop_nest (%arg2) : i32 = (%c1_i32) to (%c1_i32) inclusive step (%c1_i32) {
|
||||
omp.yield
|
||||
}
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
omp.terminator
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user