[mlir][acc] Add ACCSpecializeForDevice and ACCSpecializeForHost passes Add two new transformation passes for specializing OpenACC IR for different execution contexts: ACCSpecializeForDevice: - Strips OpenACC constructs that are invalid in device code - Replaces data entry ops with their var operands - Unwraps regions from compute/data constructs - Erases runtime operations (init, shutdown, wait, etc.) This pass is applicable in two contexts: 1. Functions marked with `acc.specialized_routine` attribute, where the entire function body is device code 2. Non-specialized functions, where patterns are applied only to `acc` operations nested inside compute constructs (parallel, serial, kernels), not to the constructs themselves ACCSpecializeForHost: - Converts orphan `acc` operations for host execution - Transforms `acc.atomic.*` to load/store via `PointerLikeType` interface - Converts `acc.loop` to `scf.for` or `scf.execute_region` - Replaces orphan data entry ops with their var operands This pass operates in two modes: 1. Default (orphan) mode: Only converts `acc` operations that are not inside or attached to compute regions. Used for host `acc routine`s where compute constructs should be preserved. 2. Host fallback mode (enable-host-fallback=true): Converts ALL `acc` operations including compute constructs, data regions, and runtime ops. This is used to allow testing of the full conversion. These patterns will be used to handle conditional host execution of `acc` regions with if clause. The pattern population functions (populateACCSpecializeForDevice, populateACCOrphanToHostPatterns, populateACCHostFallbackPatterns) are exposed so other passes can reuse these patterns. --------- Co-authored-by: Susan Tan <zujunt@nvidia.com> Co-authored-by: Scott Manley <rscottmanley@gmail.com>
472 lines
19 KiB
C++
472 lines
19 KiB
C++
//===- ACCSpecializeForHost.cpp -------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass converts OpenACC operations to host-compatible representations,
|
|
// enabling execution on the host rather than on accelerator devices.
|
|
//
|
|
// Overview:
|
|
// ---------
|
|
// The pass operates in two modes depending on the `enableHostFallback` option:
|
|
//
|
|
// 1. Default Mode (Orphan Operations Only):
|
|
// Only converts "orphan" ACC operations that are not inside or attached to
|
|
// compute regions. This is used for host routines (acc routine marked for
|
|
// host) where structured/unstructured data constructs, compute constructs,
|
|
// and their associated data operations should be preserved.
|
|
//
|
|
// 2. Host Fallback Mode (enableHostFallback=true):
|
|
// Converts ALL ACC operations within the region to host equivalents. This
|
|
// is used when the `if` clause evaluates to false at runtime and the
|
|
// entire ACC region needs to fall back to host execution.
|
|
//
|
|
// Transformations (Orphan Mode):
|
|
// ------------------------------
|
|
// The following orphan operations are converted:
|
|
//
|
|
// 1. Atomic Ops (converted to load/store):
|
|
// acc.atomic.update -> load + compute + store
|
|
// acc.atomic.read -> load + store (copy)
|
|
// acc.atomic.write -> store
|
|
// acc.atomic.capture -> inline region contents
|
|
//
|
|
// 2. Loop Ops (converted to SCF):
|
|
// acc.loop (structured) -> scf.for
|
|
// acc.loop (unstructured) -> scf.execute_region
|
|
//
|
|
// 3. Orphan Data Entry Ops (replaced with var operand):
|
|
// acc.cache, acc.private, acc.firstprivate, acc.reduction
|
|
// (only if NOT connected to compute constructs or loop)
|
|
//
|
|
// Transformations (Host Fallback Mode):
|
|
// -------------------------------------
|
|
// In addition to orphan transformations, ALL of the following are converted:
|
|
//
|
|
// 1. Data Entry Ops (replaced with var operand):
|
|
// acc.copyin, acc.create, acc.attach, acc.present, acc.deviceptr,
|
|
// acc.get_deviceptr, acc.nocreate, acc.declare_device_resident,
|
|
// acc.declare_link, acc.use_device, acc.update_device
|
|
//
|
|
// 2. Data Exit Ops (erased):
|
|
// acc.copyout, acc.delete, acc.detach, acc.update_host
|
|
//
|
|
// 3. Structured Data/Compute Constructs (region inlined):
|
|
// acc.data, acc.host_data, acc.kernel_environment, acc.declare,
|
|
// acc.parallel, acc.serial, acc.kernels
|
|
//
|
|
// 4. Unstructured Data Ops (erased):
|
|
// acc.enter_data, acc.exit_data, acc.update
|
|
//
|
|
// 5. Declare Ops (erased):
|
|
// acc.declare_enter, acc.declare_exit
|
|
//
|
|
// 6. Runtime Ops (erased):
|
|
// acc.init, acc.shutdown, acc.set, acc.wait, acc.terminator
|
|
//
|
|
// Requirements:
|
|
// -------------
|
|
// For atomic operation conversion, variables must implement the
|
|
// `acc::PointerLikeType` interface to enable generating load/store operations.
|
|
//
|
|
// The pass uses `OpenACCSupport::emitNYI()` to report unsupported cases.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
|
|
|
|
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
|
#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACCUtilsLoop.h"
|
|
#include "mlir/Dialect/OpenACC/Transforms/ACCSpecializePatterns.h"
|
|
#include "mlir/IR/BuiltinAttributes.h"
|
|
#include "mlir/IR/BuiltinOps.h"
|
|
#include "mlir/IR/BuiltinTypes.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/IR/PatternMatch.h"
|
|
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
|
|
|
|
namespace mlir {
|
|
namespace acc {
|
|
#define GEN_PASS_DEF_ACCSPECIALIZEFORHOST
|
|
#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
|
|
} // namespace acc
|
|
} // namespace mlir
|
|
|
|
#define DEBUG_TYPE "acc-specialize-for-host"
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::acc;
|
|
|
|
/// Check if an operation is inside an ACC compute construct.
|
|
static bool isInsideACCComputeConstruct(Operation *op) {
|
|
while ((op = op->getParentOp()))
|
|
if (isa<ACC_COMPUTE_CONSTRUCT_OPS>(op))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
namespace {
|
|
|
|
// Lower orphan acc.atomic.update by: load from addr, clone region expr with
|
|
// the loaded value, then store the computed result back to addr.
|
|
// Only matches if NOT inside a compute region.
|
|
class ACCOrphanAtomicUpdateOpConversion
|
|
: public OpRewritePattern<acc::AtomicUpdateOp> {
|
|
public:
|
|
ACCOrphanAtomicUpdateOpConversion(MLIRContext *ctx, OpenACCSupport &support)
|
|
: OpRewritePattern<acc::AtomicUpdateOp>(ctx), accSupport(support) {}
|
|
|
|
LogicalResult matchAndRewrite(acc::AtomicUpdateOp atomicUpdateOp,
|
|
PatternRewriter &rewriter) const override {
|
|
// Only convert if this op is not inside an ACC compute construct
|
|
if (isInsideACCComputeConstruct(atomicUpdateOp))
|
|
return failure();
|
|
|
|
Value x = atomicUpdateOp.getX();
|
|
Type type = x.getType();
|
|
auto ptrLikeType = dyn_cast<acc::PointerLikeType>(type);
|
|
if (ptrLikeType) {
|
|
auto xTyped = cast<TypedValue<acc::PointerLikeType>>(x);
|
|
rewriter.setInsertionPointAfter(atomicUpdateOp);
|
|
Value loadOp =
|
|
ptrLikeType.genLoad(rewriter, atomicUpdateOp.getLoc(), xTyped, {});
|
|
if (!loadOp) {
|
|
accSupport.emitNYI(atomicUpdateOp.getLoc(),
|
|
"failed to generate load for atomic update");
|
|
return failure();
|
|
}
|
|
IRMapping mapping;
|
|
mapping.map(atomicUpdateOp.getRegion().front().getArgument(0), loadOp);
|
|
Operation *expr = rewriter.clone(*atomicUpdateOp.getFirstOp(), mapping);
|
|
if (!ptrLikeType.genStore(rewriter, atomicUpdateOp.getLoc(),
|
|
expr->getResult(0), xTyped)) {
|
|
accSupport.emitNYI(atomicUpdateOp.getLoc(),
|
|
"failed to generate store for atomic update");
|
|
return failure();
|
|
}
|
|
rewriter.eraseOp(atomicUpdateOp);
|
|
} else {
|
|
accSupport.emitNYI(atomicUpdateOp.getLoc(),
|
|
"unsupported type for atomic update");
|
|
return failure();
|
|
}
|
|
return success();
|
|
}
|
|
|
|
private:
|
|
OpenACCSupport &accSupport;
|
|
};
|
|
|
|
// Lower orphan acc.atomic.read by: load from src, then store into dst.
|
|
// Only matches if NOT inside an ACC compute construct.
|
|
class ACCOrphanAtomicReadOpConversion
|
|
: public OpRewritePattern<acc::AtomicReadOp> {
|
|
public:
|
|
ACCOrphanAtomicReadOpConversion(MLIRContext *ctx, OpenACCSupport &support)
|
|
: OpRewritePattern<acc::AtomicReadOp>(ctx), accSupport(support) {}
|
|
|
|
LogicalResult matchAndRewrite(acc::AtomicReadOp readOp,
|
|
PatternRewriter &rewriter) const override {
|
|
// Only convert if this op is not inside an ACC compute construct
|
|
if (isInsideACCComputeConstruct(readOp))
|
|
return failure();
|
|
|
|
Value x = readOp.getX();
|
|
Value v = readOp.getV();
|
|
auto xPtrType = dyn_cast<acc::PointerLikeType>(x.getType());
|
|
auto vPtrType = dyn_cast<acc::PointerLikeType>(v.getType());
|
|
if (xPtrType && vPtrType) {
|
|
auto xTyped = cast<TypedValue<acc::PointerLikeType>>(x);
|
|
auto vTyped = cast<TypedValue<acc::PointerLikeType>>(v);
|
|
rewriter.setInsertionPointAfter(readOp);
|
|
|
|
// Use genCopy which does load + store
|
|
if (!xPtrType.genCopy(rewriter, readOp.getLoc(), vTyped, xTyped, {})) {
|
|
accSupport.emitNYI(readOp.getLoc(),
|
|
"failed to generate copy for atomic read");
|
|
return failure();
|
|
}
|
|
rewriter.eraseOp(readOp);
|
|
} else {
|
|
accSupport.emitNYI(readOp.getLoc(), "unsupported type for atomic read");
|
|
return failure();
|
|
}
|
|
return success();
|
|
}
|
|
|
|
private:
|
|
OpenACCSupport &accSupport;
|
|
};
|
|
|
|
// Lower orphan acc.atomic.write by: store value into addr.
|
|
// Only matches if NOT inside an ACC compute construct.
|
|
class ACCOrphanAtomicWriteOpConversion
|
|
: public OpRewritePattern<acc::AtomicWriteOp> {
|
|
public:
|
|
ACCOrphanAtomicWriteOpConversion(MLIRContext *ctx, OpenACCSupport &support)
|
|
: OpRewritePattern<acc::AtomicWriteOp>(ctx), accSupport(support) {}
|
|
|
|
LogicalResult matchAndRewrite(acc::AtomicWriteOp writeOp,
|
|
PatternRewriter &rewriter) const override {
|
|
// Only convert if this op is not inside an ACC compute construct
|
|
if (isInsideACCComputeConstruct(writeOp))
|
|
return failure();
|
|
|
|
Value x = writeOp.getX();
|
|
Value expr = writeOp.getExpr();
|
|
auto ptrLikeType = dyn_cast<acc::PointerLikeType>(x.getType());
|
|
if (ptrLikeType) {
|
|
auto xTyped = cast<TypedValue<acc::PointerLikeType>>(x);
|
|
rewriter.setInsertionPointAfter(writeOp);
|
|
if (!ptrLikeType.genStore(rewriter, writeOp.getLoc(), expr, xTyped)) {
|
|
accSupport.emitNYI(writeOp.getLoc(),
|
|
"failed to generate store for atomic write");
|
|
return failure();
|
|
}
|
|
rewriter.eraseOp(writeOp);
|
|
} else {
|
|
accSupport.emitNYI(writeOp.getLoc(), "unsupported type for atomic write");
|
|
return failure();
|
|
}
|
|
return success();
|
|
}
|
|
|
|
private:
|
|
OpenACCSupport &accSupport;
|
|
};
|
|
|
|
// Lower orphan acc.atomic.capture by: unwrap the capture region and erase the
|
|
// wrapper; inner ops are lowered in-order (e.g., read+update becomes load/store
|
|
// to dst then load/compute/store to addr).
|
|
// Only matches if NOT inside an ACC compute construct.
|
|
class ACCOrphanAtomicCaptureOpConversion
|
|
: public OpRewritePattern<acc::AtomicCaptureOp> {
|
|
using OpRewritePattern<acc::AtomicCaptureOp>::OpRewritePattern;
|
|
|
|
LogicalResult matchAndRewrite(acc::AtomicCaptureOp captureOp,
|
|
PatternRewriter &rewriter) const override {
|
|
// Only convert if this op is not inside an ACC compute construct
|
|
if (isInsideACCComputeConstruct(captureOp))
|
|
return failure();
|
|
|
|
assert(captureOp.getRegion().hasOneBlock() && "expected one block");
|
|
Block *block = &captureOp.getRegion().front();
|
|
// Remove the terminator before inlining
|
|
rewriter.eraseOp(block->getTerminator());
|
|
rewriter.inlineBlockBefore(block, captureOp);
|
|
rewriter.eraseOp(captureOp);
|
|
return success();
|
|
}
|
|
};
|
|
|
|
// Convert orphan acc.loop to scf.for or scf.execute_region.
|
|
// Only matches if NOT inside an ACC compute construct.
|
|
class ACCOrphanLoopOpConversion : public OpRewritePattern<acc::LoopOp> {
|
|
using OpRewritePattern<acc::LoopOp>::OpRewritePattern;
|
|
|
|
LogicalResult matchAndRewrite(acc::LoopOp loopOp,
|
|
PatternRewriter &rewriter) const override {
|
|
// Only convert if this op is not inside an ACC compute construct
|
|
if (isInsideACCComputeConstruct(loopOp))
|
|
return failure();
|
|
|
|
if (loopOp.getUnstructured()) {
|
|
auto executeRegion =
|
|
acc::convertUnstructuredACCLoopToSCFExecuteRegion(loopOp, rewriter);
|
|
if (!executeRegion)
|
|
return failure();
|
|
rewriter.replaceOp(loopOp, executeRegion);
|
|
} else {
|
|
auto forOp = acc::convertACCLoopToSCFFor(loopOp, rewriter,
|
|
/*enableCollapse=*/false);
|
|
if (!forOp)
|
|
return failure();
|
|
rewriter.replaceOp(loopOp, forOp);
|
|
}
|
|
return success();
|
|
}
|
|
};
|
|
|
|
/// Check if an operation is used by a compute construct or loop op
|
|
static bool isUsedByComputeOrLoop(Operation *op) {
|
|
for (auto *user : op->getUsers())
|
|
if (isa<acc::ParallelOp, acc::SerialOp, acc::KernelsOp, acc::LoopOp>(user))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/// Orphan data entry ops - only match if NOT connected to compute/loop and
|
|
/// NOT inside a compute region. Used for acc.cache, acc.private,
|
|
/// acc.firstprivate, acc.reduction.
|
|
template <typename OpTy>
|
|
class ACCOrphanDataEntryConversion : public OpRewritePattern<OpTy> {
|
|
using OpRewritePattern<OpTy>::OpRewritePattern;
|
|
|
|
LogicalResult matchAndRewrite(OpTy op,
|
|
PatternRewriter &rewriter) const override {
|
|
// Only convert if this op is not used by a compute construct or loop,
|
|
// and not inside an ACC compute construct.
|
|
if (isUsedByComputeOrLoop(op) || isInsideACCComputeConstruct(op))
|
|
return failure();
|
|
|
|
if (op->use_empty())
|
|
rewriter.eraseOp(op);
|
|
else
|
|
rewriter.replaceOp(op, op.getVar());
|
|
return success();
|
|
}
|
|
};
|
|
|
|
class ACCSpecializeForHost
|
|
: public acc::impl::ACCSpecializeForHostBase<ACCSpecializeForHost> {
|
|
public:
|
|
using ACCSpecializeForHostBase<
|
|
ACCSpecializeForHost>::ACCSpecializeForHostBase;
|
|
|
|
void runOnOperation() override {
|
|
LLVM_DEBUG(llvm::dbgs() << "Enter ACCSpecializeForHost()\n");
|
|
|
|
func::FuncOp funcOp = getOperation();
|
|
if (!acc::isSpecializedAccRoutine(funcOp)) {
|
|
// Convert orphan operations to host, or all ACC operations if
|
|
// host fallback patterns are enabled.
|
|
auto *context = &getContext();
|
|
RewritePatternSet patterns(context);
|
|
OpenACCSupport &accSupport = getAnalysis<OpenACCSupport>();
|
|
if (enableHostFallback)
|
|
populateACCHostFallbackPatterns(patterns, accSupport);
|
|
else
|
|
populateACCOrphanToHostPatterns(patterns, accSupport);
|
|
GreedyRewriteConfig config;
|
|
config.setUseTopDownTraversal(true);
|
|
if (failed(applyPatternsGreedily(funcOp, std::move(patterns), config)))
|
|
signalPassFailure();
|
|
}
|
|
|
|
LLVM_DEBUG(llvm::dbgs() << "Exit ACCSpecializeForHost()\n");
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Pattern population functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Adds the patterns used for host routines to `patterns`. Every pattern
/// registered here refuses to match an operation nested inside an ACC compute
/// construct, so compute constructs, data constructs and the data operations
/// attached to them are not touched by this set.
///
/// Registered conversions:
///   - acc.loop (only when `enableLoopConversion` is true)
///   - acc.atomic.update / read / write / capture
///   - acc.cache, acc.private, acc.firstprivate, acc.reduction, which are
///     additionally skipped when used by a compute construct or loop
///
/// `accSupport` is handed to the atomic update/read/write patterns, which use
/// it to report unsupported cases.
void mlir::acc::populateACCOrphanToHostPatterns(RewritePatternSet &patterns,
                                                OpenACCSupport &accSupport,
                                                bool enableLoopConversion) {
  MLIRContext *ctx = patterns.getContext();

  // Orphan loops.
  if (enableLoopConversion)
    patterns.insert<ACCOrphanLoopOpConversion>(ctx);

  // Orphan atomics that need the support object for diagnostics.
  patterns.insert<ACCOrphanAtomicUpdateOpConversion,
                  ACCOrphanAtomicReadOpConversion,
                  ACCOrphanAtomicWriteOpConversion>(ctx, accSupport);

  // Atomic capture unwrapping followed by the orphan data entry ops.
  patterns.insert<ACCOrphanAtomicCaptureOpConversion,
                  ACCOrphanDataEntryConversion<acc::CacheOp>,
                  ACCOrphanDataEntryConversion<acc::PrivateOp>,
                  ACCOrphanDataEntryConversion<acc::FirstprivateOp>,
                  ACCOrphanDataEntryConversion<acc::ReductionOp>>(ctx);
}
|
|
|
|
/// Adds the patterns used for the host fallback path to `patterns`. Unlike the
/// orphan-only set, this set also registers patterns for compute constructs,
/// structured and unstructured data constructs, their data operations and the
/// runtime operations.
///
/// The orphan loop and atomic patterns are reused here; the original author's
/// rationale is that the enclosing compute constructs are converted by this
/// same set. The loop pattern is only added when `enableLoopConversion` is
/// true. `accSupport` is handed to the atomic update/read/write patterns,
/// which use it to report unsupported cases.
void mlir::acc::populateACCHostFallbackPatterns(RewritePatternSet &patterns,
                                                OpenACCSupport &accSupport,
                                                bool enableLoopConversion) {
  MLIRContext *ctx = patterns.getContext();

  // Loops.
  if (enableLoopConversion)
    patterns.insert<ACCOrphanLoopOpConversion>(ctx);

  // Atomics: update/read/write need the support object, capture does not.
  patterns.insert<ACCOrphanAtomicUpdateOpConversion,
                  ACCOrphanAtomicReadOpConversion,
                  ACCOrphanAtomicWriteOpConversion>(ctx, accSupport);
  patterns.insert<ACCOrphanAtomicCaptureOpConversion>(ctx);

  // Ops replaced by their var operand: cache, privatization ops (including
  // those attached to compute constructs or loops), and data entry ops.
  patterns.insert<ACCOpReplaceWithVarConversion<acc::CacheOp>,
                  ACCOpReplaceWithVarConversion<acc::PrivateOp>,
                  ACCOpReplaceWithVarConversion<acc::FirstprivateOp>,
                  ACCOpReplaceWithVarConversion<acc::ReductionOp>,
                  ACCOpReplaceWithVarConversion<acc::CopyinOp>,
                  ACCOpReplaceWithVarConversion<acc::CreateOp>,
                  ACCOpReplaceWithVarConversion<acc::AttachOp>,
                  ACCOpReplaceWithVarConversion<acc::PresentOp>,
                  ACCOpReplaceWithVarConversion<acc::DevicePtrOp>,
                  ACCOpReplaceWithVarConversion<acc::GetDevicePtrOp>,
                  ACCOpReplaceWithVarConversion<acc::NoCreateOp>,
                  ACCOpReplaceWithVarConversion<acc::DeclareDeviceResidentOp>,
                  ACCOpReplaceWithVarConversion<acc::DeclareLinkOp>,
                  ACCOpReplaceWithVarConversion<acc::UseDeviceOp>,
                  ACCOpReplaceWithVarConversion<acc::UpdateDeviceOp>>(ctx);

  // Data exit ops: erased.
  patterns.insert<ACCOpEraseConversion<acc::CopyoutOp>,
                  ACCOpEraseConversion<acc::DeleteOp>,
                  ACCOpEraseConversion<acc::DetachOp>,
                  ACCOpEraseConversion<acc::UpdateHostOp>>(ctx);

  // Structured data constructs: region unwrapped.
  patterns.insert<ACCRegionUnwrapConversion<acc::DataOp>,
                  ACCRegionUnwrapConversion<acc::HostDataOp>,
                  ACCRegionUnwrapConversion<acc::KernelEnvironmentOp>>(ctx);

  // Declare ops.
  patterns.insert<ACCDeclareEnterOpConversion,
                  ACCRegionUnwrapConversion<acc::DeclareOp>>(ctx);

  // Unstructured data operations and runtime operations: erased.
  patterns.insert<ACCOpEraseConversion<acc::EnterDataOp>,
                  ACCOpEraseConversion<acc::ExitDataOp>,
                  ACCOpEraseConversion<acc::UpdateOp>,
                  ACCOpEraseConversion<acc::InitOp>,
                  ACCOpEraseConversion<acc::ShutdownOp>,
                  ACCOpEraseConversion<acc::SetOp>,
                  ACCOpEraseConversion<acc::WaitOp>,
                  ACCOpEraseConversion<acc::TerminatorOp>>(ctx);

  // Compute constructs: region unwrapped.
  patterns.insert<ACCRegionUnwrapConversion<acc::ParallelOp>,
                  ACCRegionUnwrapConversion<acc::SerialOp>,
                  ACCRegionUnwrapConversion<acc::KernelsOp>>(ctx);
}
|