790 lines
32 KiB
C++
790 lines
32 KiB
C++
//===- ACCImplicitData.cpp ------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass implements the OpenACC specification for "Variables with
|
|
// Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2).
|
|
//
|
|
// Overview:
|
|
// ---------
|
|
// The pass automatically generates data clause operations for variables used
|
|
// within OpenACC compute constructs (parallel, kernels, serial) that do not
|
|
// already have explicit data clauses. The semantics follow these rules:
|
|
//
|
|
// 1. If there is a default(none) clause visible, no implicit data actions
|
|
// apply.
|
|
//
|
|
// 2. An aggregate variable (arrays, derived types, etc.) will be treated as:
|
|
// - In a present clause when default(present) is visible.
|
|
// - In a copy clause otherwise.
|
|
//
|
|
// 3. A scalar variable will be treated as if it appears in:
|
|
// - A copy clause if the compute construct is a kernels construct.
|
|
// - A firstprivate clause otherwise (parallel, serial).
|
|
//
|
|
// Requirements:
|
|
// -------------
|
|
// To use this pass in a pipeline, the following requirements must be met:
|
|
//
|
|
// 1. Type Interface Implementation: Variables from the dialect being used
|
|
// must implement one or both of the following MLIR interfaces:
|
|
// `acc::MappableType` and/or `acc::PointerLikeType`
|
|
//
|
|
// These interfaces provide the necessary methods for the pass to:
|
|
// - Determine variable type categories (scalar vs. aggregate)
|
|
// - Generate appropriate bounds information
|
|
// - Generate privatization recipes
|
|
//
|
|
// 2. Operation Interface Implementation: Operations that access partial
|
|
// entities or create views should implement the following MLIR
|
|
// interfaces: `acc::PartialEntityAccess` and/or
|
|
// `mlir::ViewLikeOpInterface`
|
|
//
|
|
// These interfaces are used for proper data clause ordering, ensuring
|
|
// that base entities are mapped before derived entities (e.g., a
|
|
// struct is mapped before its fields, an array is mapped before
|
|
// subarray views).
|
|
//
|
|
// 3. Analysis Registration (Optional): If custom behavior is needed for
|
|
// variable name extraction or alias analysis, the dialect should
|
|
// pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses.
|
|
//
|
|
// If not registered, default behavior will be used.
|
|
//
|
|
// Implementation Details:
|
|
// -----------------------
|
|
// The pass performs the following operations:
|
|
//
|
|
// 1. Finds candidate variables which are live-in to the compute region and
|
|
// are not already in a data clause or private clause.
|
|
//
|
|
// 2. Generates both data "entry" and "exit" clause operations that match
|
|
// the intended action depending on variable type:
|
|
// - copy -> acc.copyin (entry) + acc.copyout (exit)
|
|
// - present -> acc.present (entry) + acc.delete (exit)
|
|
// - firstprivate -> acc.firstprivate (entry only, no exit)
|
|
//
|
|
// 3. Ensures that default clause is taken into consideration by looking
|
|
// through current construct and parent constructs to find the "visible
|
|
// default clause".
|
|
//
|
|
// 4. Fixes up SSA value links so that uses in the acc region reference the
|
|
// result of the newly created data clause operations.
|
|
//
|
|
// 5. When generating implicit data clause operations, it also adds variable
|
|
// name information and marks them with the implicit flag.
|
|
//
|
|
// 6. Recipes are generated by calling the appropriate entrypoints in the
|
|
// MappableType and PointerLikeType interfaces.
|
|
//
|
|
// 7. AliasAnalysis is used to determine if a variable is already covered by
|
|
// an existing data clause (e.g., an interior pointer covered by its parent).
|
|
//
|
|
// Examples:
|
|
// ---------
|
|
//
|
|
// Example 1: Scalar in parallel construct (implicit firstprivate)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
|
|
// acc.parallel {
|
|
// %val = memref.load %scalar[] : memref<f32>
|
|
// acc.yield
|
|
// }
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
|
|
// %firstpriv = acc.firstprivate varPtr(%scalar : memref<f32>)
|
|
// -> memref<f32> {implicit = true, name = "x"}
|
|
// acc.parallel firstprivate(@recipe -> %firstpriv : memref<f32>) {
|
|
// %val = memref.load %firstpriv[] : memref<f32>
|
|
// acc.yield
|
|
// }
|
|
// }
|
|
//
|
|
// Example 2: Scalar in kernels construct (implicit copy)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
|
|
// acc.kernels {
|
|
// %val = memref.load %scalar[] : memref<i32>
|
|
// acc.terminator
|
|
// }
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
|
|
// %copyin = acc.copyin varPtr(%scalar : memref<i32>) -> memref<i32>
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "n"}
|
|
// acc.kernels dataOperands(%copyin : memref<i32>) {
|
|
// %val = memref.load %copyin[] : memref<i32>
|
|
// acc.terminator
|
|
// }
|
|
// acc.copyout accPtr(%copyin : memref<i32>)
|
|
// to varPtr(%scalar : memref<i32>)
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "n"}
|
|
// }
|
|
//
|
|
// Example 3: Array (aggregate) in parallel (implicit copy)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// acc.parallel {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %array[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// }
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// %copyin = acc.copyin varPtr(%array : memref<100xf32>)
|
|
// -> memref<100xf32>
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "arr"}
|
|
// acc.parallel dataOperands(%copyin : memref<100xf32>) {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %copyin[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// }
|
|
// acc.copyout accPtr(%copyin : memref<100xf32>)
|
|
// to varPtr(%array : memref<100xf32>)
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "arr"}
|
|
// }
|
|
//
|
|
// Example 4: Array with default(present)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// acc.parallel {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %array[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// } attributes {defaultAttr = #acc<defaultvalue present>}
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// %present = acc.present varPtr(%array : memref<100xf32>)
|
|
// -> memref<100xf32>
|
|
// {implicit = true, name = "arr"}
|
|
// acc.parallel dataOperands(%present : memref<100xf32>)
|
|
// attributes {defaultAttr = #acc<defaultvalue present>} {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %present[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// }
|
|
// acc.delete accPtr(%present : memref<100xf32>)
|
|
// {dataClause = #acc<data_clause acc_present>,
|
|
// implicit = true, name = "arr"}
|
|
// }
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
|
|
|
|
#include "mlir/Analysis/AliasAnalysis.h"
|
|
#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACCUtils.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/BuiltinOps.h"
|
|
#include "mlir/IR/Dominance.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/IR/Value.h"
|
|
#include "mlir/Interfaces/FunctionInterfaces.h"
|
|
#include "mlir/Interfaces/ViewLikeInterface.h"
|
|
#include "mlir/Transforms/RegionUtils.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/TypeSwitch.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include <type_traits>
|
|
|
|
namespace mlir {
|
|
namespace acc {
|
|
#define GEN_PASS_DEF_ACCIMPLICITDATA
|
|
#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
|
|
} // namespace acc
|
|
} // namespace mlir
|
|
|
|
#define DEBUG_TYPE "acc-implicit-data"
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
|
|
/// Pass that walks a module and attaches implicit data clause operations
/// (copyin/present/firstprivate/...) to OpenACC compute constructs for
/// live-in variables that are not yet covered by a data clause.
class ACCImplicitData : public acc::impl::ACCImplicitDataBase<ACCImplicitData> {
public:
  using acc::impl::ACCImplicitDataBase<ACCImplicitData>::ACCImplicitDataBase;

  /// Pass entry point: visits every compute construct in the module.
  void runOnOperation() override;

private:
  /// Searches `dominatingDataClauses` for an entry data clause op whose
  /// variable must-aliases `var`. Yields that op, or nullptr when none of the
  /// dominating clauses matches.
  template <typename OpT>
  Operation *getOriginalDataClauseOpForAlias(
      Value var, OpBuilder &builder, OpT computeConstructOp,
      const SmallVector<Value> &dominatingDataClauses);

  /// Creates the implicit data clause op (`acc.copyin`, `acc.present`,
  /// `acc.firstprivate`, ...) appropriate for the candidate `var`. Yields
  /// nullptr when the variable's type category is not handled.
  template <typename OpT>
  Operation *generateDataClauseOpForCandidate(
      Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
      const SmallVector<Value> &dominatingDataClauses,
      const std::optional<acc::ClauseDefaultValue> &defaultClause);

  /// Drives implicit data generation for a single compute construct.
  template <typename OpT>
  void
  generateImplicitDataOps(ModuleOp &module, OpT computeConstructOp,
                          std::optional<acc::ClauseDefaultValue> &defaultClause,
                          acc::OpenACCSupport &accSupport);

  /// Looks up, or creates at the start of the module, the private recipe for
  /// the type of `var`.
  acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var,
                                             Location loc, OpBuilder &builder,
                                             acc::OpenACCSupport &accSupport);

  /// Looks up, or creates at the start of the module, the firstprivate recipe
  /// for the type of `var`.
  acc::FirstprivateRecipeOp
  generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc,
                             OpBuilder &builder,
                             acc::OpenACCSupport &accSupport);

  /// Attaches recipes to every private/firstprivate operand in `newOperands`.
  void generateRecipes(ModuleOp &module, OpBuilder &builder,
                       Operation *computeConstructOp,
                       const SmallVector<Value> &newOperands);
};
|
|
|
|
/// Determines if a variable is a candidate for implicit data mapping.
|
|
/// Returns true if the variable is a candidate, false otherwise.
|
|
static bool isCandidateForImplicitData(Value val, Region &accRegion,
|
|
acc::OpenACCSupport &accSupport) {
|
|
// Ensure the variable is an allowed type for data clause.
|
|
if (!acc::isPointerLikeType(val.getType()) &&
|
|
!acc::isMappableType(val.getType()))
|
|
return false;
|
|
|
|
if (accSupport.isValidValueUse(val, accRegion))
|
|
return false;
|
|
|
|
// If this is already coming from a data clause, we do not need to generate
|
|
// another.
|
|
if (isa_and_nonnull<ACC_DATA_ENTRY_OPS>(val.getDefiningOp()))
|
|
return false;
|
|
|
|
// If this is only used by private clauses, it is not a real live-in.
|
|
if (acc::isOnlyUsedByPrivateClauses(val, accRegion))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
template <typename OpT>
|
|
Operation *ACCImplicitData::getOriginalDataClauseOpForAlias(
|
|
Value var, OpBuilder &builder, OpT computeConstructOp,
|
|
const SmallVector<Value> &dominatingDataClauses) {
|
|
auto &aliasAnalysis = this->getAnalysis<AliasAnalysis>();
|
|
for (auto dataClause : dominatingDataClauses) {
|
|
if (auto *dataClauseOp = dataClause.getDefiningOp()) {
|
|
// Only accept clauses that guarantee that the alias is present.
|
|
if (isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp,
|
|
acc::DevicePtrOp>(dataClauseOp))
|
|
if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust())
|
|
return dataClauseOp;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Generates bounds for variables that have unknown dimensions
|
|
static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp,
|
|
OpBuilder &builder) {
|
|
|
|
if (!acc::getBounds(dataClauseOp).empty())
|
|
// If bounds are already present, do not overwrite them.
|
|
return;
|
|
|
|
// For types that have unknown dimensions, attempt to generate bounds by
|
|
// relying on MappableType being able to extract it from the IR.
|
|
auto var = acc::getVar(dataClauseOp);
|
|
auto type = var.getType();
|
|
if (auto mappableTy = dyn_cast<acc::MappableType>(type)) {
|
|
if (mappableTy.hasUnknownDimensions()) {
|
|
TypeSwitch<Operation *>(dataClauseOp)
|
|
.Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto dataClauseOp) {
|
|
if (std::is_same_v<decltype(dataClauseOp), acc::DevicePtrOp>)
|
|
return;
|
|
OpBuilder::InsertionGuard guard(builder);
|
|
builder.setInsertionPoint(dataClauseOp);
|
|
auto bounds = mappableTy.generateAccBounds(var, builder);
|
|
if (!bounds.empty())
|
|
dataClauseOp.getBoundsMutable().assign(bounds);
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns the private recipe for the type of `var`. An existing recipe with
/// the name chosen by `accSupport` is reused; otherwise a new one is created
/// at the start of the module body. When the recipe cannot be populated, a
/// not-yet-implemented diagnostic is emitted and a null op is returned.
acc::PrivateRecipeOp
ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var,
                                       Location loc, OpBuilder &builder,
                                       acc::OpenACCSupport &accSupport) {
  Type varType = var.getType();
  std::string recipeName =
      accSupport.getRecipeName(acc::RecipeKind::private_recipe, varType, var);

  // Reuse a recipe of the same name if the module already has one.
  if (auto cachedRecipe =
          module.lookupSymbol<acc::PrivateRecipeOp>(recipeName))
    return cachedRecipe;

  // Temporarily move the builder to the top of the module; the guard restores
  // the previous insertion point on exit.
  OpBuilder::InsertionGuard guard(builder);
  builder.setInsertionPointToStart(module.getBody());

  auto createdRecipe = acc::PrivateRecipeOp::createAndPopulate(
      builder, loc, recipeName, varType);
  if (createdRecipe.has_value())
    return createdRecipe.value();

  accSupport.emitNYI(loc, "implicit private");
  return nullptr;
}
|
|
|
|
/// Returns the firstprivate recipe for the type of `var`. An existing recipe
/// with the name chosen by `accSupport` is reused; otherwise a new one is
/// created at the start of the module body. When the recipe cannot be
/// populated, a not-yet-implemented diagnostic is emitted and a null op is
/// returned.
acc::FirstprivateRecipeOp
ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var,
                                            Location loc, OpBuilder &builder,
                                            acc::OpenACCSupport &accSupport) {
  Type varType = var.getType();
  std::string recipeName = accSupport.getRecipeName(
      acc::RecipeKind::firstprivate_recipe, varType, var);

  // Reuse a recipe of the same name if the module already has one.
  if (auto cachedRecipe =
          module.lookupSymbol<acc::FirstprivateRecipeOp>(recipeName))
    return cachedRecipe;

  // Temporarily move the builder to the top of the module; the guard restores
  // the previous insertion point on exit.
  OpBuilder::InsertionGuard guard(builder);
  builder.setInsertionPointToStart(module.getBody());

  auto createdRecipe = acc::FirstprivateRecipeOp::createAndPopulate(
      builder, loc, recipeName, varType);
  if (createdRecipe.has_value())
    return createdRecipe.value();

  accSupport.emitNYI(loc, "implicit firstprivate");
  return nullptr;
}
|
|
|
|
/// For each value in `newOperands`, obtains the matching recipe and records
/// it on the defining clause op:
///  - `acc.private`      -> private recipe
///  - `acc.firstprivate` -> firstprivate recipe
/// Any other producer is reported as not yet implemented. If a recipe could
/// not be generated the clause op is left without a recipe attribute.
/// `computeConstructOp` is not consulted here.
void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder,
                                      Operation *computeConstructOp,
                                      const SmallVector<Value> &newOperands) {
  auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();
  for (Value operand : newOperands) {
    Location loc = operand.getLoc();
    Operation *clauseOp = operand.getDefiningOp();

    if (auto privateOp = dyn_cast_or_null<acc::PrivateOp>(clauseOp)) {
      acc::PrivateRecipeOp recipe = generatePrivateRecipe(
          module, acc::getVar(clauseOp), loc, builder, accSupport);
      if (recipe)
        privateOp.setRecipeAttr(
            SymbolRefAttr::get(module->getContext(), recipe.getSymName()));
      continue;
    }

    if (auto firstprivateOp =
            dyn_cast_or_null<acc::FirstprivateOp>(clauseOp)) {
      acc::FirstprivateRecipeOp recipe = generateFirstprivateRecipe(
          module, acc::getVar(clauseOp), loc, builder, accSupport);
      if (recipe)
        firstprivateOp.setRecipeAttr(
            SymbolRefAttr::get(module->getContext(), recipe.getSymName()));
      continue;
    }

    accSupport.emitNYI(loc, "implicit reduction");
  }
}
|
|
|
|
// Generates the data entry data op clause so that it adheres to OpenACC
|
|
// rules as follows (line numbers and specification from OpenACC 3.4):
|
|
// 1388 An aggregate variable will be treated as if it appears either:
|
|
// 1389 - In a present clause if there is a default(present) clause visible at
|
|
// the compute construct.
|
|
// 1391 - In a copy clause otherwise.
|
|
// 1392 A scalar variable will be treated as if it appears either:
|
|
// 1393 - In a copy clause if the compute construct is a kernels construct.
|
|
// 1394 - In a firstprivate clause otherwise.
|
|
template <typename OpT>
|
|
Operation *ACCImplicitData::generateDataClauseOpForCandidate(
|
|
Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
|
|
const SmallVector<Value> &dominatingDataClauses,
|
|
const std::optional<acc::ClauseDefaultValue> &defaultClause) {
|
|
auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();
|
|
acc::VariableTypeCategory typeCategory =
|
|
acc::VariableTypeCategory::uncategorized;
|
|
if (auto mappableTy = dyn_cast<acc::MappableType>(var.getType())) {
|
|
typeCategory = mappableTy.getTypeCategory(var);
|
|
} else if (auto pointerLikeTy =
|
|
dyn_cast<acc::PointerLikeType>(var.getType())) {
|
|
typeCategory = pointerLikeTy.getPointeeTypeCategory(
|
|
cast<TypedValue<acc::PointerLikeType>>(var),
|
|
pointerLikeTy.getElementType());
|
|
}
|
|
|
|
bool isScalar =
|
|
acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar);
|
|
bool isAnyAggregate = acc::bitEnumContainsAny(
|
|
typeCategory, acc::VariableTypeCategory::aggregate);
|
|
Location loc = computeConstructOp->getLoc();
|
|
|
|
Operation *op = nullptr;
|
|
op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp,
|
|
dominatingDataClauses);
|
|
if (op) {
|
|
if (isa<acc::NoCreateOp>(op))
|
|
return acc::NoCreateOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var),
|
|
acc::getBounds(op));
|
|
|
|
if (isa<acc::DevicePtrOp>(op))
|
|
return acc::DevicePtrOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var),
|
|
acc::getBounds(op));
|
|
|
|
// The original data clause op is a PresentOp, CopyinOp, or CreateOp,
|
|
// hence guaranteed to be present.
|
|
return acc::PresentOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var),
|
|
acc::getBounds(op));
|
|
} else if (isScalar) {
|
|
if (enableImplicitReductionCopy &&
|
|
acc::isOnlyUsedByReductionClauses(var,
|
|
computeConstructOp->getRegion(0))) {
|
|
auto copyinOp =
|
|
acc::CopyinOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
copyinOp.setDataClause(acc::DataClause::acc_reduction);
|
|
return copyinOp.getOperation();
|
|
}
|
|
if constexpr (std::is_same_v<OpT, acc::KernelsOp> ||
|
|
std::is_same_v<OpT, acc::KernelEnvironmentOp>) {
|
|
// Scalars are implicit copyin in kernels construct.
|
|
// We also do the same for acc.kernel_environment because semantics
|
|
// of user variable mappings should be applied while ACC construct exists
|
|
// and at this point we should only be dealing with unmapped variables
|
|
// that were made live-in by the compiler.
|
|
// TODO: This may be revisited.
|
|
auto copyinOp =
|
|
acc::CopyinOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
copyinOp.setDataClause(acc::DataClause::acc_copy);
|
|
return copyinOp.getOperation();
|
|
} else {
|
|
// Scalars are implicit firstprivate in parallel and serial construct.
|
|
return acc::FirstprivateOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
}
|
|
} else if (isAnyAggregate) {
|
|
Operation *newDataOp = nullptr;
|
|
|
|
// When default(present) is true, the implicit behavior is present.
|
|
if (defaultClause.has_value() &&
|
|
defaultClause.value() == acc::ClauseDefaultValue::Present) {
|
|
newDataOp = acc::PresentOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
newDataOp->setAttr(acc::getFromDefaultClauseAttrName(),
|
|
builder.getUnitAttr());
|
|
} else {
|
|
auto copyinOp =
|
|
acc::CopyinOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
copyinOp.setDataClause(acc::DataClause::acc_copy);
|
|
newDataOp = copyinOp.getOperation();
|
|
}
|
|
|
|
return newDataOp;
|
|
} else {
|
|
// This is not a fatal error - for example when the element type is
|
|
// pointer type (aka we have a pointer of pointer), it is potentially a
|
|
// deep copy scenario which is not being handled here.
|
|
// Other types need to be canonicalized. Thus just log unhandled cases.
|
|
LLVM_DEBUG(llvm::dbgs()
|
|
<< "Unhandled case for implicit data mapping " << var << "\n");
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Ensures that result values from the acc data clause ops are used inside the
|
|
// acc region. ie:
|
|
// acc.kernels {
|
|
// use %val
|
|
// }
|
|
// =>
|
|
// %dev = acc.dataop %val
|
|
// acc.kernels {
|
|
// use %dev
|
|
// }
|
|
static void legalizeValuesInRegion(Region &accRegion,
|
|
SmallVector<Value> &newPrivateOperands,
|
|
SmallVector<Value> &newDataClauseOperands) {
|
|
for (Value dataClause :
|
|
llvm::concat<Value>(newDataClauseOperands, newPrivateOperands)) {
|
|
Value var = acc::getVar(dataClause.getDefiningOp());
|
|
replaceAllUsesInRegionWith(var, dataClause, accRegion);
|
|
}
|
|
}
|
|
|
|
// Adds the private operands to the compute construct operation.
|
|
template <typename OpT>
|
|
static void addNewPrivateOperands(OpT &accOp,
|
|
const SmallVector<Value> &privateOperands) {
|
|
if (privateOperands.empty())
|
|
return;
|
|
|
|
for (auto priv : privateOperands) {
|
|
if (isa<acc::PrivateOp>(priv.getDefiningOp())) {
|
|
accOp.getPrivateOperandsMutable().append(priv);
|
|
} else if (isa<acc::FirstprivateOp>(priv.getDefiningOp())) {
|
|
accOp.getFirstprivateOperandsMutable().append(priv);
|
|
} else {
|
|
llvm_unreachable("unhandled reduction operand");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the first user of the acc variable produced by `dataEntryOp` that
// is a data exit op, or nullptr when no user is one.
static Operation *findDataExitOp(Operation *dataEntryOp) {
  auto accVar = acc::getAccVar(dataEntryOp);
  auto users = accVar.getUsers();
  auto exitIt = llvm::find_if(
      users, [](Operation *user) { return isa<ACC_DATA_EXIT_OPS>(user); });
  return exitIt == users.end() ? nullptr : *exitIt;
}
|
|
|
|
// Generates matching data exit operation as described in the acc dialect
|
|
// for how data clauses are decomposed:
|
|
// https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories
|
|
// Key ones used here:
|
|
// * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after
|
|
// region)
|
|
// * acc {construct} present -> acc.present (before region) + acc.delete
|
|
// (after region)
|
|
static void
|
|
generateDataExitOperations(OpBuilder &builder, Operation *accOp,
|
|
const SmallVector<Value> &newDataClauseOperands,
|
|
const SmallVector<Value> &sortedDataClauseOperands) {
|
|
builder.setInsertionPointAfter(accOp);
|
|
Value lastDataClause = nullptr;
|
|
for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) {
|
|
if (llvm::find(newDataClauseOperands, dataEntry) ==
|
|
newDataClauseOperands.end()) {
|
|
// If this is not a new data clause operand, we should not generate an
|
|
// exit operation for it.
|
|
lastDataClause = dataEntry;
|
|
continue;
|
|
}
|
|
if (lastDataClause)
|
|
if (auto *dataExitOp = findDataExitOp(lastDataClause.getDefiningOp()))
|
|
builder.setInsertionPointAfter(dataExitOp);
|
|
Operation *dataEntryOp = dataEntry.getDefiningOp();
|
|
if (isa<acc::CopyinOp>(dataEntryOp)) {
|
|
auto copyoutOp = acc::CopyoutOp::create(
|
|
builder, dataEntryOp->getLoc(), dataEntry, acc::getVar(dataEntryOp),
|
|
/*structured=*/true, /*implicit=*/true,
|
|
acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp));
|
|
copyoutOp.setDataClause(acc::DataClause::acc_copy);
|
|
} else if (isa<acc::PresentOp, acc::NoCreateOp>(dataEntryOp)) {
|
|
auto deleteOp = acc::DeleteOp::create(
|
|
builder, dataEntryOp->getLoc(), dataEntry,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp));
|
|
deleteOp.setDataClause(acc::getDataClause(dataEntryOp).value());
|
|
} else if (isa<acc::DevicePtrOp>(dataEntryOp)) {
|
|
// Do nothing.
|
|
} else {
|
|
llvm_unreachable("unhandled data exit");
|
|
}
|
|
lastDataClause = dataEntry;
|
|
}
|
|
}
|
|
|
|
/// Returns all base references of a value in order.
|
|
/// So for example, if we have a reference to a struct field like
|
|
/// s.f1.f2.f3, this will return <s, s.f1, s.f1.f2, s.f1.f2.f3>.
|
|
/// Any intermediate casts/view-like operations are included in the
|
|
/// chain as well.
|
|
static SmallVector<Value> getBaseRefsChain(Value val) {
|
|
SmallVector<Value> baseRefs;
|
|
baseRefs.push_back(val);
|
|
while (true) {
|
|
Value prevVal = val;
|
|
|
|
val = acc::getBaseEntity(val);
|
|
if (val != baseRefs.front())
|
|
baseRefs.insert(baseRefs.begin(), val);
|
|
|
|
// If this is a view-like operation, it is effectively another
|
|
// view of the same entity so we should add it to the chain also.
|
|
if (auto viewLikeOp = val.getDefiningOp<ViewLikeOpInterface>()) {
|
|
val = viewLikeOp.getViewSource();
|
|
baseRefs.insert(baseRefs.begin(), val);
|
|
}
|
|
|
|
// Continue loop if we made any progress
|
|
if (val == prevVal)
|
|
break;
|
|
}
|
|
|
|
return baseRefs;
|
|
}
|
|
|
|
// Inserts the result of `newClause` into `sortedDataClauseOperands` so that a
// clause on a base entity precedes clauses on entities derived from it. If the
// variable of `newClause` appears in the base reference chain of an existing
// clause, the new op is moved before that clause's op and inserted at the same
// position in the list; otherwise it is appended at the end.
static void insertInSortedOrder(SmallVector<Value> &sortedDataClauseOperands,
                                Operation *newClause) {
  // True when the new clause maps a base reference of `existingClause`.
  auto isBaseOfExistingClause = [&](Value existingClause) {
    auto existingVar = acc::getVar(existingClause.getDefiningOp());
    SmallVector<Value> baseRefs = getBaseRefsChain(existingVar);
    return llvm::is_contained(baseRefs, acc::getVar(newClause));
  };

  auto *insertPos =
      llvm::find_if(sortedDataClauseOperands, isBaseOfExistingClause);
  if (insertPos == sortedDataClauseOperands.end()) {
    sortedDataClauseOperands.push_back(acc::getAccVar(newClause));
    return;
  }

  newClause->moveBefore(insertPos->getDefiningOp());
  sortedDataClauseOperands.insert(insertPos, acc::getAccVar(newClause));
}
|
|
|
|
template <typename OpT>
|
|
void ACCImplicitData::generateImplicitDataOps(
|
|
ModuleOp &module, OpT computeConstructOp,
|
|
std::optional<acc::ClauseDefaultValue> &defaultClause,
|
|
acc::OpenACCSupport &accSupport) {
|
|
// Implicit data attributes are only applied if "[t]here is no default(none)
|
|
// clause visible at the compute construct."
|
|
if (defaultClause.has_value() &&
|
|
defaultClause.value() == acc::ClauseDefaultValue::None)
|
|
return;
|
|
assert(!defaultClause.has_value() ||
|
|
defaultClause.value() == acc::ClauseDefaultValue::Present);
|
|
|
|
// 1) Collect live-in values.
|
|
Region &accRegion = computeConstructOp->getRegion(0);
|
|
SetVector<Value> liveInValues;
|
|
getUsedValuesDefinedAbove(accRegion, liveInValues);
|
|
|
|
// 2) Run the filtering to find relevant pointers that need copied.
|
|
auto isCandidate{[&](Value val) -> bool {
|
|
return isCandidateForImplicitData(val, accRegion, accSupport);
|
|
}};
|
|
auto candidateVars(
|
|
llvm::to_vector(llvm::make_filter_range(liveInValues, isCandidate)));
|
|
if (candidateVars.empty())
|
|
return;
|
|
|
|
// 3) Generate data clauses for the variables.
|
|
SmallVector<Value> newPrivateOperands;
|
|
SmallVector<Value> newDataClauseOperands;
|
|
OpBuilder builder(computeConstructOp);
|
|
if (!candidateVars.empty()) {
|
|
LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n"
|
|
<< computeConstructOp << "\n");
|
|
}
|
|
auto &domInfo = this->getAnalysis<DominanceInfo>();
|
|
auto &postDomInfo = this->getAnalysis<PostDominanceInfo>();
|
|
auto dominatingDataClauses =
|
|
acc::getDominatingDataClauses(computeConstructOp, domInfo, postDomInfo);
|
|
for (auto var : candidateVars) {
|
|
auto newDataClauseOp = generateDataClauseOpForCandidate(
|
|
var, module, builder, computeConstructOp, dominatingDataClauses,
|
|
defaultClause);
|
|
fillInBoundsForUnknownDimensions(newDataClauseOp, builder);
|
|
LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n"
|
|
<< "\t" << *newDataClauseOp << "\n");
|
|
if (isa_and_nonnull<acc::PrivateOp, acc::FirstprivateOp, acc::ReductionOp>(
|
|
newDataClauseOp)) {
|
|
newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp));
|
|
} else if (isa_and_nonnull<ACC_DATA_CLAUSE_OPS>(newDataClauseOp)) {
|
|
newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp));
|
|
dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp));
|
|
}
|
|
}
|
|
|
|
// 4) Legalize values in region (aka the uses in the region are the result
|
|
// of the data clause ops)
|
|
legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands);
|
|
|
|
// 5) Generate private recipes which are required for properly attaching
|
|
// private operands.
|
|
if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
|
|
!std::is_same_v<OpT, acc::KernelEnvironmentOp>)
|
|
generateRecipes(module, builder, computeConstructOp, newPrivateOperands);
|
|
|
|
// 6) Figure out insertion order for the new data clause operands.
|
|
SmallVector<Value> sortedDataClauseOperands(
|
|
computeConstructOp.getDataClauseOperands());
|
|
for (auto newClause : newDataClauseOperands)
|
|
insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp());
|
|
|
|
// 7) Generate the data exit operations.
|
|
generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands,
|
|
sortedDataClauseOperands);
|
|
// 8) Add all of the new operands to the compute construct op.
|
|
if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
|
|
!std::is_same_v<OpT, acc::KernelEnvironmentOp>)
|
|
addNewPrivateOperands(computeConstructOp, newPrivateOperands);
|
|
computeConstructOp.getDataClauseOperandsMutable().assign(
|
|
sortedDataClauseOperands);
|
|
}
|
|
|
|
void ACCImplicitData::runOnOperation() {
|
|
ModuleOp module = this->getOperation();
|
|
|
|
acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
|
|
|
|
module.walk([&](Operation *op) {
|
|
if (isa<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(op)) {
|
|
assert(op->getNumRegions() == 1 && "must have 1 region");
|
|
|
|
auto defaultClause = acc::getDefaultAttr(op);
|
|
llvm::TypeSwitch<Operation *, void>(op)
|
|
.Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
|
|
[&](auto op) {
|
|
generateImplicitDataOps(module, op, defaultClause, accSupport);
|
|
})
|
|
.Default([&](Operation *) {});
|
|
}
|
|
});
|
|
}
|
|
|
|
} // namespace
|