Add a new acc::VariableInfoAttr attribute that can be extended and implemented by language dialects to carry language-specific information about variables that is not reflected in the MLIR type system and is needed in the implementation of the init/copy/destroy APIs. A new genPrivateVariableInfo API is added to the MappableTypeInterface to generate such an attribute from an mlir::Value for the host variable. The use case and motivation is the Fortran OPTIONAL attribute. This patch adds a new fir::OpenACCFortranVariableInfoAttr that implements the acc::VariableInfoAttr to carry the OPTIONAL information around.
800 lines
32 KiB
C++
800 lines
32 KiB
C++
//===- ACCImplicitData.cpp ------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass implements the OpenACC specification for "Variables with
|
|
// Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2).
|
|
//
|
|
// Overview:
|
|
// ---------
|
|
// The pass automatically generates data clause operations for variables used
|
|
// within OpenACC compute constructs (parallel, kernels, serial) that do not
|
|
// already have explicit data clauses. The semantics follow these rules:
|
|
//
|
|
// 1. If there is a default(none) clause visible, no implicit data actions
|
|
// apply.
|
|
//
|
|
// 2. An aggregate variable (arrays, derived types, etc.) will be treated as:
|
|
// - In a present clause when default(present) is visible.
|
|
// - In a copy clause otherwise.
|
|
//
|
|
// 3. A scalar variable will be treated as if it appears in:
|
|
// - A copy clause if the compute construct is a kernels construct.
|
|
// - A firstprivate clause otherwise (parallel, serial).
|
|
//
|
|
// Requirements:
|
|
// -------------
|
|
// To use this pass in a pipeline, the following requirements must be met:
|
|
//
|
|
// 1. Type Interface Implementation: Variables from the dialect being used
|
|
// must implement one or both of the following MLIR interfaces:
|
|
// `acc::MappableType` and/or `acc::PointerLikeType`
|
|
//
|
|
// These interfaces provide the necessary methods for the pass to:
|
|
// - Determine variable type categories (scalar vs. aggregate)
|
|
// - Generate appropriate bounds information
|
|
// - Generate privatization recipes
|
|
//
|
|
// 2. Operation Interface Implementation: Operations that access partial
|
|
// entities or create views should implement the following MLIR
|
|
// interfaces: `acc::PartialEntityAccess` and/or
|
|
// `mlir::ViewLikeOpInterface`
|
|
//
|
|
// These interfaces are used for proper data clause ordering, ensuring
|
|
// that base entities are mapped before derived entities (e.g., a
|
|
// struct is mapped before its fields, an array is mapped before
|
|
// subarray views).
|
|
//
|
|
// 3. Analysis Registration (Optional): If custom behavior is needed for
|
|
// variable name extraction or alias analysis, the dialect should
|
|
// pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses.
|
|
//
|
|
// If not registered, default behavior will be used.
|
|
//
|
|
// Implementation Details:
|
|
// -----------------------
|
|
// The pass performs the following operations:
|
|
//
|
|
// 1. Finds candidate variables which are live-in to the compute region and
|
|
// are not already in a data clause or private clause.
|
|
//
|
|
// 2. Generates both data "entry" and "exit" clause operations that match
|
|
// the intended action depending on variable type:
|
|
// - copy -> acc.copyin (entry) + acc.copyout (exit)
|
|
// - present -> acc.present (entry) + acc.delete (exit)
|
|
// - firstprivate -> acc.firstprivate (entry only, no exit)
|
|
//
|
|
// 3. Ensures that default clause is taken into consideration by looking
|
|
// through current construct and parent constructs to find the "visible
|
|
// default clause".
|
|
//
|
|
// 4. Fixes up SSA value links so that uses in the acc region reference the
|
|
// result of the newly created data clause operations.
|
|
//
|
|
// 5. When generating implicit data clause operations, it also adds variable
|
|
// name information and marks them with the implicit flag.
|
|
//
|
|
// 6. Recipes are generated by calling the appropriate entrypoints in the
|
|
// MappableType and PointerLikeType interfaces.
|
|
//
|
|
// 7. AliasAnalysis is used to determine if a variable is already covered by
|
|
// an existing data clause (e.g., an interior pointer covered by its parent).
|
|
//
|
|
// Examples:
|
|
// ---------
|
|
//
|
|
// Example 1: Scalar in parallel construct (implicit firstprivate)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
|
|
// acc.parallel {
|
|
// %val = memref.load %scalar[] : memref<f32>
|
|
// acc.yield
|
|
// }
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "x"} : memref<f32>
|
|
// %firstpriv = acc.firstprivate varPtr(%scalar : memref<f32>)
|
|
// -> memref<f32> {implicit = true, name = "x"}
|
|
// acc.parallel firstprivate(@recipe -> %firstpriv : memref<f32>) {
|
|
// %val = memref.load %firstpriv[] : memref<f32>
|
|
// acc.yield
|
|
// }
|
|
// }
|
|
//
|
|
// Example 2: Scalar in kernels construct (implicit copy)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
|
|
// acc.kernels {
|
|
// %val = memref.load %scalar[] : memref<i32>
|
|
// acc.terminator
|
|
// }
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %scalar = memref.alloca() {acc.var_name = "n"} : memref<i32>
|
|
// %copyin = acc.copyin varPtr(%scalar : memref<i32>) -> memref<i32>
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "n"}
|
|
// acc.kernels dataOperands(%copyin : memref<i32>) {
|
|
// %val = memref.load %copyin[] : memref<i32>
|
|
// acc.terminator
|
|
// }
|
|
// acc.copyout accPtr(%copyin : memref<i32>)
|
|
// to varPtr(%scalar : memref<i32>)
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "n"}
|
|
// }
|
|
//
|
|
// Example 3: Array (aggregate) in parallel (implicit copy)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// acc.parallel {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %array[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// }
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// %copyin = acc.copyin varPtr(%array : memref<100xf32>)
|
|
// -> memref<100xf32>
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "arr"}
|
|
// acc.parallel dataOperands(%copyin : memref<100xf32>) {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %copyin[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// }
|
|
// acc.copyout accPtr(%copyin : memref<100xf32>)
|
|
// to varPtr(%array : memref<100xf32>)
|
|
// {dataClause = #acc<data_clause acc_copy>,
|
|
// implicit = true, name = "arr"}
|
|
// }
|
|
//
|
|
// Example 4: Array with default(present)
|
|
//
|
|
// Before:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// acc.parallel {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %array[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// } attributes {defaultAttr = #acc<defaultvalue present>}
|
|
// }
|
|
//
|
|
// After:
|
|
// func.func @test() {
|
|
// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32>
|
|
// %present = acc.present varPtr(%array : memref<100xf32>)
|
|
// -> memref<100xf32>
|
|
// {implicit = true, name = "arr"}
|
|
// acc.parallel dataOperands(%present : memref<100xf32>)
|
|
// attributes {defaultAttr = #acc<defaultvalue present>} {
|
|
// %c0 = arith.constant 0 : index
|
|
// %val = memref.load %present[%c0] : memref<100xf32>
|
|
// acc.yield
|
|
// }
|
|
// acc.delete accPtr(%present : memref<100xf32>)
|
|
// {dataClause = #acc<data_clause acc_present>,
|
|
// implicit = true, name = "arr"}
|
|
// }
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
|
|
#include "llvm/ADT/SmallVectorExtras.h"
|
|
|
|
#include "mlir/Analysis/AliasAnalysis.h"
|
|
#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
|
#include "mlir/Dialect/OpenACC/OpenACCUtils.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/BuiltinOps.h"
|
|
#include "mlir/IR/Dominance.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/IR/Value.h"
|
|
#include "mlir/Interfaces/FunctionInterfaces.h"
|
|
#include "mlir/Interfaces/ViewLikeInterface.h"
|
|
#include "mlir/Transforms/RegionUtils.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/TypeSwitch.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include <type_traits>
|
|
|
|
namespace mlir {
|
|
namespace acc {
|
|
#define GEN_PASS_DEF_ACCIMPLICITDATA
|
|
#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"
|
|
} // namespace acc
|
|
} // namespace mlir
|
|
|
|
#define DEBUG_TYPE "acc-implicit-data"
|
|
|
|
using namespace mlir;
|
|
|
|
namespace {
|
|
|
|
/// Pass that materializes the implicitly determined data attributes of
/// OpenACC compute constructs as explicit acc data clause operations.
class ACCImplicitData : public acc::impl::ACCImplicitDataBase<ACCImplicitData> {
public:
  using acc::impl::ACCImplicitDataBase<ACCImplicitData>::ACCImplicitDataBase;

  /// Pass entry point: visits the compute constructs nested in the module.
  void runOnOperation() override;

private:
  /// Scans `dominatingDataClauses` for a data clause operation whose variable
  /// must-aliases `var`. Returns that operation, or nullptr when none of the
  /// dominating clauses qualifies.
  template <typename OpT>
  Operation *getOriginalDataClauseOpForAlias(
      Value var, OpBuilder &builder, OpT computeConstructOp,
      const SmallVector<Value> &dominatingDataClauses);

  /// Creates the data clause operation (`acc.copyin`, `acc.present`,
  /// `acc.firstprivate`, ...) that applies to the candidate variable `var`.
  /// Returns nullptr when the variable's category is not handled.
  template <typename OpT>
  Operation *generateDataClauseOpForCandidate(
      Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
      const SmallVector<Value> &dominatingDataClauses,
      const std::optional<acc::ClauseDefaultValue> &defaultClause);

  /// Creates every implicit data operation required by one compute construct
  /// and attaches the results to it.
  template <typename OpT>
  void
  generateImplicitDataOps(ModuleOp &module, OpT computeConstructOp,
                          std::optional<acc::ClauseDefaultValue> &defaultClause,
                          acc::OpenACCSupport &accSupport);

  /// Returns the private recipe for `var`, creating it in `module` if it does
  /// not exist yet.
  acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var,
                                             Location loc, OpBuilder &builder,
                                             acc::OpenACCSupport &accSupport);

  /// Returns the firstprivate recipe for `var`, creating it in `module` if it
  /// does not exist yet.
  acc::FirstprivateRecipeOp
  generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc,
                             OpBuilder &builder,
                             acc::OpenACCSupport &accSupport);

  /// Attaches a recipe to each private/firstprivate operation that defines
  /// one of `newOperands`.
  void generateRecipes(ModuleOp &module, OpBuilder &builder,
                       Operation *computeConstructOp,
                       const SmallVector<Value> &newOperands);
};
|
|
|
|
/// Determines if a variable is a candidate for implicit data mapping.
|
|
/// Returns true if the variable is a candidate, false otherwise.
|
|
static bool isCandidateForImplicitData(Value val, Region &accRegion,
|
|
acc::OpenACCSupport &accSupport) {
|
|
// Ensure the variable is an allowed type for data clause.
|
|
if (!acc::isPointerLikeType(val.getType()) &&
|
|
!acc::isMappableType(val.getType()))
|
|
return false;
|
|
|
|
// If this is already coming from a data clause, we do not need to generate
|
|
// another.
|
|
if (isa_and_nonnull<ACC_DATA_ENTRY_OPS>(val.getDefiningOp()))
|
|
return false;
|
|
|
|
// Device data is a candidate - it will get a deviceptr clause.
|
|
if (acc::isDeviceValue(val))
|
|
return true;
|
|
|
|
// If it is otherwise valid, skip it.
|
|
if (accSupport.isValidValueUse(val, accRegion))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
template <typename OpT>
|
|
Operation *ACCImplicitData::getOriginalDataClauseOpForAlias(
|
|
Value var, OpBuilder &builder, OpT computeConstructOp,
|
|
const SmallVector<Value> &dominatingDataClauses) {
|
|
auto &aliasAnalysis = this->getAnalysis<AliasAnalysis>();
|
|
for (auto dataClause : dominatingDataClauses) {
|
|
if (auto *dataClauseOp = dataClause.getDefiningOp()) {
|
|
// Only accept clauses that guarantee that the alias is present.
|
|
if (isa<acc::CopyinOp, acc::CreateOp, acc::PresentOp, acc::NoCreateOp,
|
|
acc::DevicePtrOp>(dataClauseOp))
|
|
if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust())
|
|
return dataClauseOp;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Generates bounds for variables that have unknown dimensions
|
|
static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp,
|
|
OpBuilder &builder) {
|
|
|
|
if (!acc::getBounds(dataClauseOp).empty())
|
|
// If bounds are already present, do not overwrite them.
|
|
return;
|
|
|
|
// For types that have unknown dimensions, attempt to generate bounds by
|
|
// relying on MappableType being able to extract it from the IR.
|
|
auto var = acc::getVar(dataClauseOp);
|
|
auto type = var.getType();
|
|
if (auto mappableTy = dyn_cast<acc::MappableType>(type)) {
|
|
if (mappableTy.hasUnknownDimensions()) {
|
|
TypeSwitch<Operation *>(dataClauseOp)
|
|
.Case<ACC_DATA_ENTRY_OPS, ACC_DATA_EXIT_OPS>([&](auto dataClauseOp) {
|
|
if (std::is_same_v<decltype(dataClauseOp), acc::DevicePtrOp>)
|
|
return;
|
|
OpBuilder::InsertionGuard guard(builder);
|
|
builder.setInsertionPoint(dataClauseOp);
|
|
auto bounds = mappableTy.generateAccBounds(var, builder);
|
|
if (!bounds.empty())
|
|
dataClauseOp.getBoundsMutable().assign(bounds);
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns the private recipe for `var`.
///
/// The recipe symbol name is obtained from `accSupport`. If `module` already
/// holds a private recipe with that name it is reused; otherwise a new one is
/// created at the start of the module body. When creation fails, a
/// not-yet-implemented diagnostic is emitted at `loc` and a null op is
/// returned. The insertion point of `builder` is left unchanged.
acc::PrivateRecipeOp
ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var,
                                       Location loc, OpBuilder &builder,
                                       acc::OpenACCSupport &accSupport) {
  std::string symName = accSupport.getRecipeName(
      acc::RecipeKind::private_recipe, var.getType(), var);

  // Reuse a recipe of the same name when one is already in the module.
  if (auto cachedRecipe = module.lookupSymbol<acc::PrivateRecipeOp>(symName))
    return cachedRecipe;

  // Recipes live at the top of the module; the guard puts the builder back
  // where it was once this function returns.
  OpBuilder::InsertionGuard restoreInsertionPoint(builder);
  builder.setInsertionPointToStart(module.getBody());

  auto newRecipe =
      acc::PrivateRecipeOp::createAndPopulate(builder, loc, symName, var);
  if (newRecipe.has_value())
    return newRecipe.value();

  accSupport.emitNYI(loc, "implicit private");
  return nullptr;
}
|
|
|
|
/// Returns the firstprivate recipe for `var`.
///
/// The recipe symbol name is obtained from `accSupport`. If `module` already
/// holds a firstprivate recipe with that name it is reused; otherwise a new
/// one is created at the start of the module body. When creation fails, a
/// not-yet-implemented diagnostic is emitted at `loc` and a null op is
/// returned. The insertion point of `builder` is left unchanged.
acc::FirstprivateRecipeOp
ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var,
                                            Location loc, OpBuilder &builder,
                                            acc::OpenACCSupport &accSupport) {
  std::string symName = accSupport.getRecipeName(
      acc::RecipeKind::firstprivate_recipe, var.getType(), var);

  // Reuse a recipe of the same name when one is already in the module.
  if (auto cachedRecipe =
          module.lookupSymbol<acc::FirstprivateRecipeOp>(symName))
    return cachedRecipe;

  // Recipes live at the top of the module; the guard puts the builder back
  // where it was once this function returns.
  OpBuilder::InsertionGuard restoreInsertionPoint(builder);
  builder.setInsertionPointToStart(module.getBody());

  auto newRecipe =
      acc::FirstprivateRecipeOp::createAndPopulate(builder, loc, symName, var);
  if (newRecipe.has_value())
    return newRecipe.value();

  accSupport.emitNYI(loc, "implicit firstprivate");
  return nullptr;
}
|
|
|
|
/// Attaches a recipe symbol to the operation defining each of `newOperands`.
///
/// `acc.private` results get a private recipe and `acc.firstprivate` results
/// get a firstprivate recipe; the recipe attribute is only set when recipe
/// generation succeeded. Any other operand is reported as a
/// not-yet-implemented implicit reduction. `computeConstructOp` is not used.
void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder,
                                      Operation *computeConstructOp,
                                      const SmallVector<Value> &newOperands) {
  acc::OpenACCSupport &support = this->getAnalysis<acc::OpenACCSupport>();
  auto *context = module->getContext();

  for (Value operand : newOperands) {
    Location operandLoc = operand.getLoc();

    if (auto privateOp = operand.getDefiningOp<acc::PrivateOp>()) {
      auto recipe =
          generatePrivateRecipe(module, acc::getVar(privateOp.getOperation()),
                                operandLoc, builder, support);
      if (recipe)
        privateOp.setRecipeAttr(
            SymbolRefAttr::get(context, recipe.getSymName()));
      continue;
    }

    if (auto firstprivateOp = operand.getDefiningOp<acc::FirstprivateOp>()) {
      auto recipe = generateFirstprivateRecipe(
          module, acc::getVar(firstprivateOp.getOperation()), operandLoc,
          builder, support);
      if (recipe)
        firstprivateOp.setRecipeAttr(
            SymbolRefAttr::get(context, recipe.getSymName()));
      continue;
    }

    // Neither private nor firstprivate: implicit reductions are unsupported.
    support.emitNYI(operandLoc, "implicit reduction");
  }
}
|
|
|
|
// Generates the data entry data op clause so that it adheres to OpenACC
|
|
// rules as follows (line numbers and specification from OpenACC 3.4):
|
|
// 1388 An aggregate variable will be treated as if it appears either:
|
|
// 1389 - In a present clause if there is a default(present) clause visible at
|
|
// the compute construct.
|
|
// 1391 - In a copy clause otherwise.
|
|
// 1392 A scalar variable will be treated as if it appears either:
|
|
// 1393 - In a copy clause if the compute construct is a kernels construct.
|
|
// 1394 - In a firstprivate clause otherwise.
|
|
template <typename OpT>
|
|
Operation *ACCImplicitData::generateDataClauseOpForCandidate(
|
|
Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp,
|
|
const SmallVector<Value> &dominatingDataClauses,
|
|
const std::optional<acc::ClauseDefaultValue> &defaultClause) {
|
|
auto &accSupport = this->getAnalysis<acc::OpenACCSupport>();
|
|
acc::VariableTypeCategory typeCategory =
|
|
acc::VariableTypeCategory::uncategorized;
|
|
if (auto mappableTy = dyn_cast<acc::MappableType>(var.getType())) {
|
|
typeCategory = mappableTy.getTypeCategory(var);
|
|
} else if (auto pointerLikeTy =
|
|
dyn_cast<acc::PointerLikeType>(var.getType())) {
|
|
typeCategory = pointerLikeTy.getPointeeTypeCategory(
|
|
cast<TypedValue<acc::PointerLikeType>>(var),
|
|
pointerLikeTy.getElementType());
|
|
}
|
|
|
|
bool isScalar =
|
|
acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar);
|
|
bool isAnyAggregate = acc::bitEnumContainsAny(
|
|
typeCategory, acc::VariableTypeCategory::aggregate);
|
|
Location loc = computeConstructOp->getLoc();
|
|
|
|
Operation *op = nullptr;
|
|
op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp,
|
|
dominatingDataClauses);
|
|
if (op) {
|
|
if (isa<acc::NoCreateOp>(op))
|
|
return acc::NoCreateOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var),
|
|
acc::getBounds(op));
|
|
|
|
if (isa<acc::DevicePtrOp>(op))
|
|
return acc::DevicePtrOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var),
|
|
acc::getBounds(op));
|
|
|
|
// The original data clause op is a PresentOp, CopyinOp, or CreateOp,
|
|
// hence guaranteed to be present.
|
|
return acc::PresentOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var),
|
|
acc::getBounds(op));
|
|
}
|
|
|
|
if (acc::isDeviceValue(var)) {
|
|
// If the variable is device data, use deviceptr clause.
|
|
return acc::DevicePtrOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
}
|
|
|
|
if (isScalar) {
|
|
if (enableImplicitReductionCopy &&
|
|
acc::isOnlyUsedByReductionClauses(var,
|
|
computeConstructOp->getRegion(0))) {
|
|
auto copyinOp =
|
|
acc::CopyinOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
copyinOp.setDataClause(acc::DataClause::acc_reduction);
|
|
return copyinOp.getOperation();
|
|
}
|
|
if constexpr (std::is_same_v<OpT, acc::KernelsOp> ||
|
|
std::is_same_v<OpT, acc::KernelEnvironmentOp>) {
|
|
// Scalars are implicit copyin in kernels construct.
|
|
// We also do the same for acc.kernel_environment because semantics
|
|
// of user variable mappings should be applied while ACC construct exists
|
|
// and at this point we should only be dealing with unmapped variables
|
|
// that were made live-in by the compiler.
|
|
// TODO: This may be revisited.
|
|
auto copyinOp =
|
|
acc::CopyinOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
copyinOp.setDataClause(acc::DataClause::acc_copy);
|
|
return copyinOp.getOperation();
|
|
} else {
|
|
// Scalars are implicit firstprivate in parallel and serial construct.
|
|
return acc::FirstprivateOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
}
|
|
} else if (isAnyAggregate) {
|
|
Operation *newDataOp = nullptr;
|
|
|
|
// When default(present) is true, the implicit behavior is present.
|
|
if (defaultClause.has_value() &&
|
|
defaultClause.value() == acc::ClauseDefaultValue::Present) {
|
|
newDataOp = acc::PresentOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
newDataOp->setAttr(acc::getFromDefaultClauseAttrName(),
|
|
builder.getUnitAttr());
|
|
} else {
|
|
auto copyinOp =
|
|
acc::CopyinOp::create(builder, loc, var,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
accSupport.getVariableName(var));
|
|
copyinOp.setDataClause(acc::DataClause::acc_copy);
|
|
newDataOp = copyinOp.getOperation();
|
|
}
|
|
|
|
return newDataOp;
|
|
} else {
|
|
// This is not a fatal error - for example when the element type is
|
|
// pointer type (aka we have a pointer of pointer), it is potentially a
|
|
// deep copy scenario which is not being handled here.
|
|
// Other types need to be canonicalized. Thus just log unhandled cases.
|
|
LLVM_DEBUG(llvm::dbgs()
|
|
<< "Unhandled case for implicit data mapping " << var << "\n");
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// Ensures that result values from the acc data clause ops are used inside the
|
|
// acc region. ie:
|
|
// acc.kernels {
|
|
// use %val
|
|
// }
|
|
// =>
|
|
// %dev = acc.dataop %val
|
|
// acc.kernels {
|
|
// use %dev
|
|
// }
|
|
static void legalizeValuesInRegion(Region &accRegion,
|
|
SmallVector<Value> &newPrivateOperands,
|
|
SmallVector<Value> &newDataClauseOperands) {
|
|
for (Value dataClause :
|
|
llvm::concat<Value>(newDataClauseOperands, newPrivateOperands)) {
|
|
Value var = acc::getVar(dataClause.getDefiningOp());
|
|
replaceAllUsesInRegionWith(var, dataClause, accRegion);
|
|
}
|
|
}
|
|
|
|
// Adds the private operands to the compute construct operation.
|
|
template <typename OpT>
|
|
static void addNewPrivateOperands(OpT &accOp,
|
|
const SmallVector<Value> &privateOperands) {
|
|
if (privateOperands.empty())
|
|
return;
|
|
|
|
for (auto priv : privateOperands) {
|
|
if (isa<acc::PrivateOp>(priv.getDefiningOp())) {
|
|
accOp.getPrivateOperandsMutable().append(priv);
|
|
} else if (isa<acc::FirstprivateOp>(priv.getDefiningOp())) {
|
|
accOp.getFirstprivateOperandsMutable().append(priv);
|
|
} else {
|
|
llvm_unreachable("unhandled reduction operand");
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns the first user of the acc variable produced by `dataEntryOp` that
/// is an acc data exit operation, or nullptr when there is none.
static Operation *findDataExitOp(Operation *dataEntryOp) {
  Value accVar = acc::getAccVar(dataEntryOp);
  auto users = accVar.getUsers();
  auto exitIt = llvm::find_if(
      users, [](Operation *user) { return isa<ACC_DATA_EXIT_OPS>(user); });
  return exitIt == users.end() ? nullptr : *exitIt;
}
|
|
|
|
// Generates matching data exit operation as described in the acc dialect
|
|
// for how data clauses are decomposed:
|
|
// https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories
|
|
// Key ones used here:
|
|
// * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after
|
|
// region)
|
|
// * acc {construct} present -> acc.present (before region) + acc.delete
|
|
// (after region)
|
|
static void
|
|
generateDataExitOperations(OpBuilder &builder, Operation *accOp,
|
|
const SmallVector<Value> &newDataClauseOperands,
|
|
const SmallVector<Value> &sortedDataClauseOperands) {
|
|
builder.setInsertionPointAfter(accOp);
|
|
Value lastDataClause = nullptr;
|
|
for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) {
|
|
if (llvm::find(newDataClauseOperands, dataEntry) ==
|
|
newDataClauseOperands.end()) {
|
|
// If this is not a new data clause operand, we should not generate an
|
|
// exit operation for it.
|
|
lastDataClause = dataEntry;
|
|
continue;
|
|
}
|
|
if (lastDataClause)
|
|
if (auto *dataExitOp = findDataExitOp(lastDataClause.getDefiningOp()))
|
|
builder.setInsertionPointAfter(dataExitOp);
|
|
Operation *dataEntryOp = dataEntry.getDefiningOp();
|
|
if (isa<acc::CopyinOp>(dataEntryOp)) {
|
|
auto copyoutOp = acc::CopyoutOp::create(
|
|
builder, dataEntryOp->getLoc(), dataEntry, acc::getVar(dataEntryOp),
|
|
/*structured=*/true, /*implicit=*/true,
|
|
acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp));
|
|
copyoutOp.setDataClause(acc::DataClause::acc_copy);
|
|
} else if (isa<acc::PresentOp, acc::NoCreateOp>(dataEntryOp)) {
|
|
auto deleteOp = acc::DeleteOp::create(
|
|
builder, dataEntryOp->getLoc(), dataEntry,
|
|
/*structured=*/true, /*implicit=*/true,
|
|
acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp));
|
|
deleteOp.setDataClause(acc::getDataClause(dataEntryOp).value());
|
|
} else if (isa<acc::DevicePtrOp>(dataEntryOp)) {
|
|
// Do nothing.
|
|
} else {
|
|
llvm_unreachable("unhandled data exit");
|
|
}
|
|
lastDataClause = dataEntry;
|
|
}
|
|
}
|
|
|
|
/// Returns all base references of a value in order.
|
|
/// So for example, if we have a reference to a struct field like
|
|
/// s.f1.f2.f3, this will return <s, s.f1, s.f1.f2, s.f1.f2.f3>.
|
|
/// Any intermediate casts/view-like operations are included in the
|
|
/// chain as well.
|
|
static SmallVector<Value> getBaseRefsChain(Value val) {
|
|
SmallVector<Value> baseRefs;
|
|
baseRefs.push_back(val);
|
|
while (true) {
|
|
Value prevVal = val;
|
|
|
|
val = acc::getBaseEntity(val);
|
|
if (val != baseRefs.front())
|
|
baseRefs.insert(baseRefs.begin(), val);
|
|
|
|
// If this is a view-like operation, it is effectively another
|
|
// view of the same entity so we should add it to the chain also.
|
|
if (auto viewLikeOp = val.getDefiningOp<ViewLikeOpInterface>()) {
|
|
val = viewLikeOp.getViewSource();
|
|
baseRefs.insert(baseRefs.begin(), val);
|
|
}
|
|
|
|
// Continue loop if we made any progress
|
|
if (val == prevVal)
|
|
break;
|
|
}
|
|
|
|
return baseRefs;
|
|
}
|
|
|
|
/// Inserts the acc variable of `newClause` into `sortedDataClauseOperands` so
/// that a clause on a base entity comes before clauses on entities derived
/// from it.
///
/// The new entry goes right before the first existing clause whose base
/// reference chain contains the variable of `newClause`, and `newClause`
/// itself is moved before that clause's operation. If no existing clause
/// depends on it, the entry is appended and the operation is not moved.
static void insertInSortedOrder(SmallVector<Value> &sortedDataClauseOperands,
                                Operation *newClause) {
  Value newVar = acc::getVar(newClause);

  // True when the existing clause maps `newVar` itself or something derived
  // from it.
  auto isDerivedFromNewVar = [&](Value existingClause) {
    Value existingVar = acc::getVar(existingClause.getDefiningOp());
    return llvm::is_contained(getBaseRefsChain(existingVar), newVar);
  };

  auto *insertPos =
      llvm::find_if(sortedDataClauseOperands, isDerivedFromNewVar);
  Value newAccVar = acc::getAccVar(newClause);

  if (insertPos == sortedDataClauseOperands.end()) {
    sortedDataClauseOperands.push_back(newAccVar);
    return;
  }

  newClause->moveBefore(insertPos->getDefiningOp());
  sortedDataClauseOperands.insert(insertPos, newAccVar);
}
|
|
|
|
template <typename OpT>
|
|
void ACCImplicitData::generateImplicitDataOps(
|
|
ModuleOp &module, OpT computeConstructOp,
|
|
std::optional<acc::ClauseDefaultValue> &defaultClause,
|
|
acc::OpenACCSupport &accSupport) {
|
|
// Implicit data attributes are only applied if "[t]here is no default(none)
|
|
// clause visible at the compute construct."
|
|
if (defaultClause.has_value() &&
|
|
defaultClause.value() == acc::ClauseDefaultValue::None)
|
|
return;
|
|
assert(!defaultClause.has_value() ||
|
|
defaultClause.value() == acc::ClauseDefaultValue::Present);
|
|
|
|
// 1) Collect live-in values.
|
|
Region &accRegion = computeConstructOp->getRegion(0);
|
|
SetVector<Value> liveInValues;
|
|
getUsedValuesDefinedAbove(accRegion, liveInValues);
|
|
|
|
// 2) Run the filtering to find relevant pointers that need copied.
|
|
auto isCandidate{[&](Value val) -> bool {
|
|
return isCandidateForImplicitData(val, accRegion, accSupport);
|
|
}};
|
|
auto candidateVars(llvm::filter_to_vector(liveInValues, isCandidate));
|
|
if (candidateVars.empty())
|
|
return;
|
|
|
|
// 3) Generate data clauses for the variables.
|
|
SmallVector<Value> newPrivateOperands;
|
|
SmallVector<Value> newDataClauseOperands;
|
|
OpBuilder builder(computeConstructOp);
|
|
if (!candidateVars.empty()) {
|
|
LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n"
|
|
<< computeConstructOp << "\n");
|
|
}
|
|
auto &domInfo = this->getAnalysis<DominanceInfo>();
|
|
auto &postDomInfo = this->getAnalysis<PostDominanceInfo>();
|
|
auto dominatingDataClauses =
|
|
acc::getDominatingDataClauses(computeConstructOp, domInfo, postDomInfo);
|
|
for (auto var : candidateVars) {
|
|
auto newDataClauseOp = generateDataClauseOpForCandidate(
|
|
var, module, builder, computeConstructOp, dominatingDataClauses,
|
|
defaultClause);
|
|
fillInBoundsForUnknownDimensions(newDataClauseOp, builder);
|
|
LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n"
|
|
<< "\t" << *newDataClauseOp << "\n");
|
|
if (isa_and_nonnull<acc::PrivateOp, acc::FirstprivateOp, acc::ReductionOp>(
|
|
newDataClauseOp)) {
|
|
newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp));
|
|
} else if (isa_and_nonnull<ACC_DATA_CLAUSE_OPS>(newDataClauseOp)) {
|
|
newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp));
|
|
dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp));
|
|
}
|
|
}
|
|
|
|
// 4) Legalize values in region (aka the uses in the region are the result
|
|
// of the data clause ops)
|
|
legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands);
|
|
|
|
// 5) Generate private recipes which are required for properly attaching
|
|
// private operands.
|
|
if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
|
|
!std::is_same_v<OpT, acc::KernelEnvironmentOp>)
|
|
generateRecipes(module, builder, computeConstructOp, newPrivateOperands);
|
|
|
|
// 6) Figure out insertion order for the new data clause operands.
|
|
SmallVector<Value> sortedDataClauseOperands(
|
|
computeConstructOp.getDataClauseOperands());
|
|
for (auto newClause : newDataClauseOperands)
|
|
insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp());
|
|
|
|
// 7) Generate the data exit operations.
|
|
generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands,
|
|
sortedDataClauseOperands);
|
|
// 8) Add all of the new operands to the compute construct op.
|
|
if constexpr (!std::is_same_v<OpT, acc::KernelsOp> &&
|
|
!std::is_same_v<OpT, acc::KernelEnvironmentOp>)
|
|
addNewPrivateOperands(computeConstructOp, newPrivateOperands);
|
|
computeConstructOp.getDataClauseOperandsMutable().assign(
|
|
sortedDataClauseOperands);
|
|
}
|
|
|
|
void ACCImplicitData::runOnOperation() {
|
|
ModuleOp module = this->getOperation();
|
|
|
|
acc::OpenACCSupport &accSupport = getAnalysis<acc::OpenACCSupport>();
|
|
|
|
module.walk([&](Operation *op) {
|
|
if (isa<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(op)) {
|
|
assert(op->getNumRegions() == 1 && "must have 1 region");
|
|
|
|
auto defaultClause = acc::getDefaultAttr(op);
|
|
llvm::TypeSwitch<Operation *, void>(op)
|
|
.Case<ACC_COMPUTE_CONSTRUCT_OPS, acc::KernelEnvironmentOp>(
|
|
[&](auto op) {
|
|
generateImplicitDataOps(module, op, defaultClause, accSupport);
|
|
})
|
|
.Default([&](Operation *) {});
|
|
}
|
|
});
|
|
}
|
|
|
|
} // namespace
|