This fixes strict weak ordering check violations from #155348 when running these two tests: mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare.mlir and mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare-by-value.mlir. Sample error: /stable/src/libcxx/include/__debug_utils/strict_weak_ordering_check.h:50: libc++ Hardening assertion !__comp(*(__first + __a), *(__first + __b)) failed: Your comparator is not a valid strict-weak ordering. This is because (x < x) should be false, not true, to meet the irreflexivity property. (Note that .dominates(x, x) returns true.) I'm afraid that even after this commit we can't guarantee a strict weak ordering, because we can't guarantee transitivity of equivalence by sorting with a strict dominance function. However, the tests are not failing anymore, and I am not at all familiar with this code, so I will leave this concern up to the original author for consideration. (Ideas without any further context: I would consider a topological sort or walking a dominator tree.) Reference on std::sort and strict weak ordering: https://danlark.org/2022/04/20/changing-stdsort-at-googles-scale-and-beyond/
448 lines
19 KiB
C++
448 lines
19 KiB
C++
//===- OpenMPOffloadPrivatizationPrepare.cpp - Prepare OMP privatization --===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Analysis/SliceAnalysis.h"
|
|
#include "mlir/Dialect/LLVMIR/FunctionCallUtils.h"
|
|
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
|
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/Dominance.h"
|
|
#include "mlir/IR/IRMapping.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Support/LLVM.h"
|
|
#include "llvm/Support/DebugLog.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
#include <cstdint>
|
|
#include <iterator>
|
|
#include <utility>
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// A pass that prepares OpenMP code for translation of delayed privatization
|
|
// in the context of deferred target tasks. Deferred target tasks are created
|
|
// when the nowait clause is used on the target directive.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "omp-prepare-for-offload-privatization"
|
|
|
|
namespace mlir {
|
|
namespace omp {
|
|
|
|
#define GEN_PASS_DEF_PREPAREFOROMPOFFLOADPRIVATIZATIONPASS
|
|
#include "mlir/Dialect/OpenMP/Transforms/Passes.h.inc"
|
|
|
|
} // namespace omp
|
|
} // namespace mlir
|
|
|
|
using namespace mlir;
|
|
namespace {
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// PrepareForOMPOffloadPrivatizationPass
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
class PrepareForOMPOffloadPrivatizationPass
|
|
: public omp::impl::PrepareForOMPOffloadPrivatizationPassBase<
|
|
PrepareForOMPOffloadPrivatizationPass> {
|
|
|
|
void runOnOperation() override {
|
|
ModuleOp mod = getOperation();
|
|
|
|
// In this pass, we make host-allocated privatized variables persist for
|
|
// deferred target tasks by copying them to the heap. Once the target task
|
|
// is done, this heap memory is freed. Since all of this happens on the host
|
|
// we can skip device modules.
|
|
auto offloadModuleInterface =
|
|
dyn_cast<omp::OffloadModuleInterface>(mod.getOperation());
|
|
if (offloadModuleInterface && offloadModuleInterface.getIsTargetDevice())
|
|
return;
|
|
|
|
getOperation()->walk([&](omp::TargetOp targetOp) {
|
|
if (!hasPrivateVars(targetOp) || !isTargetTaskDeferred(targetOp))
|
|
return;
|
|
IRRewriter rewriter(&getContext());
|
|
OperandRange privateVars = targetOp.getPrivateVars();
|
|
SmallVector<mlir::Value> newPrivVars;
|
|
Value fakeDependVar;
|
|
omp::TaskOp cleanupTaskOp;
|
|
|
|
newPrivVars.reserve(privateVars.size());
|
|
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
|
|
for (auto [privVarIdx, privVarSymPair] :
|
|
llvm::enumerate(llvm::zip_equal(privateVars, *privateSyms))) {
|
|
Value privVar = std::get<0>(privVarSymPair);
|
|
Attribute privSym = std::get<1>(privVarSymPair);
|
|
|
|
omp::PrivateClauseOp privatizer = findPrivatizer(targetOp, privSym);
|
|
if (!privatizer.needsMap()) {
|
|
newPrivVars.push_back(privVar);
|
|
continue;
|
|
}
|
|
bool isFirstPrivate = privatizer.getDataSharingType() ==
|
|
omp::DataSharingClauseType::FirstPrivate;
|
|
|
|
Value mappedValue = targetOp.getMappedValueForPrivateVar(privVarIdx);
|
|
auto mapInfoOp = cast<omp::MapInfoOp>(mappedValue.getDefiningOp());
|
|
|
|
if (mapInfoOp.getMapCaptureType() == omp::VariableCaptureKind::ByCopy) {
|
|
newPrivVars.push_back(privVar);
|
|
continue;
|
|
}
|
|
|
|
// For deferred target tasks (!$omp target nowait), we need to keep
|
|
// a copy of the original, i.e. host variable being privatized so
|
|
// that it is available when the target task is eventually executed.
|
|
// We do this by first allocating as much heap memory as is needed by
|
|
// the original variable. Then, we use the init and copy regions of the
|
|
// privatizer, an instance of omp::PrivateClauseOp to set up the heap-
|
|
// allocated copy.
|
|
// After the target task is done, we need to use the dealloc region
|
|
// of the privatizer to clean up everything. We also need to free
|
|
// the heap memory we allocated. But due to the deferred nature
|
|
// of the target task, we cannot simply deallocate right after the
|
|
// omp.target operation else we may end up freeing memory before
|
|
// its eventual use by the target task. So, we create a dummy
|
|
// dependence between the target task and new omp.task. In the omp.task,
|
|
// we do all the cleanup. So, we end up with the following structure
|
|
//
|
|
// omp.target map_entries(..) ... nowait depend(out:fakeDependVar) {
|
|
// ...
|
|
// omp.terminator
|
|
// }
|
|
// omp.task depend(in: fakeDependVar) {
|
|
// /*cleanup_code*/
|
|
// omp.terminator
|
|
// }
|
|
// fakeDependVar is the address of the first heap-allocated copy of the
|
|
// host variable being privatized.
|
|
|
|
bool needsCleanupTask = !privatizer.getDeallocRegion().empty();
|
|
|
|
// Allocate heap memory that corresponds to the type of memory
|
|
// pointed to by varPtr
|
|
// For boxchars this won't be a pointer. But, MapsForPrivatizedSymbols
|
|
// should have mapped the pointer to the boxchar so use that as varPtr.
|
|
Value varPtr = mapInfoOp.getVarPtr();
|
|
Type varType = mapInfoOp.getVarType();
|
|
bool isPrivatizedByValue =
|
|
!isa<LLVM::LLVMPointerType>(privVar.getType());
|
|
|
|
assert(isa<LLVM::LLVMPointerType>(varPtr.getType()));
|
|
Value heapMem =
|
|
allocateHeapMem(targetOp, varPtr, varType, mod, rewriter);
|
|
if (!heapMem)
|
|
targetOp.emitError(
|
|
"Unable to allocate heap memory when trying to move "
|
|
"a private variable out of the stack and into the "
|
|
"heap for use by a deferred target task");
|
|
|
|
if (needsCleanupTask && !fakeDependVar)
|
|
fakeDependVar = heapMem;
|
|
|
|
// The types of private vars should match before and after the
|
|
// transformation. In particular, if the type is a pointer,
|
|
// simply record the newly allocated malloc location as the
|
|
// new private variable. If, however, the type is not a pointer
|
|
// then, we need to load the value from the newly allocated
|
|
// location. We'll insert that load later after we have updated
|
|
// the malloc'd location with the contents of the original
|
|
// variable.
|
|
if (!isPrivatizedByValue)
|
|
newPrivVars.push_back(heapMem);
|
|
|
|
// We now need to copy the original private variable into the newly
|
|
// allocated location in the heap.
|
|
// Find the earliest insertion point for the copy. This will be before
|
|
// the first in the list of omp::MapInfoOp instances that use varPtr.
|
|
// After the copy these omp::MapInfoOp instances will refer to heapMem
|
|
// instead.
|
|
Operation *varPtrDefiningOp = varPtr.getDefiningOp();
|
|
DenseSet<Operation *> users;
|
|
if (varPtrDefiningOp) {
|
|
users.insert(varPtrDefiningOp->user_begin(),
|
|
varPtrDefiningOp->user_end());
|
|
} else {
|
|
auto blockArg = cast<BlockArgument>(varPtr);
|
|
users.insert(blockArg.user_begin(), blockArg.user_end());
|
|
}
|
|
auto usesVarPtr = [&users](Operation *op) -> bool {
|
|
return users.count(op);
|
|
};
|
|
|
|
SmallVector<Operation *> chainOfOps;
|
|
chainOfOps.push_back(mapInfoOp);
|
|
for (auto member : mapInfoOp.getMembers()) {
|
|
omp::MapInfoOp memberMap =
|
|
cast<omp::MapInfoOp>(member.getDefiningOp());
|
|
if (usesVarPtr(memberMap))
|
|
chainOfOps.push_back(memberMap);
|
|
if (memberMap.getVarPtrPtr()) {
|
|
Operation *defOp = memberMap.getVarPtrPtr().getDefiningOp();
|
|
if (defOp && usesVarPtr(defOp))
|
|
chainOfOps.push_back(defOp);
|
|
}
|
|
}
|
|
|
|
DominanceInfo dom;
|
|
llvm::sort(chainOfOps, [&](Operation *l, Operation *r) {
|
|
if (l == r)
|
|
return false;
|
|
return dom.properlyDominates(l, r);
|
|
});
|
|
|
|
rewriter.setInsertionPoint(chainOfOps.front());
|
|
|
|
Operation *firstOp = chainOfOps.front();
|
|
Location loc = firstOp->getLoc();
|
|
|
|
// Create a llvm.func for 'region' that is marked always_inline and call
|
|
// it.
|
|
auto createAlwaysInlineFuncAndCallIt =
|
|
[&](Region ®ion, llvm::StringRef funcName,
|
|
llvm::ArrayRef<Value> args, bool returnsValue) -> Value {
|
|
assert(!region.empty() && "region cannot be empty");
|
|
LLVM::LLVMFuncOp func = createFuncOpForRegion(
|
|
loc, mod, region, funcName, rewriter, returnsValue);
|
|
auto call = LLVM::CallOp::create(rewriter, loc, func, args);
|
|
return call.getResult();
|
|
};
|
|
|
|
Value moldArg, newArg;
|
|
if (isPrivatizedByValue) {
|
|
moldArg = LLVM::LoadOp::create(rewriter, loc, varType, varPtr);
|
|
newArg = LLVM::LoadOp::create(rewriter, loc, varType, heapMem);
|
|
} else {
|
|
moldArg = varPtr;
|
|
newArg = heapMem;
|
|
}
|
|
|
|
Value initializedVal;
|
|
if (!privatizer.getInitRegion().empty())
|
|
initializedVal = createAlwaysInlineFuncAndCallIt(
|
|
privatizer.getInitRegion(),
|
|
llvm::formatv("{0}_{1}", privatizer.getSymName(), "init").str(),
|
|
{moldArg, newArg}, /*returnsValue=*/true);
|
|
else
|
|
initializedVal = newArg;
|
|
|
|
if (isFirstPrivate && !privatizer.getCopyRegion().empty())
|
|
initializedVal = createAlwaysInlineFuncAndCallIt(
|
|
privatizer.getCopyRegion(),
|
|
llvm::formatv("{0}_{1}", privatizer.getSymName(), "copy").str(),
|
|
{moldArg, initializedVal}, /*returnsValue=*/true);
|
|
|
|
if (isPrivatizedByValue)
|
|
(void)LLVM::StoreOp::create(rewriter, loc, initializedVal, heapMem);
|
|
|
|
// clone origOp, replace all uses of varPtr with heapMem and
|
|
// erase origOp.
|
|
auto cloneModifyAndErase = [&](Operation *origOp) -> Operation * {
|
|
Operation *clonedOp = rewriter.clone(*origOp);
|
|
rewriter.replaceAllOpUsesWith(origOp, clonedOp);
|
|
rewriter.modifyOpInPlace(clonedOp, [&]() {
|
|
clonedOp->replaceUsesOfWith(varPtr, heapMem);
|
|
});
|
|
rewriter.eraseOp(origOp);
|
|
return clonedOp;
|
|
};
|
|
|
|
// Now that we have set up the heap-allocated copy of the private
|
|
// variable, rewrite all the uses of the original variable with
|
|
// the heap-allocated variable.
|
|
rewriter.setInsertionPoint(targetOp);
|
|
mapInfoOp = cast<omp::MapInfoOp>(cloneModifyAndErase(mapInfoOp));
|
|
rewriter.setInsertionPoint(mapInfoOp);
|
|
|
|
// Fix any members that may use varPtr to now use heapMem
|
|
for (auto member : mapInfoOp.getMembers()) {
|
|
auto memberMapInfoOp = cast<omp::MapInfoOp>(member.getDefiningOp());
|
|
if (!usesVarPtr(memberMapInfoOp))
|
|
continue;
|
|
memberMapInfoOp =
|
|
cast<omp::MapInfoOp>(cloneModifyAndErase(memberMapInfoOp));
|
|
rewriter.setInsertionPoint(memberMapInfoOp);
|
|
|
|
if (memberMapInfoOp.getVarPtrPtr()) {
|
|
Operation *varPtrPtrdefOp =
|
|
memberMapInfoOp.getVarPtrPtr().getDefiningOp();
|
|
rewriter.setInsertionPoint(cloneModifyAndErase(varPtrPtrdefOp));
|
|
}
|
|
}
|
|
|
|
// If the type of the private variable is not a pointer,
|
|
// which is typically the case with !fir.boxchar types, then
|
|
// we need to ensure that the new private variable is also
|
|
// not a pointer. Insert a load from heapMem right before
|
|
// targetOp.
|
|
if (isPrivatizedByValue) {
|
|
rewriter.setInsertionPoint(targetOp);
|
|
auto newPrivVar = LLVM::LoadOp::create(rewriter, mapInfoOp.getLoc(),
|
|
varType, heapMem);
|
|
newPrivVars.push_back(newPrivVar);
|
|
}
|
|
|
|
// Deallocate
|
|
if (needsCleanupTask) {
|
|
if (!cleanupTaskOp) {
|
|
assert(fakeDependVar &&
|
|
"Need a valid value to set up a dependency");
|
|
rewriter.setInsertionPointAfter(targetOp);
|
|
omp::TaskOperands taskOperands;
|
|
auto inDepend = omp::ClauseTaskDependAttr::get(
|
|
rewriter.getContext(), omp::ClauseTaskDepend::taskdependin);
|
|
taskOperands.dependKinds.push_back(inDepend);
|
|
taskOperands.dependVars.push_back(fakeDependVar);
|
|
cleanupTaskOp = omp::TaskOp::create(rewriter, loc, taskOperands);
|
|
Block *taskBlock = rewriter.createBlock(&cleanupTaskOp.getRegion());
|
|
rewriter.setInsertionPointToEnd(taskBlock);
|
|
omp::TerminatorOp::create(rewriter, cleanupTaskOp.getLoc());
|
|
}
|
|
rewriter.setInsertionPointToStart(
|
|
&*cleanupTaskOp.getRegion().getBlocks().begin());
|
|
(void)createAlwaysInlineFuncAndCallIt(
|
|
privatizer.getDeallocRegion(),
|
|
llvm::formatv("{0}_{1}", privatizer.getSymName(), "dealloc")
|
|
.str(),
|
|
{initializedVal}, /*returnsValue=*/false);
|
|
llvm::FailureOr<LLVM::LLVMFuncOp> freeFunc =
|
|
LLVM::lookupOrCreateFreeFn(rewriter, mod);
|
|
assert(llvm::succeeded(freeFunc) &&
|
|
"Could not find free in the module");
|
|
(void)LLVM::CallOp::create(rewriter, loc, freeFunc.value(),
|
|
ValueRange{heapMem});
|
|
}
|
|
}
|
|
assert(newPrivVars.size() == privateVars.size() &&
|
|
"The number of private variables must match before and after "
|
|
"transformation");
|
|
if (fakeDependVar) {
|
|
omp::ClauseTaskDependAttr outDepend = omp::ClauseTaskDependAttr::get(
|
|
rewriter.getContext(), omp::ClauseTaskDepend::taskdependout);
|
|
SmallVector<Attribute> newDependKinds;
|
|
if (!targetOp.getDependVars().empty()) {
|
|
std::optional<ArrayAttr> dependKinds = targetOp.getDependKinds();
|
|
assert(dependKinds && "bad depend clause in omp::TargetOp");
|
|
llvm::copy(*dependKinds, std::back_inserter(newDependKinds));
|
|
}
|
|
newDependKinds.push_back(outDepend);
|
|
ArrayAttr newDependKindsAttr =
|
|
ArrayAttr::get(rewriter.getContext(), newDependKinds);
|
|
targetOp.getDependVarsMutable().append(fakeDependVar);
|
|
targetOp.setDependKindsAttr(newDependKindsAttr);
|
|
}
|
|
rewriter.setInsertionPoint(targetOp);
|
|
targetOp.getPrivateVarsMutable().clear();
|
|
targetOp.getPrivateVarsMutable().assign(newPrivVars);
|
|
});
|
|
}
|
|
|
|
private:
|
|
// Returns true when `targetOp` has at least one private variable operand.
bool hasPrivateVars(omp::TargetOp targetOp) const {
  OperandRange privateOperands = targetOp.getPrivateVars();
  return !privateOperands.empty();
}
|
|
|
|
// Returns true when `targetOp` carries the nowait clause, i.e. when the
// target task it gives rise to is deferred.
bool isTargetTaskDeferred(omp::TargetOp targetOp) const {
  const bool hasNowait = targetOp.getNowait();
  return hasNowait;
}
|
|
|
|
template <typename OpTy>
|
|
omp::PrivateClauseOp findPrivatizer(OpTy op, Attribute privSym) const {
|
|
SymbolRefAttr privatizerName = llvm::cast<SymbolRefAttr>(privSym);
|
|
omp::PrivateClauseOp privatizer =
|
|
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
|
|
op, privatizerName);
|
|
return privatizer;
|
|
}
|
|
|
|
// Get the (compile-time constant) size of varType as per the
|
|
// given DataLayout dl.
|
|
std::int64_t getSizeInBytes(const DataLayout &dl, Type varType) const {
|
|
llvm::TypeSize size = dl.getTypeSize(varType);
|
|
unsigned short alignment = dl.getTypeABIAlignment(varType);
|
|
return llvm::alignTo(size, alignment);
|
|
}
|
|
|
|
// Looks up the declaration of malloc in `mod`, creating it if needed, with
// an i64 size parameter. Asserts that the lookup/creation succeeded.
LLVM::LLVMFuncOp getMalloc(ModuleOp mod, IRRewriter &rewriter) const {
  Type sizeArgType = rewriter.getI64Type();
  llvm::FailureOr<LLVM::LLVMFuncOp> mallocFn =
      LLVM::lookupOrCreateMallocFn(rewriter, mod, sizeArgType);
  assert(llvm::succeeded(mallocFn) && "Could not find malloc in the module");
  return mallocFn.value();
}
|
|
|
|
// Emits a call to malloc that allocates enough bytes to hold a value of
// `varType` and returns the result of that call.
//
// The size constant and the call are inserted immediately before the op
// that defines `privVar`, or at the start of the owning block when `privVar`
// is a block argument. The rewriter's insertion point is restored on return.
// `targetOp` is currently unused.
Value allocateHeapMem(omp::TargetOp targetOp, Value privVar, Type varType,
                      ModuleOp mod, IRRewriter &rewriter) const {
  OpBuilder::InsertionGuard guard(rewriter);
  Value varPtr = privVar;
  Operation *definingOp = varPtr.getDefiningOp();
  BlockArgument blockArg;
  if (definingOp) {
    rewriter.setInsertionPoint(definingOp);
  } else {
    // A value without a defining op must be a block argument; cast<> asserts
    // this instead of handing back a null value that is then dereferenced.
    blockArg = mlir::cast<BlockArgument>(varPtr);
    rewriter.setInsertionPointToStart(blockArg.getParentBlock());
  }
  Location loc = definingOp ? definingOp->getLoc() : blockArg.getLoc();
  LLVM::LLVMFuncOp mallocFn = getMalloc(mod, rewriter);

  assert(mod.getDataLayoutSpec() &&
         "MLIR module with no datalayout spec not handled yet");

  const DataLayout dl(mod);
  std::int64_t distance = getSizeInBytes(dl, varType);

  // The size operand uses the type of malloc's first parameter.
  Value sizeBytes = LLVM::ConstantOp::create(
      rewriter, loc, mallocFn.getFunctionType().getParamType(0), distance);

  auto mallocCallOp =
      LLVM::CallOp::create(rewriter, loc, mallocFn, ValueRange{sizeBytes});
  return mallocCallOp.getResult();
}
|
|
|
|
// Create a function for srcRegion and attribute it to be always_inline.
|
|
// The big assumption here is that srcRegion is one of init, copy or dealloc
|
|
// regions of a omp::PrivateClauseop. Accordingly, the return type is assumed
|
|
// to either be the same as the types of the two arguments of the region (for
|
|
// init and copy regions) or void as would be the case for dealloc regions.
|
|
LLVM::LLVMFuncOp createFuncOpForRegion(Location loc, ModuleOp mod,
|
|
Region &srcRegion,
|
|
llvm::StringRef funcName,
|
|
IRRewriter &rewriter,
|
|
bool returnsValue = false) {
|
|
|
|
OpBuilder::InsertionGuard guard(rewriter);
|
|
rewriter.setInsertionPoint(mod.getBody(), mod.getBody()->end());
|
|
Region clonedRegion;
|
|
IRMapping mapper;
|
|
srcRegion.cloneInto(&clonedRegion, mapper);
|
|
|
|
SmallVector<Type> paramTypes;
|
|
llvm::copy(srcRegion.getArgumentTypes(), std::back_inserter(paramTypes));
|
|
Type resultType = returnsValue
|
|
? srcRegion.getArgument(0).getType()
|
|
: LLVM::LLVMVoidType::get(rewriter.getContext());
|
|
LLVM::LLVMFunctionType funcType =
|
|
LLVM::LLVMFunctionType::get(resultType, paramTypes);
|
|
|
|
LLVM::LLVMFuncOp func =
|
|
LLVM::LLVMFuncOp::create(rewriter, loc, funcName, funcType);
|
|
func.setAlwaysInline(true);
|
|
rewriter.inlineRegionBefore(clonedRegion, func.getRegion(),
|
|
func.getRegion().end());
|
|
for (auto &block : func.getRegion().getBlocks()) {
|
|
if (isa<omp::YieldOp>(block.getTerminator())) {
|
|
omp::YieldOp yieldOp = cast<omp::YieldOp>(block.getTerminator());
|
|
rewriter.setInsertionPoint(yieldOp);
|
|
rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(yieldOp, TypeRange(),
|
|
yieldOp.getOperands());
|
|
}
|
|
}
|
|
return func;
|
|
}
|
|
};
|
|
} // namespace
|