[mlir][llvmir][OpenMP] Translate affinity clause in task construct to llvmir (#182223)

Translate affinity entries to LLVMIR by passing affinity information to
createTask (__kmpc_omp_reg_task_with_affinity is created inside
PostOutlineCB).

3/3 in stack for implementing affinity clause with iterator modifier
1/3 #182218
2/3 #182222
3/3 #182223
This commit is contained in:
Chi-Chun, Chen 2026-03-16 10:16:38 -05:00 committed by GitHub
parent d4c22859db
commit 2ad51ffbfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 1056 additions and 35 deletions

View File

@ -1510,6 +1510,16 @@ public:
: DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
};
/// Return the LLVM struct type matching runtime `kmp_task_affinity_info_t`.
/// `{ kmp_intptr_t base_addr; size_t len; flags (bitfield storage as i32) }`
/// `base_addr` and `len` are both modelled as integers whose width is the
/// pointer size reported by the module's data layout.
LLVM_ABI llvm::StructType *getKmpTaskAffinityInfoTy();
/// A struct to pack the relevant information for an OpenMP affinity clause.
///
/// Both members default to null, which means "no affinity clause": createTask
/// only registers affinities with the runtime when both are set.
struct AffinityData {
  /// Number of kmp_task_affinity_info_t entries reachable through \c Info.
  Value *Count = nullptr;
  /// Pointer to the first kmp_task_affinity_info_t entry.
  Value *Info = nullptr;
};
/// Generator for `#omp taskloop`
///
/// \param Loc The location where the taskloop construct was encountered.
@ -1568,17 +1578,21 @@ public:
/// cannot be resumed until execution of the structured
/// block that is associated with the generated task is
/// completed.
/// \param Dependencies Vector of DependData objects holding information of
/// dependencies as specified by the 'depend' clause.
/// \param Affinities AffinityData object holding information of accumulated
/// affinities as specified by the 'affinity' clause.
/// \param EventHandle If present, signifies the event handle as part of
/// the detach clause
/// \param Mergeable If the given task is `mergeable`
/// \param Priority `priority-value` specifies the execution order of the
/// tasks that are generated by the construct
LLVM_ABI InsertPointOrErrorTy
createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied = true,
Value *Final = nullptr, Value *IfCondition = nullptr,
SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
Value *EventHandle = nullptr, Value *Priority = nullptr);
LLVM_ABI InsertPointOrErrorTy createTask(
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr,
Value *IfCondition = nullptr, SmallVector<DependData> Dependencies = {},
AffinityData Affinities = {}, bool Mergeable = false,
Value *EventHandle = nullptr, Value *Priority = nullptr);
/// Generator for the taskgroup construct
///
@ -3926,6 +3940,39 @@ public:
LLVM_ABI GlobalVariable *
getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
std::optional<unsigned> AddressSpace = {});
/// Callback type used by createIteratorLoop() to emit the loop body. It
/// receives the insertion point of the body block and the loop's induction
/// variable, and reports failure through the returned llvm::Error.
using IteratorBodyGenTy = llvm::function_ref<llvm::Error(
InsertPointTy BodyIP, llvm::Value *LinearIV)>;
/// Create a canonical iterator loop at the current insertion point.
///
/// This helper splits the current block and builds a canonical loop
/// using createLoopSkeleton(). The resulting control flow looks like:
///
/// CurBB -> Preheader -> Header -> Body -> Latch -> After -> ContBB
///
/// The body of the loop is produced by calling \p BodyGen with the insertion
/// point for the loop body and the induction variable.
/// Unlike createCanonicalLoop(), this function is intended for \p BodyGen
/// that may perform region lowering (e.g., translating MLIR regions) and are
/// not guaranteed to preserve the canonical skeleton's body terminator. In
/// particular:
///
/// - The skeleton's unconditional branch from the loop body is removed
/// before invoking \p BodyGen.
/// - \p BodyGen may freely emit instructions and temporarily introduce
/// control flow.
/// - If the loop body does not end with a terminator after \p BodyGen
/// returns, a branch to the latch is inserted to restore canonical form.
/// - If the loop body does end with a terminator after \p BodyGen returns,
/// it must be an unconditional branch to the latch; anything else is
/// reported as an error.
///
/// \param Loc The location where the iterator modifier was encountered.
/// \param TripCount Number of loop iterations.
/// \param BodyGen Callback to generate the loop body.
/// \param Name Base name used for creating the loop
/// \returns The insertion position *after* the iterator loop, or the error
/// produced by \p BodyGen or by the body-terminator check.
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(
LocationDescription Loc, llvm::Value *TripCount,
IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator");
};
/// Class to represent the control flow structure of an OpenMP canonical loop.

View File

@ -2431,11 +2431,18 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
return Builder.saveIP();
}
/// Build the literal struct type `{ intptr, intptr, i32 }` that mirrors the
/// runtime's `kmp_task_affinity_info_t` (base address, length, flags).
llvm::StructType *OpenMPIRBuilder::getKmpTaskAffinityInfoTy() {
  llvm::LLVMContext &Ctx = M.getContext();
  // Address and length are both stored as pointer-sized integers.
  const unsigned PtrBits = M.getDataLayout().getPointerSizeInBits();
  llvm::Type *IntPtrTy = llvm::Type::getIntNTy(Ctx, PtrBits);
  llvm::Type *FlagsTy = llvm::Type::getInt32Ty(Ctx);
  return llvm::StructType::get(IntPtrTy, IntPtrTy, FlagsTy);
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
const LocationDescription &Loc, InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle,
Value *Priority) {
SmallVector<DependData> Dependencies, AffinityData Affinities,
bool Mergeable, Value *EventHandle, Value *Priority) {
if (!updateToLocation(Loc))
return InsertPointTy();
@ -2481,8 +2488,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
Mergeable, Priority, EventHandle, TaskAllocaBB,
ToBeDeleted](Function &OutlinedFn) mutable {
Affinities, Mergeable, Priority, EventHandle,
TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
// Replace the Stale CI by appropriate RTL function call.
assert(OutlinedFn.hasOneUse() &&
"there must be a single user for the outlined function");
@ -2555,6 +2562,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
/*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
/*task_func=*/&OutlinedFn});
if (Affinities.Count && Affinities.Info) {
Function *RegAffFn = getOrCreateRuntimeFunctionPtr(
OMPRTL___kmpc_omp_reg_task_with_affinity);
createRuntimeFunctionCall(RegAffFn, {Ident, ThreadID, TaskData,
Affinities.Count, Affinities.Info});
}
// Emit detach clause initialization.
// evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
// task_descriptor);
@ -11573,6 +11588,65 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
loadOffloadInfoMetadata(*M.get());
}
/// Emit a canonical loop with \p TripCount iterations at \p Loc and let
/// \p BodyGen populate its body.
///
/// The block at \p Loc is split; the loop is inserted between the original
/// block and the continuation block "omp.it.cont". On success the returned
/// insertion point is the beginning of that continuation block.
///
/// An error is returned when \p BodyGen fails, or when it leaves the body
/// block terminated by anything other than an unconditional branch to the
/// loop latch.
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
    LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
    llvm::StringRef Name) {
  // Adopt both the insertion point and the debug location carried by Loc, as
  // createTask does, so the blocks created below are tagged with the location
  // of the construct instead of whatever the builder held previously.
  updateToLocation(Loc);

  BasicBlock *CurBB = Builder.GetInsertBlock();
  assert(CurBB &&
         "expected a valid insertion block for creating an iterator loop");
  Function *F = CurBB->getParent();

  // When inserting at the end of an already terminated block, split in front
  // of the terminator so that it moves into the continuation block.
  InsertPointTy SplitIP = Builder.saveIP();
  if (SplitIP.getPoint() == CurBB->end())
    if (Instruction *Terminator = CurBB->getTerminator())
      SplitIP = InsertPointTy(CurBB, Terminator->getIterator());

  BasicBlock *ContBB =
      splitBB(SplitIP, /*CreateBranch=*/false,
              Builder.getCurrentDebugLocation(), "omp.it.cont");

  CanonicalLoopInfo *CLI =
      createLoopSkeleton(Builder.getCurrentDebugLocation(), TripCount, F,
                         /*PreInsertBefore=*/ContBB,
                         /*PostInsertBefore=*/ContBB, Name);

  // Enter loop from original block.
  redirectTo(CurBB, CLI->getPreheader(), Builder.getCurrentDebugLocation());

  // Remove the unconditional branch inserted by createLoopSkeleton in the
  // body, so BodyGen starts from an unterminated block.
  if (Instruction *T = CLI->getBody()->getTerminator())
    T->eraseFromParent();

  InsertPointTy BodyIP = CLI->getBodyIP();
  if (llvm::Error Err = BodyGen(BodyIP, CLI->getIndVar()))
    return Err;

  // Body must either fallthrough to the latch or branch directly to it.
  if (Instruction *BodyTerminator = CLI->getBody()->getTerminator()) {
    auto *BodyBr = dyn_cast<BranchInst>(BodyTerminator);
    if (!BodyBr || !BodyBr->isUnconditional() ||
        BodyBr->getSuccessor(0) != CLI->getLatch()) {
      return make_error<StringError>(
          "iterator bodygen must terminate the canonical body with an "
          "unconditional branch to the loop latch",
          inconvertibleErrorCode());
    }
  } else {
    // Ensure we end the loop body by jumping to the latch.
    Builder.SetInsertPoint(CLI->getBody());
    Builder.CreateBr(CLI->getLatch());
  }

  // Link After -> ContBB
  Builder.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
  if (!CLI->getAfter()->getTerminator())
    Builder.CreateBr(ContBB);

  return InsertPointTy{ContBB, ContBB->begin()};
}
//===----------------------------------------------------------------------===//
// OffloadEntriesInfoManager
//===----------------------------------------------------------------------===//

View File

@ -7555,6 +7555,186 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
EXPECT_EQ(OulinedFnCall->getNextNode(), TaskCompleteCall);
}
// Passing AffinityData to createTask must produce a call to
// __kmpc_omp_reg_task_with_affinity next to the usual task allocation.
TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.Config.IsTargetDevice = false;
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  // The task body is irrelevant for this test, so it stays empty.
  auto EmptyBodyCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
    return Error::success();
  };

  LLVMContext &Ctx = M->getContext();
  IntegerType *I64Ty = Type::getInt64Ty(Ctx);
  StructType *AffInfoTy = StructType::get(I64Ty, I64Ty, Type::getInt32Ty(Ctx));

  // Create [1 x AffInfoTy] as alloca (element alloca is fine too).
  Value *NumAffinities = Builder.getInt32(1);
  AllocaInst *AffinityList =
      Builder.CreateAlloca(AffInfoTy, Builder.getInt64(1), "omp.affinity_list");

  // Fill entry 0 minimally so the pointer definitely dominates use.
  Value *FirstEntry = Builder.CreateInBoundsGEP(
      AffInfoTy, AffinityList, Builder.getInt64(0), "omp.affinity.entry");
  auto StoreField = [&](unsigned FieldNo, Value *FieldVal) {
    Builder.CreateStore(FieldVal,
                        Builder.CreateStructGEP(AffInfoTy, FirstEntry, FieldNo));
  };
  StoreField(0, Builder.getInt64(0));
  StoreField(1, Builder.getInt64(64));
  StoreField(2, Builder.getInt32(0));

  OpenMPIRBuilder::AffinityData Affinity{NumAffinities, AffinityList};

  BasicBlock *AllocaBB = Builder.GetInsertBlock();
  BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
  OpenMPIRBuilder::LocationDescription Loc(
      InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
  InsertPointTy TaskAllocaIP(AllocaBB, AllocaBB->getFirstInsertionPt());

  ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
                       OMPBuilder.createTask(Loc, TaskAllocaIP, EmptyBodyCB,
                                             /*Tied=*/true,
                                             /*Final=*/nullptr,
                                             /*IfCondition=*/nullptr,
                                             /*Dependencies=*/{},
                                             /*Affinity=*/Affinity,
                                             /*Mergeable=*/false,
                                             /*EventHandle=*/nullptr,
                                             /*Priority=*/nullptr));

  Builder.restoreIP(AfterIP);
  OMPBuilder.finalize();
  Builder.CreateRetVoid();

  EXPECT_FALSE(verifyModule(*M, &errs()));

  Function *TaskAllocFn =
      OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
  Function *RegAffFn = OMPBuilder.getOrCreateRuntimeFunctionPtr(
      OMPRTL___kmpc_omp_reg_task_with_affinity);

  // Remember the last call to each of the two runtime entry points.
  CallInst *TaskAllocCI = nullptr;
  CallInst *RegAffCI = nullptr;
  for (auto &I : instructions(F)) {
    auto *Call = dyn_cast<CallInst>(&I);
    if (!Call)
      continue;
    Function *Callee = Call->getCalledFunction();
    if (Callee == TaskAllocFn)
      TaskAllocCI = Call;
    if (Callee == RegAffFn)
      RegAffCI = Call;
  }

  ASSERT_NE(TaskAllocCI, nullptr) << "expected __kmpc_omp_task_alloc call";
  ASSERT_NE(RegAffCI, nullptr)
      << "expected __kmpc_omp_reg_task_with_affinity call";

  // Check reg_task_with_affinity signature:
  // i32 __kmpc_omp_reg_task_with_affinity(ident_t*, i32 gtid,
  //                                       kmp_task_t*, i32 naffins,
  //                                       kmp_task_affinity_info_t*)
  ASSERT_EQ(RegAffCI->arg_size(), 5u);

  // naffins
  Value *NAffinsArg = RegAffCI->getArgOperand(3);
  EXPECT_TRUE(NAffinsArg->getType()->isIntegerTy(32));

  // kmp_task_affinity_info_t*
  Value *InfoArg = RegAffCI->getArgOperand(4);
  EXPECT_TRUE(InfoArg->getType()->isPointerTy());
}
// Exercises createIteratorLoop both when the insertion block is still
// unterminated and when it already ends in a branch.
TEST_F(OpenMPIRBuilderTest, CreateIteratorLoop) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

  // Case 1: insert into a block that has no terminator yet.
  {
    OpenMPIRBuilder OMPBuilder(*M);
    OMPBuilder.initialize();
    F->setName("func.unterminated");
    IRBuilder<> Builder(BB);

    // The body emits a single add and leaves the block unterminated.
    auto EmitBody = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
      Builder.restoreIP(BodyIP);
      Builder.CreateAdd(LinearIV, Builder.getInt64(1));
      return Error::success();
    };

    OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
    ASSERT_EXPECTED_INIT(InsertPointTy, AfterIP,
                         OMPBuilder.createIteratorLoop(Loc, Builder.getInt64(4),
                                                       EmitBody, "iterator"));
    Builder.restoreIP(AfterIP);
    Builder.CreateRetVoid();

    EXPECT_EQ(AfterIP.getBlock()->getName(), "omp.it.cont");
    EXPECT_FALSE(verifyFunction(*F, &errs()));
  }

  // Case 2: insert at the end of a block that already branches to a
  // successor; that branch has to end up in the continuation block.
  {
    Function *TerminatedFn =
        Function::Create(F->getFunctionType(), Function::ExternalLinkage,
                         "func.terminated", M.get());
    BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", TerminatedFn);
    OpenMPIRBuilder OMPBuilder(*M);
    OMPBuilder.initialize();
    IRBuilder<> Builder(EntryBB);
    BasicBlock *OrigSucc =
        BasicBlock::Create(Builder.getContext(), "orig.succ", TerminatedFn);
    Builder.CreateBr(OrigSucc);

    auto EmitBody = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
      Builder.restoreIP(BodyIP);
      Builder.CreateAdd(LinearIV, Builder.getInt64(1));
      return Error::success();
    };

    OpenMPIRBuilder::LocationDescription Loc(
        InsertPointTy(EntryBB, EntryBB->end()), DL);
    ASSERT_EXPECTED_INIT(InsertPointTy, AfterIP,
                         OMPBuilder.createIteratorLoop(Loc, Builder.getInt64(4),
                                                       EmitBody, "iterator"));

    BasicBlock *ContBB = AfterIP.getBlock();
    EXPECT_EQ(ContBB->getName(), "omp.it.cont");
    auto *ContBr = dyn_cast<BranchInst>(ContBB->getTerminator());
    ASSERT_NE(ContBr, nullptr);
    ASSERT_FALSE(ContBr->isConditional());
    EXPECT_EQ(ContBr->getSuccessor(0), OrigSucc);

    Builder.SetInsertPoint(OrigSucc);
    Builder.CreateRetVoid();
    EXPECT_FALSE(verifyFunction(*TerminatedFn, &errs()));
  }

  EXPECT_FALSE(verifyModule(*M, &errs()));
}
// A body generator that terminates the loop body with a branch to some block
// other than the latch must make createIteratorLoop fail.
TEST_F(OpenMPIRBuilderTest, CreateIteratorLoopInvalidLoopBody) {
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  F->setName("func");
  IRBuilder<> Builder(BB);

  auto BranchAwayCB = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error {
    Builder.restoreIP(BodyIP);
    Builder.CreateAdd(LinearIV, Builder.getInt64(1));

    // Leave the loop body through a block that is not the loop latch.
    BasicBlock *WrongTarget =
        BasicBlock::Create(Builder.getContext(), "iterator.bad.dest", F);
    Builder.CreateBr(WrongTarget);
    Builder.SetInsertPoint(WrongTarget);
    Builder.CreateUnreachable();
    return Error::success();
  };

  OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
  OpenMPIRBuilder::InsertPointOrErrorTy Result = OMPBuilder.createIteratorLoop(
      Loc, Builder.getInt64(4), BranchAwayCB, "iterator");
  ASSERT_TRUE(errorToBool(Result.takeError()));
}
TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);

View File

@ -154,6 +154,9 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
// discarded on lowering to LLVM-IR from the OpenMP dialect.
converter.addConversion(
[&](omp::MapBoundsType type) -> Type { return type; });
converter.addConversion(
[&](omp::AffinityEntryType type) -> Type { return type; });
converter.addConversion([&](omp::IteratedType type) -> Type { return type; });
// Add conversions for all OpenMP operations.
addOpenMPOpConversions<

View File

@ -18,6 +18,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/OperationSupport.h"
#include "mlir/IR/SymbolTable.h"
@ -4796,6 +4797,30 @@ LogicalResult IteratorOp::verify() {
if (!iteratedTy)
return emitOpError() << "result must be omp.iterated<entry_ty>";
for (auto [lb, ub, step] : llvm::zip_equal(
getLoopLowerBounds(), getLoopUpperBounds(), getLoopSteps())) {
if (matchPattern(step, m_Zero()))
return emitOpError() << "loop step must not be zero";
IntegerAttr lbAttr;
IntegerAttr ubAttr;
IntegerAttr stepAttr;
if (!matchPattern(lb, m_Constant(&lbAttr)) ||
!matchPattern(ub, m_Constant(&ubAttr)) ||
!matchPattern(step, m_Constant(&stepAttr)))
continue;
const APInt &lbVal = lbAttr.getValue();
const APInt &ubVal = ubAttr.getValue();
const APInt &stepVal = stepAttr.getValue();
if (stepVal.isStrictlyPositive() && lbVal.sgt(ubVal))
return emitOpError() << "positive loop step requires lower bound to be "
"less than or equal to upper bound";
if (stepVal.isNegative() && lbVal.slt(ubVal))
return emitOpError() << "negative loop step requires lower bound to be "
"greater than or equal to upper bound";
}
Block &b = getRegion().front();
auto yield = llvm::dyn_cast<omp::YieldOp>(b.getTerminator());

View File

@ -321,10 +321,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
<< " operation";
};
auto checkAffinity = [&todo](auto op, LogicalResult &result) {
if (!op.getAffinityVars().empty())
result = todo("affinity");
};
auto checkAllocate = [&todo](auto op, LogicalResult &result) {
if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
result = todo("allocate");
@ -408,7 +404,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
checkThreadLimit(op, result);
})
.Case([&](omp::TaskOp op) {
checkAffinity(op, result);
checkAllocate(op, result);
checkInReduction(op, result);
})
@ -2233,6 +2228,81 @@ private:
/// The type of the structure
llvm::Type *structTy = nullptr;
};
/// IteratorInfo extracts and prepares loop bounds information from an
/// mlir::omp::IteratorOp for lowering to LLVM IR.
///
/// It computes the per-dimension trip counts and the total linearized trip
/// count, casted to i64. These are used to build a canonical loop and to
/// reconstruct the physical induction variables inside the loop body.
///
/// Constructing an IteratorInfo emits the trip-count arithmetic at the
/// builder's current insertion point.
class IteratorInfo {
private:
  llvm::SmallVector<llvm::Value *> lowerBounds;
  llvm::SmallVector<llvm::Value *> upperBounds;
  llvm::SmallVector<llvm::Value *> steps;
  llvm::SmallVector<llvm::Value *> trips;
  unsigned dims = 0;
  llvm::Value *totalTrips = nullptr;

  /// Look up the LLVM value mapped to \p val and return it as an i64,
  /// sign-extending or truncating other integer widths. Returns null when
  /// \p val has no mapping or is not of integer type.
  static llvm::Value *lookUpAsI64(mlir::Value val,
                                  const LLVM::ModuleTranslation &mt,
                                  llvm::IRBuilderBase &builder) {
    llvm::Value *v = mt.lookupValue(val);
    if (!v)
      return nullptr;
    if (v->getType()->isIntegerTy(64))
      return v;
    if (v->getType()->isIntegerTy())
      return builder.CreateSExtOrTrunc(v, builder.getInt64Ty());
    return nullptr;
  }

public:
  IteratorInfo(mlir::omp::IteratorOp itersOp,
               mlir::LLVM::ModuleTranslation &moduleTranslation,
               llvm::IRBuilderBase &builder) {
    dims = itersOp.getLoopLowerBounds().size();
    lowerBounds.resize(dims);
    upperBounds.resize(dims);
    steps.resize(dims);
    trips.resize(dims);

    for (unsigned d = 0; d < dims; ++d) {
      llvm::Value *lb = lookUpAsI64(itersOp.getLoopLowerBounds()[d],
                                    moduleTranslation, builder);
      llvm::Value *ub = lookUpAsI64(itersOp.getLoopUpperBounds()[d],
                                    moduleTranslation, builder);
      llvm::Value *st =
          lookUpAsI64(itersOp.getLoopSteps()[d], moduleTranslation, builder);
      assert(lb && ub && st &&
             "Expect lowerBounds, upperBounds, and steps in IteratorOp");
      assert((!llvm::isa<llvm::ConstantInt>(st) ||
              !llvm::cast<llvm::ConstantInt>(st)->isZero()) &&
             "Expect non-zero step in IteratorOp");

      lowerBounds[d] = lb;
      upperBounds[d] = ub;
      steps[d] = st;

      // trips = ((ub - lb) / step) + 1, with an inclusive upper bound. The
      // signed division makes the formula hold for negative steps too (e.g.
      // 4 to 1 step -1 gives 4 trips). It relies on the bounds running in the
      // direction of the step, which the IteratorOp verifier only checks for
      // constant operands.
      llvm::Value *diff = builder.CreateSub(ub, lb);
      llvm::Value *div = builder.CreateSDiv(diff, st);
      trips[d] = builder.CreateAdd(
          div, llvm::ConstantInt::get(builder.getInt64Ty(), 1));
    }

    // The linearized trip count is the product over all dimensions.
    totalTrips = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
    for (unsigned d = 0; d < dims; ++d)
      totalTrips = builder.CreateMul(totalTrips, trips[d]);
  }

  unsigned getDims() const { return dims; }
  llvm::ArrayRef<llvm::Value *> getLowerBounds() const { return lowerBounds; }
  llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
  llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
  llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
  llvm::Value *getTotalTrips() const { return totalTrips; }
};
} // namespace
void TaskContextStructManager::generateTaskContextStruct() {
@ -2307,6 +2377,235 @@ void TaskContextStructManager::freeStructPtr() {
builder.CreateFree(structPtr);
}
/// Write one `kmp_task_affinity_info_t` record into slot \p index of
/// \p affinityList: the address converted to an integer, the length cast
/// (unsigned) to the length field's type, and zeroed flags.
static void storeAffinityEntry(llvm::IRBuilderBase &builder,
                               llvm::OpenMPIRBuilder &ompBuilder,
                               llvm::Value *affinityList, llvm::Value *index,
                               llvm::Value *addr, llvm::Value *len) {
  llvm::StructType *entryTy = ompBuilder.getKmpTaskAffinityInfoTy();
  llvm::Value *entryPtr = builder.CreateInBoundsGEP(
      entryTy, affinityList, index, "omp.affinity.entry");

  // Convert the operands to the field types before touching memory.
  llvm::Value *addrAsInt =
      builder.CreatePtrToInt(addr, entryTy->getElementType(0));
  llvm::Value *lenAsInt = builder.CreateIntCast(
      len, entryTy->getElementType(1), /*isSigned=*/false);
  llvm::Value *zeroFlags = builder.getInt32(0);

  // Each field gets its own struct GEP immediately followed by the store.
  auto storeField = [&](unsigned fieldNo, llvm::Value *fieldVal) {
    llvm::Value *fieldPtr =
        builder.CreateStructGEP(entryTy, entryPtr, fieldNo);
    builder.CreateStore(fieldVal, fieldPtr);
  };
  storeField(0, addrAsInt);
  storeField(1, lenAsInt);
  storeField(2, zeroFlags);
}
/// Store every plain (non-iterator) affinity item of a task into consecutive
/// slots of \p affinityList, in operand order.
static void fillAffinityLocators(Operation::operand_range affinityVars,
                                 llvm::IRBuilderBase &builder,
                                 LLVM::ModuleTranslation &moduleTranslation,
                                 llvm::Value *affinityList) {
  llvm::OpenMPIRBuilder &ompBuilder = *moduleTranslation.getOpenMPBuilder();
  for (auto item : llvm::enumerate(affinityVars)) {
    auto entryOp =
        item.value().getDefiningOp<mlir::omp::AffinityEntryOp>();
    assert(entryOp && "affinity item must be omp.affinity_entry");

    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
    assert(addr && len && "expect affinity addr and len to be non-null");

    llvm::Value *slot = builder.getInt64(item.index());
    storeAffinityEntry(builder, ompBuilder, affinityList, slot, addr, len);
  }
}
/// Translate the single block of an iterator region into the current loop
/// body.
///
/// The linear induction variable is first split into per-dimension indices
/// (innermost dimension varies fastest); each index is turned into the
/// physical induction value `lb + idx * step` and bound to the matching
/// block argument. The block is then converted in place.
static mlir::LogicalResult
convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo,
                      mlir::Block &iteratorRegionBlock,
                      llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {
  llvm::Value *remaining = linearIV;
  for (unsigned dim = iterInfo.getDims(); dim-- > 0;) {
    llvm::Value *tripCount = iterInfo.getTrips()[dim];

    // idx_d = remaining % trip_d
    llvm::Value *dimIndex = builder.CreateURem(remaining, tripCount);
    // remaining = remaining / trip_d
    remaining = builder.CreateUDiv(remaining, tripCount);

    // physIV_d = lb_d + idx_d * step_d
    llvm::Value *scaled =
        builder.CreateMul(dimIndex, iterInfo.getSteps()[dim]);
    llvm::Value *physIV = builder.CreateAdd(iterInfo.getLowerBounds()[dim],
                                            scaled, "omp.it.phys_iv");

    moduleTranslation.mapValue(iteratorRegionBlock.getArgument(dim), physIV);
  }

  // Translate the iterator region into the loop body.
  moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock());
  return moduleTranslation.convertBlock(iteratorRegionBlock,
                                        /*ignoreArguments=*/true, builder);
}
/// Emit a loop over the linearized iteration space of \p itersOp that stores
/// the affinity entry yielded by each iteration into
/// `affinityList[linearIV]`.
///
/// On success the builder is left positioned after the generated loop.
/// Returns failure when the iterator region cannot be converted or when the
/// loop cannot be created.
static mlir::LogicalResult
fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp,
                         llvm::IRBuilderBase &builder,
                         mlir::LLVM::ModuleTranslation &moduleTranslation,
                         llvm::Value *affinityList, IteratorInfo &iterInfo) {
  mlir::Region &itersRegion = itersOp.getRegion();
  mlir::Block &iteratorRegionBlock = itersRegion.front();

  llvm::OpenMPIRBuilder::LocationDescription loc(builder);

  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP,
                     llvm::Value *linearIV) -> llvm::Error {
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    builder.restoreIP(bodyIP);

    if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock,
                                     builder, moduleTranslation))) {
      return llvm::make_error<llvm::StringError>(
          "failed to convert iterator region", llvm::inconvertibleErrorCode());
    }

    // Extract affinity entry from omp.yield and store into list[linearIV].
    auto yield =
        mlir::dyn_cast<mlir::omp::YieldOp>(iteratorRegionBlock.getTerminator());
    assert(yield && yield.getResults().size() == 1 &&
           "expect omp.yield in iterator region to have one result");
    auto entryOp =
        yield.getResults()[0].getDefiningOp<mlir::omp::AffinityEntryOp>();
    assert(entryOp && "expect yield generate an affinity entry");

    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
    // Same sanity check as for plain affinity locators: both operands must
    // have been translated before they are stored.
    assert(addr && len && "expect affinity addr and len to be non-null");
    storeAffinityEntry(builder, *moduleTranslation.getOpenMPBuilder(),
                       affinityList, linearIV, addr, len);

    // Iterator-region block/value mappings are temporary for this conversion,
    // clear them to avoid stale entries in ModuleTranslation.
    moduleTranslation.forgetMapping(itersRegion);

    return llvm::Error::success();
  };

  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
      moduleTranslation.getOpenMPBuilder()->createIteratorLoop(
          loc, iterInfo.getTotalTrips(), bodyGen,
          /*Name=*/"iterator");
  if (failed(handleError(afterIP, *itersOp)))
    return failure();

  builder.restoreIP(*afterIP);

  return mlir::success();
}
/// Materialize the affinity clause of \p taskOp as an array of
/// `kmp_task_affinity_info_t` entries and describe it in \p ad.
///
/// Plain affinity items are written into one list; every iterator operand is
/// expanded into a list of its own by a generated loop. When more than one
/// list results, they are copied back to back into a single packed list.
/// \p ad receives the total i32 entry count and the list pointer, or two
/// null values when the task has no affinity clause at all.
///
/// Returns failure when lowering one of the iterator loops fails.
static mlir::LogicalResult
buildAffinityData(mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder,
                  mlir::LLVM::ModuleTranslation &moduleTranslation,
                  llvm::OpenMPIRBuilder::AffinityData &ad) {
  if (taskOp.getAffinityVars().empty() && taskOp.getIterated().empty()) {
    ad.Count = nullptr;
    ad.Info = nullptr;
    return mlir::success();
  }

  // One (count, list) pair per source of affinity entries.
  llvm::SmallVector<llvm::OpenMPIRBuilder::AffinityData> ads;
  llvm::StructType *kmpTaskAffinityInfoTy =
      moduleTranslation.getOpenMPBuilder()->getKmpTaskAffinityInfoTy();

  // Allocate `count` entries. Counts that are constants or function
  // arguments go to the alloca insertion point; any other count is allocated
  // at the current position.
  auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * {
    llvm::IRBuilderBase::InsertPointGuard guard(builder);
    if (llvm::isa<llvm::Constant>(count) || llvm::isa<llvm::Argument>(count))
      builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
    return builder.CreateAlloca(kmpTaskAffinityInfoTy, count,
                                "omp.affinity_list");
  };

  // Normalize a list description: i32 count, pointer in address space 0.
  auto createAffinity =
      [&](llvm::Value *count,
          llvm::Value *info) -> llvm::OpenMPIRBuilder::AffinityData {
    llvm::OpenMPIRBuilder::AffinityData entry{};
    entry.Count = builder.CreateTrunc(count, builder.getInt32Ty());
    entry.Info =
        builder.CreatePointerBitCastOrAddrSpaceCast(info, builder.getPtrTy(0));
    return entry;
  };

  if (!taskOp.getAffinityVars().empty()) {
    llvm::Value *count = llvm::ConstantInt::get(
        builder.getInt64Ty(), taskOp.getAffinityVars().size());
    llvm::Value *list = allocateAffinityList(count);
    fillAffinityLocators(taskOp.getAffinityVars(), builder, moduleTranslation,
                         list);
    ads.push_back(createAffinity(count, list));
  }

  for (mlir::Value iter : taskOp.getIterated()) {
    auto itersOp = iter.getDefiningOp<omp::IteratorOp>();
    assert(itersOp && "iterated value must be defined by omp.iterator");
    IteratorInfo iterInfo(itersOp, moduleTranslation, builder);
    llvm::Value *affList = allocateAffinityList(iterInfo.getTotalTrips());
    if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation,
                                        affList, iterInfo)))
      return llvm::failure();
    ads.push_back(createAffinity(iterInfo.getTotalTrips(), affList));
  }

  // Every Count in `ads` is already an i32 (see createAffinity), so the
  // counts can be summed without further casts.
  llvm::Value *totalAffinityCount = builder.getInt32(0);
  for (const auto &affinity : ads)
    totalAffinityCount = builder.CreateAdd(totalAffinityCount, affinity.Count);

  llvm::Value *affinityInfo = ads.front().Info;
  if (ads.size() > 1) {
    // Several lists: copy them back to back into one packed list.
    llvm::Value *affinityInfoElemSize = builder.getInt64(
        moduleTranslation.getLLVMModule()->getDataLayout().getTypeAllocSize(
            kmpTaskAffinityInfoTy));

    llvm::Value *packedAffinityInfo = allocateAffinityList(totalAffinityCount);
    llvm::Value *packedAffinityInfoOffset = builder.getInt32(0);
    for (const auto &affinity : ads) {
      // Number of bytes occupied by this list.
      llvm::Value *affinityCountInt64 = builder.CreateIntCast(
          affinity.Count, builder.getInt64Ty(), /*isSigned=*/false);
      llvm::Value *affinityInfoSize =
          builder.CreateMul(affinityCountInt64, affinityInfoElemSize);

      // Destination slot: packed list advanced by the entries copied so far.
      llvm::Value *packedAffinityInfoIndex = builder.CreateIntCast(
          packedAffinityInfoOffset, kmpTaskAffinityInfoTy->getElementType(0),
          /*isSigned=*/false);
      llvm::Value *dst = builder.CreateInBoundsGEP(
          kmpTaskAffinityInfoTy, packedAffinityInfo, packedAffinityInfoIndex);

      // The source pointer must live in the destination's address space.
      llvm::Value *src = builder.CreatePointerBitCastOrAddrSpaceCast(
          affinity.Info, dst->getType());
      builder.CreateMemCpy(dst, llvm::Align(1), src, llvm::Align(1),
                           affinityInfoSize);

      packedAffinityInfoOffset =
          builder.CreateAdd(packedAffinityInfoOffset, affinity.Count);
    }

    affinityInfo = packedAffinityInfo;
  }

  ad.Count = totalAffinityCount;
  ad.Info = affinityInfo;

  return mlir::success();
}
/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
@ -2421,6 +2720,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
taskOp.getPrivateNeedsBarrier())))
return llvm::failure();
llvm::OpenMPIRBuilder::AffinityData ad;
if (failed(buildAffinityData(taskOp, builder, moduleTranslation, ad)))
return llvm::failure();
// Set up for call to createTask()
builder.SetInsertPoint(taskStartBlock);
@ -2524,7 +2827,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation.getOpenMPBuilder()->createTask(
ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
moduleTranslation.lookupValue(taskOp.getFinal()),
moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad,
taskOp.getMergeable(),
moduleTranslation.lookupValue(taskOp.getEventHandle()),
moduleTranslation.lookupValue(taskOp.getPriority()));
@ -7321,13 +7624,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation(
.Case([&](omp::LoopNestOp) {
return convertOmpLoopNest(*op, builder, moduleTranslation);
})
.Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp>(
[&](auto op) {
// No-op, should be handled by relevant owning operations e.g.
// TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp
// etc. and then discarded
return success();
})
.Case<omp::MapInfoOp, omp::MapBoundsOp, omp::PrivateClauseOp,
omp::AffinityEntryOp, omp::IteratorOp>([&](auto op) {
// No-op, should be handled by relevant owning operations e.g.
// TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp, or
// TaskOp (for affinity entries and iterators), and then discarded
return success();
})
.Case([&](omp::NewCliOp op) {
// Meta-operation: Doesn't do anything by itself, but used to
// identify a loop.

View File

@ -3180,6 +3180,48 @@ func.func @iterator_bad_result_type(%lb : index, %ub : index, %st : index) {
// -----
// A constant zero step must be rejected by the omp.iterator verifier.
func.func @iterator_zero_step(%s2 : !llvm.struct<(ptr, i64)>) {
%lb = arith.constant 1 : index
%ub = arith.constant 4 : index
%st = arith.constant 0 : index
// expected-error@+1 {{loop step must not be zero}}
%0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
} -> !omp.iterated<!llvm.struct<(ptr, i64)>>
return
}
// -----
// With constant operands, a positive step combined with lb > ub must be
// rejected by the omp.iterator verifier.
func.func @iterator_positive_step_wrong_direction(%s2 : !llvm.struct<(ptr, i64)>) {
%lb = arith.constant 1000 : index
%ub = arith.constant -1 : index
%st = arith.constant 10 : index
// expected-error@+1 {{positive loop step requires lower bound to be less than or equal to upper bound}}
%0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
} -> !omp.iterated<!llvm.struct<(ptr, i64)>>
return
}
// -----
// With constant operands, a negative step combined with lb < ub must be
// rejected by the omp.iterator verifier.
func.func @iterator_negative_step_wrong_direction(%s2 : !llvm.struct<(ptr, i64)>) {
%lb = arith.constant -1000 : index
%ub = arith.constant 4 : index
%st = arith.constant -999 : index
// expected-error@+1 {{negative loop step requires lower bound to be greater than or equal to upper bound}}
%0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
} -> !omp.iterated<!llvm.struct<(ptr, i64)>>
return
}
// -----
func.func @iterator_missing_yield(%lb : index, %ub : index, %st : index) {
// expected-error@+1 {{region must be terminated by omp.yield}}
%0 = omp.iterator(%i: index) = (%lb to %ub step %st) {

View File

@ -3601,6 +3601,24 @@ func.func @omp_iterator_2d(%s2 : !llvm.struct<(ptr, i64)>) -> () {
return
}
// A negative constant step with lb >= ub is accepted by the verifier and
// round-trips unchanged.
// CHECK-LABEL: func.func @omp_iterator_negative_step
func.func @omp_iterator_negative_step(%s2 : !llvm.struct<(ptr, i64)>) -> () {
// CHECK: %[[LB:.*]] = arith.constant 4 : index
// CHECK: %[[UB:.*]] = arith.constant 1 : index
// CHECK: %[[ST:.*]] = arith.constant -1 : index
// CHECK: %[[IT:.*]] = omp.iterator(%[[IV:.*]]: index) = (%[[LB]] to %[[UB]] step %[[ST]]) {
// CHECK: omp.yield(%{{.*}} : !llvm.struct<(ptr, i64)>)
// CHECK: } -> !omp.iterated<!llvm.struct<(ptr, i64)>>
%lb = arith.constant 4 : index
%ub = arith.constant 1 : index
%st = arith.constant -1 : index
%0 = omp.iterator(%iv: index) = (%lb to %ub step %st) {
omp.yield(%s2 : !llvm.struct<(ptr, i64)>)
} -> !omp.iterated<!llvm.struct<(ptr, i64)>>
return
}
// CHECK-LABEL: func.func @omp_task_affinity_iterator_1d
func.func @omp_task_affinity_iterator_1d(%lb : index, %ub : index, %step : index,
%addr : !llvm.ptr, %len : i64) -> () {

View File

@ -0,0 +1,295 @@
// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s
// Translation test: an omp.task whose affinity clause is fed by a single
// omp.iterator; every iteration yields the same (%arr, %len) affinity entry.
// NOTE(review): despite the `_1d` suffix, the iterator below declares two
// induction variables (%i over 1..4 and %j over 1..6), i.e. 4 * 6 = 24 trips,
// which is the bound the loop-condition directive after this function
// expects. Consider renaming the test or making the iterator
// one-dimensional -- confirm the intent.
llvm.func @task_affinity_iterator_1d(%arr: !llvm.ptr {llvm.nocapture}) {
%c1 = llvm.mlir.constant(1 : i64) : i64
%c4 = llvm.mlir.constant(4 : i64) : i64
%c6 = llvm.mlir.constant(6 : i64) : i64
%len = llvm.mlir.constant(4 : i64) : i64
omp.parallel {
omp.single {
%it = omp.iterator(%i: i64, %j: i64) =
(%c1 to %c4 step %c1, %c1 to %c6 step %c1) {
%entry = omp.affinity_entry %arr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
} -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_1d
// Preheader -> Header
// CHECK: omp_iterator.preheader:
// CHECK: br label %omp_iterator.header
//
// Header has the IV phi and branches to cond
// CHECK: omp_iterator.header:
// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
// CHECK: br label %omp_iterator.cond
//
// Cond: IV < 24 and branches to body or exit
// CHECK: omp_iterator.cond:
// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 24
// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
//
// Exit -> After -> continuation
// CHECK: omp_iterator.exit:
// CHECK: br label %omp_iterator.after
// CHECK: omp_iterator.after:
// CHECK: br label %omp.it.cont
//
// Body: store into affinity_list[IV] then branch to inc
// CHECK: omp_iterator.body:
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP]]
// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP]]
// CHECK: br label %omp_iterator.inc
//
// CHECK: omp_iterator.inc:
// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
// CHECK: br label %omp_iterator.header
// Translation test: an omp.task whose affinity clause is fed by a
// three-dimensional omp.iterator. Every iteration yields the same
// (%arr, %len) affinity entry; the directives after this function expect a
// single generated loop bounded by the product of the three trip counts (48).
llvm.func @task_affinity_iterator_3d(%arr: !llvm.ptr {llvm.nocapture}) {
%c1 = llvm.mlir.constant(1 : i64) : i64
%c2 = llvm.mlir.constant(2 : i64) : i64
%c4 = llvm.mlir.constant(4 : i64) : i64
%c6 = llvm.mlir.constant(6 : i64) : i64
%len = llvm.mlir.constant(4 : i64) : i64
omp.parallel {
omp.single {
// 3-D iterator: i=1..4, j=1..6, k=1..2 => total trips = 48
%it = omp.iterator(%i: i64, %j: i64, %k: i64) =
(%c1 to %c4 step %c1, %c1 to %c6 step %c1, %c1 to %c2 step %c1) {
%entry = omp.affinity_entry %arr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
} -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_3d
// Preheader -> Header
// CHECK: omp_iterator.preheader:
// CHECK: br label %omp_iterator.header
//
// Header has the IV phi and branches to cond
// CHECK: omp_iterator.header:
// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ]
// CHECK: br label %omp_iterator.cond
//
// Cond: IV < 48 and branches to body or exit
// CHECK: omp_iterator.cond:
// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 48
// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
//
// Exit -> After -> continuation
// CHECK: omp_iterator.exit:
// CHECK: br label %omp_iterator.after
// CHECK: omp_iterator.after:
// CHECK: br label %omp.it.cont
//
// Body: store into affinity_list[IV] then branch to inc
// CHECK: omp_iterator.body:
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]]
// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP]]
// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP]]
// CHECK: br label %omp_iterator.inc
//
// CHECK: omp_iterator.inc:
// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1
// CHECK: br label %omp_iterator.header
// Translation test: one omp.task affinity clause that consumes two separate
// omp.iterator results (24 and 3 trips). The directives after this function
// expect one list per iterator plus a combined 27-entry list that is handed
// to the affinity registration call.
llvm.func @task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) {
%c1 = llvm.mlir.constant(1 : i64) : i64
%c3 = llvm.mlir.constant(3 : i64) : i64
%c4 = llvm.mlir.constant(4 : i64) : i64
%c6 = llvm.mlir.constant(6 : i64) : i64
%len = llvm.mlir.constant(4 : i64) : i64
omp.parallel {
omp.single {
// First iterator: 2-D (4 * 6 = 24)
%it0 = omp.iterator(%i: i64, %j: i64) =
(%c1 to %c4 step %c1, %c1 to %c6 step %c1) {
%entry0 = omp.affinity_entry %arr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.yield(%entry0 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
} -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
// Second iterator: 1-D (3)
%it1 = omp.iterator(%k: i64) = (%c1 to %c3 step %c1) {
%entry1 = omp.affinity_entry %arr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.yield(%entry1 : !omp.affinity_entry_ty<!llvm.ptr, i64>)
} -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
// Multiple iterators in a single affinity clause.
omp.task affinity(%it0: !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>,
%it1: !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_multiple
// CHECK-DAG: [[AFFLIST0:%.*]] = alloca { i64, i64, i32 }, i64 24, align 8
// CHECK-DAG: [[AFFLIST1:%.*]] = alloca { i64, i64, i32 }, i64 3, align 8
// CHECK-DAG: [[AFFINITY_LIST:%.*]] = alloca { i64, i64, i32 }, i32 27, align 8
// First iterator header
// CHECK: omp_iterator.preheader:
// CHECK: br label %[[HEADER0:.+]]
// CHECK: [[HEADER0]]:
// CHECK: [[IV0:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT0:%.*]], %[[INC0:.+]] ]
// CHECK: br label %[[COND0:.+]]
// CHECK: [[COND0]]:
// CHECK: [[CMP0:%.*]] = icmp ult i64 [[IV0]], 24
// CHECK: br i1 [[CMP0]], label %[[BODY0:.+]], label %omp_iterator.exit
// Second iterator header
// CHECK: omp_iterator.preheader{{.*}}:
// CHECK: [[HEADER1:.+]]:
// CHECK: [[IV1:%.*]] = phi i64 [ 0, %omp_iterator.preheader{{.*}} ], [ [[NEXT1:%.*]], %[[INC1:.+]] ]
// CHECK: br label %omp_iterator.cond{{.*}}
// CHECK: omp_iterator.cond{{.*}}:
// CHECK: [[CMP1:%.*]] = icmp ult i64 [[IV1]], 3
// CHECK: br i1 [[CMP1]], label %[[BODY1:.+]], label %omp_iterator.exit{{.*}}
// CHECK: [[AFFINITY_LIST_1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 0
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_1]], ptr align 1 [[AFFLIST0]], i64 480, i1 false)
// CHECK: [[AFFINITY_LIST_2:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 24
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_2]], ptr align 1 [[AFFLIST1]], i64 60, i1 false)
// CHECK: codeRepl:
// CHECK: call ptr @__kmpc_omp_task_alloc
// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 27{{.*}}ptr [[AFFINITY_LIST]]
// CHECK: call i32 @__kmpc_omp_task
// Second iterator body
// CHECK: [[BODY1]]:
// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST1]]
// CHECK: [[ADDR1:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 0
// CHECK: store i64 [[ADDR1]], ptr [[ADDRGEP1]]
// CHECK: [[LENGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP1]]
// CHECK: [[FLAGGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP1]]
// CHECK: br label %[[INC1]]
// CHECK: [[INC1]]:
// CHECK: [[NEXT1]] = add nuw i64 [[IV1]], 1
// CHECK: br label %[[HEADER1]]
// First iterator body
// CHECK: [[BODY0]]:
// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST0]], i64 [[IV0]]
// CHECK: [[ADDR0:%.*]] = ptrtoint ptr %loadgep_ to i64
// CHECK: [[ADDRGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 0
// CHECK: store i64 [[ADDR0]], ptr [[ADDRGEP0]]
// CHECK: [[LENGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP0]]
// CHECK: [[FLAGGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP0]]
// CHECK: br label %[[INC0]]
// CHECK: [[INC0]]:
// CHECK: [[NEXT0]] = add nuw i64 [[IV0]], 1
// CHECK: br label %[[HEADER0]]
// Makes sure the affinity list is only allocated after the dynamic trip count
// has been computed: all iterator bounds (%lb, %ub, %step) are function
// arguments here, so the list size is not a compile-time constant.
llvm.func @task_affinity_iterator_dynamic_tripcount(
%arr: !llvm.ptr {llvm.nocapture}, %lb: i64, %ub: i64, %step: i64,
%len: i64) {
omp.parallel {
omp.single {
%it = omp.iterator(%i: i64) = (%lb to %ub step %step) {
%entry = omp.affinity_entry %arr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
} -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_dynamic_tripcount
// CHECK: [[DIFF:%.*]] = sub i64 {{.*}}, {{.*}}
// CHECK: [[DIV:%.*]] = sdiv i64 [[DIFF]], {{.*}}
// CHECK: [[TRIPS:%.*]] = add i64 [[DIV]], 1
// CHECK: [[SCALED:%.*]] = mul i64 1, [[TRIPS]]
// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 [[SCALED]]
// Translation test: the iterator counts down from 4 to 1 with a step of -1.
// The induction variable %i is used as the affinity entry length, so the
// directives after this function can observe the per-iteration value that is
// stored into the length field of each entry.
llvm.func @task_affinity_iterator_negative_step(%arr: !llvm.ptr {llvm.nocapture}) {
%c4 = llvm.mlir.constant(4 : i64) : i64
%c1 = llvm.mlir.constant(1 : i64) : i64
%cn1 = llvm.mlir.constant(-1 : i64) : i64
omp.parallel {
omp.single {
%it = omp.iterator(%i: i64) = (%c4 to %c1 step %cn1) {
%entry = omp.affinity_entry %arr, %i
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.yield(%entry : !omp.affinity_entry_ty<!llvm.ptr, i64>)
} -> !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>
omp.task affinity(%it : !omp.iterated<!omp.affinity_entry_ty<!llvm.ptr, i64>>) {
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_iterator_negative_step
// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 4, align 8
// CHECK: omp_iterator.cond:
// CHECK: [[CMP:%.*]] = icmp ult i64 %omp_iterator.iv, 4
// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit
// CHECK: omp_iterator.body:
// CHECK: [[IDX:%.*]] = urem i64 %omp_iterator.iv, 4
// CHECK: [[STEPMUL:%.*]] = mul i64 [[IDX]], -1
// CHECK: [[PHYSIV:%.*]] = add i64 4, [[STEPMUL]]
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 %omp_iterator.iv
// CHECK: [[LENPTR:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 [[PHYSIV]], ptr [[LENPTR]]

View File

@ -3589,3 +3589,37 @@ llvm.func @nested_task_with_deps() {
// CHECK: ret void
// CHECK: }
// Translation test: a single omp.affinity_entry is passed directly to the
// task's affinity clause, without any iterator. The directives after this
// function expect a one-element affinity list (address, length 4, flags 0)
// and a registration call that receives a count of 1.
llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) {
%len = llvm.mlir.constant(4 : i64) : i64
omp.parallel {
omp.single {
%ae = omp.affinity_entry %arr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
omp.terminator
}
omp.terminator
}
omp.terminator
}
llvm.return
}
// CHECK-LABEL: define internal void @task_affinity_plain
// CHECK: [[BASE:%.*]] = load ptr, ptr %gep_, align 8
// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 1, align 8
// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 0
// addr
// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr [[BASE]] to i64
// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0
// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]]
// len
// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1
// CHECK: store i64 4, ptr [[LENGEP]]
// flags is always 0
// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2
// CHECK: store i32 0, ptr [[FLAGGEP]]
// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr [[AFFLIST]]

View File

@ -462,15 +462,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) {
}
llvm.return
}
// -----
// Not-yet-implemented case: translating an omp.affinity_entry that feeds a
// task's affinity clause is reported as unsupported, producing the two
// diagnostics anchored on the op below.
llvm.func @task_affinity(%ptr : !llvm.ptr, %len : i64) {
// expected-error@below {{not yet implemented: omp.affinity_entry}}
// expected-error@below {{LLVM Translation failed for operation: omp.affinity_entry}}
%ae = omp.affinity_entry %ptr, %len
: (!llvm.ptr, i64) -> !omp.affinity_entry_ty<!llvm.ptr, i64>
omp.task affinity(%ae : !omp.affinity_entry_ty<!llvm.ptr, i64>) {
omp.terminator
}
llvm.return
}

View File

@ -1505,6 +1505,18 @@ kmp_int32
__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, kmp_int32 naffins,
                                  kmp_task_affinity_info_t *affin_list) {
  // Sanity check: a non-empty affinity list must come with a valid pointer.
  if (naffins > 0) {
    KMP_DEBUG_ASSERT(affin_list != NULL);
  }

  // Trace every affinity entry (base address, length and the three flag
  // fields). The list is not otherwise consumed here.
  for (kmp_int32 idx = 0; idx < naffins; ++idx) {
    KA_TRACE(30,
             ("__kmpc_omp_reg_task_with_affinity: T#%d aff[%d] base_addr=0x%llx "
              "len=%zu flags={%d,%d,%d}\n",
              gtid, idx,
              static_cast<unsigned long long>(affin_list[idx].base_addr),
              affin_list[idx].len,
              static_cast<int>(affin_list[idx].flags.flag1),
              static_cast<int>(affin_list[idx].flags.flag2),
              static_cast<int>(affin_list[idx].flags.reserved)));
  }

  // The call always reports 0 to the caller.
  return 0;
}