diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 9885ffc8b206..c36d721b567e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1510,6 +1510,16 @@ public:
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// Return the LLVM struct type matching runtime `kmp_task_affinity_info_t`.
+  /// `{ kmp_intptr_t base_addr; size_t len; flags (bitfield storage as i32) }`
+  LLVM_ABI llvm::StructType *getKmpTaskAffinityInfoTy();
+
+  /// Affinity clause payload for createTask(); null members mean no clause.
+  struct AffinityData {
+    Value *Count = nullptr; // number of kmp_task_affinity_info_t entries (i32)
+    Value *Info = nullptr;  // pointer to the kmp_task_affinity_info_t entries
+  };
+
   /// Generator for `#omp taskloop`
   ///
   /// \param Loc The location where the taskloop construct was encountered.
@@ -1568,17 +1578,21 @@ public:
   ///                    cannot be resumed until execution of the structured
   ///                    block that is associated with the generated task is
   ///                    completed.
+  /// \param Dependencies Vector of DependData objects holding information of
+  ///                     dependencies as specified by the 'depend' clause.
+  /// \param Affinities AffinityData object holding information of accumulated
+  ///                   affinities as specified by the 'affinity' clause.
/// \param EventHandle If present, signifies the event handle as part of /// the detach clause /// \param Mergeable If the given task is `mergeable` /// \param priority `priority-value' specifies the execution order of the /// tasks that is generated by the construct - LLVM_ABI InsertPointOrErrorTy - createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, bool Tied = true, - Value *Final = nullptr, Value *IfCondition = nullptr, - SmallVector Dependencies = {}, bool Mergeable = false, - Value *EventHandle = nullptr, Value *Priority = nullptr); + LLVM_ABI InsertPointOrErrorTy createTask( + const LocationDescription &Loc, InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB, bool Tied = true, Value *Final = nullptr, + Value *IfCondition = nullptr, SmallVector Dependencies = {}, + AffinityData Affinities = {}, bool Mergeable = false, + Value *EventHandle = nullptr, Value *Priority = nullptr); /// Generator for the taskgroup construct /// @@ -3926,6 +3940,39 @@ public: LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional AddressSpace = {}); + + using IteratorBodyGenTy = llvm::function_ref; + + /// Create a canonical iterator loop at the current insertion point. + /// + /// This helper splits the current block and builds a canonical loop + /// using createLoopSkeleton(). The resulting control flow looks like: + /// + /// CurBB -> Preheader -> Header -> Body -> Latch -> After -> ContBB + /// + /// The body of the loop is produced by calling \p BodyGen with the insertion + /// point for the loop body and the induction variable. + /// Unlike createCanonicalLoop(), this function is intended for \p BodyGen + /// that may perform region lowering (e.g., translating MLIR regions) and are + /// not guaranteed to preserve the canonical skeleton's body terminator. 
In + /// particular: + /// + /// - The skeleton’s unconditional branch from the loop body is removed + /// before invoking \p BodyGen. + /// - \p BodyGen may freely emit instructions and temporarily introduce + /// control flow. + /// - If the loop body does not end with a terminator after \p BodyGen + /// returns, a branch to the latch is inserted to restore canonical form. + /// + /// \param Loc The location where the iterator modifier was encountered. + /// \param TripCount Number of loop iterations. + /// \param BodyGen Callback to generate the loop body. + /// \param Name Base name used for creating the loop + /// \returns The insertion position *after* the iterator loop + LLVM_ABI InsertPointOrErrorTy createIteratorLoop( + LocationDescription Loc, llvm::Value *TripCount, + IteratorBodyGenTy BodyGen, llvm::StringRef Name = "iterator"); }; /// Class to represented the control flow structure of an OpenMP canonical loop. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index aa001fbf8c4d..85ecec046cfd 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2431,11 +2431,18 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop( return Builder.saveIP(); } +llvm::StructType *OpenMPIRBuilder::getKmpTaskAffinityInfoTy() { + llvm::Type *IntPtrTy = llvm::Type::getIntNTy( + M.getContext(), M.getDataLayout().getPointerSizeInBits()); + return llvm::StructType::get(IntPtrTy, IntPtrTy, + llvm::Type::getInt32Ty(M.getContext())); +} + OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, - SmallVector Dependencies, bool Mergeable, Value *EventHandle, - Value *Priority) { + SmallVector Dependencies, AffinityData Affinities, + bool Mergeable, Value *EventHandle, Value *Priority) { if (!updateToLocation(Loc)) return 
InsertPointTy(); @@ -2481,8 +2488,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, - Mergeable, Priority, EventHandle, TaskAllocaBB, - ToBeDeleted](Function &OutlinedFn) mutable { + Affinities, Mergeable, Priority, EventHandle, + TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable { // Replace the Stale CI by appropriate RTL function call. assert(OutlinedFn.hasOneUse() && "there must be a single user for the outlined function"); @@ -2555,6 +2562,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize, /*task_func=*/&OutlinedFn}); + if (Affinities.Count && Affinities.Info) { + Function *RegAffFn = getOrCreateRuntimeFunctionPtr( + OMPRTL___kmpc_omp_reg_task_with_affinity); + + createRuntimeFunctionCall(RegAffFn, {Ident, ThreadID, TaskData, + Affinities.Count, Affinities.Info}); + } + // Emit detach clause initialization. 
// evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
     // task_descriptor);
@@ -11573,6 +11588,65 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(vfs::FileSystem &VFS,
   loadOffloadInfoMetadata(*M.get());
 }
 
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createIteratorLoop(
+    LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen,
+    llvm::StringRef Name) {
+  // Adopt both the insertion point and the debug location carried by Loc.
+  updateToLocation(Loc);
+  BasicBlock *CurBB = Builder.GetInsertBlock();
+  assert(CurBB &&
+         "expected a valid insertion block for creating an iterator loop");
+  Function *F = CurBB->getParent();
+
+  InsertPointTy SplitIP = Builder.saveIP();
+  if (SplitIP.getPoint() == CurBB->end())
+    if (Instruction *Terminator = CurBB->getTerminator())
+      SplitIP = InsertPointTy(CurBB, Terminator->getIterator());
+
+  BasicBlock *ContBB =
+      splitBB(SplitIP, /*CreateBranch=*/false,
+              Builder.getCurrentDebugLocation(), "omp.it.cont");
+
+  CanonicalLoopInfo *CLI =
+      createLoopSkeleton(Builder.getCurrentDebugLocation(), TripCount, F,
+                         /*PreInsertBefore=*/ContBB,
+                         /*PostInsertBefore=*/ContBB, Name);
+
+  // Enter loop from original block.
+  redirectTo(CurBB, CLI->getPreheader(), Builder.getCurrentDebugLocation());
+
+  // Remove the unconditional branch inserted by createLoopSkeleton in the body
+  if (Instruction *T = CLI->getBody()->getTerminator())
+    T->eraseFromParent();
+
+  InsertPointTy BodyIP = CLI->getBodyIP();
+  if (llvm::Error Err = BodyGen(BodyIP, CLI->getIndVar()))
+    return Err;
+
+  // Body must either fallthrough to the latch or branch directly to it.
+  if (Instruction *BodyTerminator = CLI->getBody()->getTerminator()) {
+    auto *BodyBr = dyn_cast<BranchInst>(BodyTerminator);
+    if (!BodyBr || !BodyBr->isUnconditional() ||
+        BodyBr->getSuccessor(0) != CLI->getLatch()) {
+      return make_error<StringError>(
+          "iterator bodygen must terminate the canonical body with an "
+          "unconditional branch to the loop latch",
+          inconvertibleErrorCode());
+    }
+  } else {
+    // Ensure we end the loop body by jumping to the latch.
+    Builder.SetInsertPoint(CLI->getBody());
+    Builder.CreateBr(CLI->getLatch());
+  }
+
+  // Link After -> ContBB
+  Builder.SetInsertPoint(CLI->getAfter(), CLI->getAfter()->begin());
+  if (!CLI->getAfter()->getTerminator())
+    Builder.CreateBr(ContBB);
+
+  return InsertPointTy{ContBB, ContBB->begin()};
+}
+
 //===----------------------------------------------------------------------===//
 // OffloadEntriesInfoManager
 //===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 5d7ecbce7375..eab4f88c7fbf 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -7555,6 +7555,186 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
   EXPECT_EQ(OulinedFnCall->getNextNode(), TaskCompleteCall);
 }
 
+TEST_F(OpenMPIRBuilderTest, CreateTaskAffinity) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.Config.IsTargetDevice = false;
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
+
+  LLVMContext &Ctx = M->getContext();
+  StructType *AffInfoTy = StructType::get(
+      Type::getInt64Ty(Ctx), Type::getInt64Ty(Ctx), Type::getInt32Ty(Ctx));
+
+  // Create [1 x AffInfoTy] as alloca (element alloca is fine too).
+ Value *CountI32 = Builder.getInt32(1); + AllocaInst *AffArr = + Builder.CreateAlloca(AffInfoTy, Builder.getInt64(1), "omp.affinity_list"); + + // Fill entry 0 minimally so the pointer definitely dominates use. + Value *Entry0 = Builder.CreateInBoundsGEP( + AffInfoTy, AffArr, Builder.getInt64(0), "omp.affinity.entry"); + Builder.CreateStore(Builder.getInt64(0), + Builder.CreateStructGEP(AffInfoTy, Entry0, 0)); + Builder.CreateStore(Builder.getInt64(64), + Builder.CreateStructGEP(AffInfoTy, Entry0, 1)); + Builder.CreateStore(Builder.getInt32(0), + Builder.CreateStructGEP(AffInfoTy, Entry0, 2)); + + OpenMPIRBuilder::AffinityData Affinity{CountI32, AffArr}; + + BasicBlock *AllocaBB = Builder.GetInsertBlock(); + BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); + OpenMPIRBuilder::LocationDescription Loc( + InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); + + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createTask( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + BodyGenCB, + /*Tied=*/true, + /*Final=*/nullptr, + /*IfCondition=*/nullptr, + /*Dependencies=*/{}, + /*Affinity=*/Affinity, + /*Mergeable=*/false, + /*EventHandle=*/nullptr, + /*Priority=*/nullptr)); + + Builder.restoreIP(AfterIP); + OMPBuilder.finalize(); + Builder.CreateRetVoid(); + + EXPECT_FALSE(verifyModule(*M, &errs())); + + Function *TaskAllocFn = + OMPBuilder.getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc); + Function *RegAffFn = OMPBuilder.getOrCreateRuntimeFunctionPtr( + OMPRTL___kmpc_omp_reg_task_with_affinity); + + CallInst *TaskAllocCI = nullptr; + CallInst *RegAffCI = nullptr; + + for (auto &I : instructions(F)) { + if (auto *CI = dyn_cast(&I)) { + if (CI->getCalledFunction() == TaskAllocFn) + TaskAllocCI = CI; + if (CI->getCalledFunction() == RegAffFn) + RegAffCI = CI; + } + } + + ASSERT_NE(TaskAllocCI, nullptr) << "expected __kmpc_omp_task_alloc call"; + ASSERT_NE(RegAffCI, nullptr) + << 
"expected __kmpc_omp_reg_task_with_affinity call"; + + // Check reg_task_with_affinity signature: + // i32 __kmpc_omp_reg_task_with_affinity(ident_t*, i32 gtid, + // kmp_task_t*, i32 naffins, + // kmp_task_affinity_info_t*) + ASSERT_EQ(RegAffCI->arg_size(), 5u); + // naffins + EXPECT_TRUE(RegAffCI->getArgOperand(3)->getType()->isIntegerTy(32)); + // kmp_task_affinity_info_t* + EXPECT_TRUE(RegAffCI->getArgOperand(4)->getType()->isPointerTy()); +} + +TEST_F(OpenMPIRBuilderTest, CreateIteratorLoop) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func.unterminated"); + IRBuilder<> Builder(BB); + + auto BodyGenCB = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error { + Builder.restoreIP(BodyIP); + Builder.CreateAdd(LinearIV, Builder.getInt64(1)); + return Error::success(); + }; + + OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); + ASSERT_EXPECTED_INIT(InsertPointTy, AfterIP, + OMPBuilder.createIteratorLoop(Loc, Builder.getInt64(4), + BodyGenCB, "iterator")); + + Builder.restoreIP(AfterIP); + Builder.CreateRetVoid(); + + EXPECT_EQ(AfterIP.getBlock()->getName(), "omp.it.cont"); + EXPECT_FALSE(verifyFunction(*F, &errs())); + } + + { + Function *F2 = + Function::Create(F->getFunctionType(), Function::ExternalLinkage, + "func.terminated", M.get()); + BasicBlock *BB2 = BasicBlock::Create(Ctx, "", F2); + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + IRBuilder<> Builder(BB2); + + BasicBlock *OrigSucc = + BasicBlock::Create(Builder.getContext(), "orig.succ", F2); + Builder.CreateBr(OrigSucc); + + auto BodyGenCB = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error { + Builder.restoreIP(BodyIP); + Builder.CreateAdd(LinearIV, Builder.getInt64(1)); + return Error::success(); + }; + + OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB2, BB2->end()), + DL); + ASSERT_EXPECTED_INIT(InsertPointTy, AfterIP, + OMPBuilder.createIteratorLoop(Loc, 
Builder.getInt64(4), + BodyGenCB, "iterator")); + + EXPECT_EQ(AfterIP.getBlock()->getName(), "omp.it.cont"); + auto *ContBr = dyn_cast(AfterIP.getBlock()->getTerminator()); + ASSERT_NE(ContBr, nullptr); + ASSERT_FALSE(ContBr->isConditional()); + EXPECT_EQ(ContBr->getSuccessor(0), OrigSucc); + + Builder.SetInsertPoint(OrigSucc); + Builder.CreateRetVoid(); + + EXPECT_FALSE(verifyFunction(*F2, &errs())); + } + + EXPECT_FALSE(verifyModule(*M, &errs())); +} + +TEST_F(OpenMPIRBuilderTest, CreateIteratorLoopInvalidLoopBody) { + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + auto BodyGenCB = [&](InsertPointTy BodyIP, Value *LinearIV) -> Error { + Builder.restoreIP(BodyIP); + Builder.CreateAdd(LinearIV, Builder.getInt64(1)); + BasicBlock *BadDest = + BasicBlock::Create(Builder.getContext(), "iterator.bad.dest", F); + Builder.CreateBr(BadDest); + Builder.SetInsertPoint(BadDest); + Builder.CreateUnreachable(); + return Error::success(); + }; + + OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createIteratorLoop( + Loc, Builder.getInt64(4), BodyGenCB, "iterator"); + ASSERT_TRUE(errorToBool(AfterIP.takeError())); +} + TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp index 7fdc23adc857..d90912f9f686 100644 --- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp +++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp @@ -154,6 +154,9 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter, // discarded on lowering to LLVM-IR from the OpenMP dialect. 
converter.addConversion( [&](omp::MapBoundsType type) -> Type { return type; }); + converter.addConversion( + [&](omp::AffinityEntryType type) -> Type { return type; }); + converter.addConversion([&](omp::IteratedType type) -> Type { return type; }); // Add conversions for all OpenMP operations. addOpenMPOpConversions< diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index e0559e850faf..7cab929d583c 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/OperationSupport.h" #include "mlir/IR/SymbolTable.h" @@ -4796,6 +4797,30 @@ LogicalResult IteratorOp::verify() { if (!iteratedTy) return emitOpError() << "result must be omp.iterated"; + for (auto [lb, ub, step] : llvm::zip_equal( + getLoopLowerBounds(), getLoopUpperBounds(), getLoopSteps())) { + if (matchPattern(step, m_Zero())) + return emitOpError() << "loop step must not be zero"; + + IntegerAttr lbAttr; + IntegerAttr ubAttr; + IntegerAttr stepAttr; + if (!matchPattern(lb, m_Constant(&lbAttr)) || + !matchPattern(ub, m_Constant(&ubAttr)) || + !matchPattern(step, m_Constant(&stepAttr))) + continue; + + const APInt &lbVal = lbAttr.getValue(); + const APInt &ubVal = ubAttr.getValue(); + const APInt &stepVal = stepAttr.getValue(); + if (stepVal.isStrictlyPositive() && lbVal.sgt(ubVal)) + return emitOpError() << "positive loop step requires lower bound to be " + "less than or equal to upper bound"; + if (stepVal.isNegative() && lbVal.slt(ubVal)) + return emitOpError() << "negative loop step requires lower bound to be " + "greater than or equal to upper bound"; + } + Block &b = getRegion().front(); auto yield = llvm::dyn_cast(b.getTerminator()); diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9d7c0003c233..37b1a37c2e1a 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -321,10 +321,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
                               << " operation";
   };
 
-  auto checkAffinity = [&todo](auto op, LogicalResult &result) {
-    if (!op.getAffinityVars().empty())
-      result = todo("affinity");
-  };
   auto checkAllocate = [&todo](auto op, LogicalResult &result) {
     if (!op.getAllocateVars().empty() || !op.getAllocatorVars().empty())
       result = todo("allocate");
@@ -408,7 +404,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkThreadLimit(op, result);
       })
       .Case([&](omp::TaskOp op) {
-        checkAffinity(op, result);
         checkAllocate(op, result);
         checkInReduction(op, result);
       })
@@ -2233,6 +2228,92 @@ private:
   /// The type of the structure
   llvm::Type *structTy = nullptr;
 };
+
+/// IteratorInfo extracts and prepares loop bounds information from an
+/// mlir::omp::IteratorOp for lowering to LLVM IR.
+///
+/// It computes the per-dimension trip counts and the total linearized trip
+/// count, casted to i64. These are used to build a canonical loop and to
+/// reconstruct the physical induction variables inside the loop body.
+class IteratorInfo {
+private:
+  llvm::SmallVector<llvm::Value *> lowerBounds;
+  llvm::SmallVector<llvm::Value *> upperBounds;
+  llvm::SmallVector<llvm::Value *> steps;
+  llvm::SmallVector<llvm::Value *> trips;
+  unsigned dims;
+  llvm::Value *totalTrips;
+
+  llvm::Value *lookUpAsI64(mlir::Value val, const LLVM::ModuleTranslation &mt,
+                           llvm::IRBuilderBase &builder) {
+    llvm::Value *v = mt.lookupValue(val);
+    if (!v)
+      return nullptr;
+    if (v->getType()->isIntegerTy(64))
+      return v;
+    if (v->getType()->isIntegerTy())
+      return builder.CreateSExtOrTrunc(v, builder.getInt64Ty());
+    return nullptr;
+  }
+
+public:
+  IteratorInfo(mlir::omp::IteratorOp itersOp,
+               mlir::LLVM::ModuleTranslation &moduleTranslation,
+               llvm::IRBuilderBase &builder) {
+    dims = itersOp.getLoopLowerBounds().size();
+    lowerBounds.resize(dims);
+    upperBounds.resize(dims);
+    steps.resize(dims);
+    trips.resize(dims);
+
+    for (unsigned d = 0; d < dims; ++d) {
+      llvm::Value *lb = lookUpAsI64(itersOp.getLoopLowerBounds()[d],
+                                    moduleTranslation, builder);
+      llvm::Value *ub = lookUpAsI64(itersOp.getLoopUpperBounds()[d],
+                                    moduleTranslation, builder);
+      llvm::Value *st =
+          lookUpAsI64(itersOp.getLoopSteps()[d], moduleTranslation, builder);
+      assert(lb && ub && st &&
+             "Expect lowerBounds, upperBounds, and steps in IteratorOp");
+      assert((!llvm::isa<llvm::ConstantInt>(st) ||
+              !llvm::cast<llvm::ConstantInt>(st)->isZero()) &&
+             "Expect non-zero step in IteratorOp");
+
+      lowerBounds[d] = lb;
+      upperBounds[d] = ub;
+      steps[d] = st;
+
+      // Inclusive range: trips = ((ub - lb) / step) + 1 when the step moves
+      // from lb towards ub (positive or negative step). If it moves away from
+      // ub the range is empty. IteratorOp::verify can only reject that for
+      // constant operands, so select zero here; the plain formula would give
+      // a negative count, or 1 when |ub - lb| < |step|.
+      llvm::Value *zero = llvm::ConstantInt::get(builder.getInt64Ty(), 0);
+      llvm::Value *one = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
+      llvm::Value *diff = builder.CreateSub(ub, lb);
+      llvm::Value *div = builder.CreateSDiv(diff, st);
+      llvm::Value *count = builder.CreateAdd(div, one);
+      llvm::Value *stepIsPositive = builder.CreateICmpSGT(st, zero);
+      llvm::Value *emptyIfPositive = builder.CreateICmpSGT(lb, ub);
+      llvm::Value *emptyIfNegative = builder.CreateICmpSLT(lb, ub);
+      llvm::Value *isEmpty = builder.CreateSelect(
+          stepIsPositive, emptyIfPositive, emptyIfNegative);
+      trips[d] = builder.CreateSelect(isEmpty, zero, count);
+    }
+
+    totalTrips = llvm::ConstantInt::get(builder.getInt64Ty(), 1);
+    for (unsigned d = 0; d < dims; ++d)
+      totalTrips = builder.CreateMul(totalTrips, trips[d]);
+  }
+
+  unsigned getDims() const { return dims; }
+  llvm::ArrayRef<llvm::Value *> getLowerBounds() const { return lowerBounds; }
+  llvm::ArrayRef<llvm::Value *> getUpperBounds() const { return upperBounds; }
+  llvm::ArrayRef<llvm::Value *> getSteps() const { return steps; }
+  llvm::ArrayRef<llvm::Value *> getTrips() const { return trips; }
+  llvm::Value *getTotalTrips() const { return totalTrips; }
+};
+
 } // namespace
 
 void TaskContextStructManager::generateTaskContextStruct() {
@@ -2307,6 +2388,239 @@ void TaskContextStructManager::freeStructPtr() {
   builder.CreateFree(structPtr);
 }
 
+/// Store one `kmp_task_affinity_info_t` entry {addr, len, flags = 0} into
+/// `affinityList[index]`, converting `addr` and `len` to the field types.
+static void storeAffinityEntry(llvm::IRBuilderBase &builder,
+                               llvm::OpenMPIRBuilder &ompBuilder,
+                               llvm::Value *affinityList, llvm::Value *index,
+                               llvm::Value *addr, llvm::Value *len) {
+  llvm::StructType *kmpTaskAffinityInfoTy =
+      ompBuilder.getKmpTaskAffinityInfoTy();
+  llvm::Value *entry = builder.CreateInBoundsGEP(
+      kmpTaskAffinityInfoTy, affinityList, index, "omp.affinity.entry");
+
+  addr = builder.CreatePtrToInt(addr, kmpTaskAffinityInfoTy->getElementType(0));
+  len = builder.CreateIntCast(len, kmpTaskAffinityInfoTy->getElementType(1),
+                              /*isSigned=*/false);
+  llvm::Value *flags = builder.getInt32(0);
+
+  builder.CreateStore(addr,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 0));
+  builder.CreateStore(len,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 1));
+  builder.CreateStore(flags,
+                      builder.CreateStructGEP(kmpTaskAffinityInfoTy, entry, 2));
+}
+
+/// Store one entry per `omp.affinity_entry` operand into `affinityList`, in
+/// operand order.
+static void fillAffinityLocators(Operation::operand_range affinityVars,
+                                 llvm::IRBuilderBase &builder,
+                                 LLVM::ModuleTranslation &moduleTranslation,
+                                 llvm::Value *affinityList) {
+  for (auto [i, affinityVar] : llvm::enumerate(affinityVars)) {
+    auto entryOp = affinityVar.getDefiningOp<mlir::omp::AffinityEntryOp>();
+    assert(entryOp && "affinity item must be omp.affinity_entry");
+
+    llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr());
+    llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen());
+    assert(addr && len && "expect affinity addr and len to be non-null");
+    storeAffinityEntry(builder, *moduleTranslation.getOpenMPBuilder(),
+                       affinityList, builder.getInt64(i), addr, len);
+  }
+}
+
+static mlir::LogicalResult
+convertIteratorRegion(llvm::Value *linearIV, IteratorInfo &iterInfo, + mlir::Block &iteratorRegionBlock, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::Value *tmp = linearIV; + for (int d = (int)iterInfo.getDims() - 1; d >= 0; --d) { + llvm::Value *trip = iterInfo.getTrips()[d]; + // idx_d = tmp % trip_d + llvm::Value *idx = builder.CreateURem(tmp, trip); + // tmp = tmp / trip_d + tmp = builder.CreateUDiv(tmp, trip); + + // physIV_d = lb_d + idx_d * step_d + llvm::Value *physIV = builder.CreateAdd( + iterInfo.getLowerBounds()[d], + builder.CreateMul(idx, iterInfo.getSteps()[d]), "omp.it.phys_iv"); + + moduleTranslation.mapValue(iteratorRegionBlock.getArgument(d), physIV); + } + + // Translate the iterator region into the loop body. + moduleTranslation.mapBlock(&iteratorRegionBlock, builder.GetInsertBlock()); + if (mlir::failed(moduleTranslation.convertBlock(iteratorRegionBlock, + /*ignoreArguments=*/true, + builder))) { + return mlir::failure(); + } + return mlir::success(); +} + +static mlir::LogicalResult +fillAffinityIteratorLoop(mlir::omp::IteratorOp itersOp, + llvm::IRBuilderBase &builder, + mlir::LLVM::ModuleTranslation &moduleTranslation, + llvm::Value *affinityList, IteratorInfo &iterInfo) { + mlir::Region &itersRegion = itersOp.getRegion(); + mlir::Block &iteratorRegionBlock = itersRegion.front(); + + llvm::OpenMPIRBuilder::LocationDescription loc(builder); + + auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy bodyIP, + llvm::Value *linearIV) -> llvm::Error { + llvm::IRBuilderBase::InsertPointGuard guard(builder); + builder.restoreIP(bodyIP); + + if (failed(convertIteratorRegion(linearIV, iterInfo, iteratorRegionBlock, + builder, moduleTranslation))) { + return llvm::make_error( + "failed to convert iterator region", llvm::inconvertibleErrorCode()); + } + + // Extract affinity entry from omp.yield and store into list[linearIV]. 
+ auto yield = + mlir::dyn_cast(iteratorRegionBlock.getTerminator()); + assert(yield && yield.getResults().size() == 1 && + "expect omp.yield in iterator region to have one result"); + auto entryOp = + yield.getResults()[0].getDefiningOp(); + assert(entryOp && "expect yield generate an affinity entry"); + + llvm::Value *addr = moduleTranslation.lookupValue(entryOp.getAddr()); + llvm::Value *len = moduleTranslation.lookupValue(entryOp.getLen()); + storeAffinityEntry(builder, *moduleTranslation.getOpenMPBuilder(), + affinityList, linearIV, addr, len); + + // Iterator-region block/value mappings are temporary for this conversion, + // clear them to avoid stale entries in ModuleTranslation. + moduleTranslation.forgetMapping(itersRegion); + + return llvm::Error::success(); + }; + + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createIteratorLoop( + loc, iterInfo.getTotalTrips(), bodyGen, + /*Name=*/"iterator"); + if (failed(handleError(afterIP, *itersOp))) + return failure(); + + builder.restoreIP(*afterIP); + + return mlir::success(); +} + +static mlir::LogicalResult +buildAffinityData(mlir::omp::TaskOp &taskOp, llvm::IRBuilderBase &builder, + mlir::LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::AffinityData &ad) { + + if (taskOp.getAffinityVars().empty() && taskOp.getIterated().empty()) { + ad.Count = nullptr; + ad.Info = nullptr; + return mlir::success(); + } + + llvm::SmallVector ads; + llvm::StructType *kmpTaskAffinityInfoTy = + moduleTranslation.getOpenMPBuilder()->getKmpTaskAffinityInfoTy(); + + auto allocateAffinityList = [&](llvm::Value *count) -> llvm::Value * { + llvm::IRBuilderBase::InsertPointGuard guard(builder); + if (llvm::isa(count) || llvm::isa(count)) + builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); + return builder.CreateAlloca(kmpTaskAffinityInfoTy, count, + "omp.affinity_list"); + }; + + auto createAffinity = + [&](llvm::Value *count, + llvm::Value *info) 
-> llvm::OpenMPIRBuilder::AffinityData { + llvm::OpenMPIRBuilder::AffinityData ad{}; + ad.Count = builder.CreateTrunc(count, builder.getInt32Ty()); + ad.Info = + builder.CreatePointerBitCastOrAddrSpaceCast(info, builder.getPtrTy(0)); + return ad; + }; + + if (!taskOp.getAffinityVars().empty()) { + llvm::Value *count = llvm::ConstantInt::get( + builder.getInt64Ty(), taskOp.getAffinityVars().size()); + llvm::Value *list = allocateAffinityList(count); + fillAffinityLocators(taskOp.getAffinityVars(), builder, moduleTranslation, + list); + ads.emplace_back(createAffinity(count, list)); + } + + if (!taskOp.getIterated().empty()) { + for (auto [i, iter] : llvm::enumerate(taskOp.getIterated())) { + auto itersOp = iter.getDefiningOp(); + assert(itersOp && "iterated value must be defined by omp.iterator"); + IteratorInfo iterInfo(itersOp, moduleTranslation, builder); + llvm::Value *affList = allocateAffinityList(iterInfo.getTotalTrips()); + if (failed(fillAffinityIteratorLoop(itersOp, builder, moduleTranslation, + affList, iterInfo))) + return llvm::failure(); + ads.emplace_back(createAffinity(iterInfo.getTotalTrips(), affList)); + } + } + + llvm::Value *totalAffinityCount = builder.getInt32(0); + for (const auto &affinity : ads) + totalAffinityCount = builder.CreateAdd( + totalAffinityCount, + builder.CreateIntCast(affinity.Count, builder.getInt32Ty(), + /*isSigned=*/false)); + + llvm::Value *affinityInfo = ads.front().Info; + if (ads.size() > 1) { + llvm::StructType *kmpTaskAffinityInfoTy = + moduleTranslation.getOpenMPBuilder()->getKmpTaskAffinityInfoTy(); + llvm::Value *affinityInfoElemSize = builder.getInt64( + moduleTranslation.getLLVMModule()->getDataLayout().getTypeAllocSize( + kmpTaskAffinityInfoTy)); + + llvm::Value *packedAffinityInfo = allocateAffinityList(totalAffinityCount); + llvm::Value *packedAffinityInfoOffset = builder.getInt32(0); + for (const auto &affinity : ads) { + llvm::Value *affinityCount = builder.CreateIntCast( + affinity.Count, 
builder.getInt32Ty(), /*isSigned=*/false); + llvm::Value *affinityCountInt64 = builder.CreateIntCast( + affinityCount, builder.getInt64Ty(), /*isSigned=*/false); + llvm::Value *affinityInfoSize = + builder.CreateMul(affinityCountInt64, affinityInfoElemSize); + + llvm::Value *packedAffinityInfoIndex = builder.CreateIntCast( + packedAffinityInfoOffset, kmpTaskAffinityInfoTy->getElementType(0), + /*isSigned=*/false); + packedAffinityInfoIndex = builder.CreateInBoundsGEP( + kmpTaskAffinityInfoTy, packedAffinityInfo, packedAffinityInfoIndex); + + builder.CreateMemCpy( + packedAffinityInfoIndex, llvm::Align(1), + builder.CreatePointerBitCastOrAddrSpaceCast( + affinity.Info, builder.getPtrTy(packedAffinityInfoIndex->getType() + ->getPointerAddressSpace())), + llvm::Align(1), affinityInfoSize); + + packedAffinityInfoOffset = + builder.CreateAdd(packedAffinityInfoOffset, affinityCount); + } + + affinityInfo = packedAffinityInfo; + } + + ad.Count = totalAffinityCount; + ad.Info = affinityInfo; + + return mlir::success(); +} + /// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. 
static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, @@ -2421,6 +2720,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, taskOp.getPrivateNeedsBarrier()))) return llvm::failure(); + llvm::OpenMPIRBuilder::AffinityData ad; + if (failed(buildAffinityData(taskOp, builder, moduleTranslation, ad))) + return llvm::failure(); + // Set up for call to createTask() builder.SetInsertPoint(taskStartBlock); @@ -2524,7 +2827,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, moduleTranslation.getOpenMPBuilder()->createTask( ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), moduleTranslation.lookupValue(taskOp.getFinal()), - moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, + moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, ad, taskOp.getMergeable(), moduleTranslation.lookupValue(taskOp.getEventHandle()), moduleTranslation.lookupValue(taskOp.getPriority())); @@ -7321,13 +7624,13 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::LoopNestOp) { return convertOmpLoopNest(*op, builder, moduleTranslation); }) - .Case( - [&](auto op) { - // No-op, should be handled by relevant owning operations e.g. - // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp - // etc. and then discarded - return success(); - }) + .Case([&](auto op) { + // No-op, should be handled by relevant owning operations e.g. + // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp + // etc. and then discarded + return success(); + }) .Case([&](omp::NewCliOp op) { // Meta-operation: Doesn't do anything by itself, but used to // identify a loop. 
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index bc508d66fbd5..cbe18b9b882d 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -3180,6 +3180,48 @@ func.func @iterator_bad_result_type(%lb : index, %ub : index, %st : index) { // ----- +func.func @iterator_zero_step(%s2 : !llvm.struct<(ptr, i64)>) { + %lb = arith.constant 1 : index + %ub = arith.constant 4 : index + %st = arith.constant 0 : index + + // expected-error@+1 {{loop step must not be zero}} + %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) { + omp.yield(%s2 : !llvm.struct<(ptr, i64)>) + } -> !omp.iterated> + return +} + +// ----- + +func.func @iterator_positive_step_wrong_direction(%s2 : !llvm.struct<(ptr, i64)>) { + %lb = arith.constant 1000 : index + %ub = arith.constant -1 : index + %st = arith.constant 10 : index + + // expected-error@+1 {{positive loop step requires lower bound to be less than or equal to upper bound}} + %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) { + omp.yield(%s2 : !llvm.struct<(ptr, i64)>) + } -> !omp.iterated> + return +} + +// ----- + +func.func @iterator_negative_step_wrong_direction(%s2 : !llvm.struct<(ptr, i64)>) { + %lb = arith.constant -1000 : index + %ub = arith.constant 4 : index + %st = arith.constant -999 : index + + // expected-error@+1 {{negative loop step requires lower bound to be greater than or equal to upper bound}} + %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) { + omp.yield(%s2 : !llvm.struct<(ptr, i64)>) + } -> !omp.iterated> + return +} + +// ----- + func.func @iterator_missing_yield(%lb : index, %ub : index, %st : index) { // expected-error@+1 {{region must be terminated by omp.yield}} %0 = omp.iterator(%i: index) = (%lb to %ub step %st) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index b908874c2010..ba329cc67bb1 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ 
-3601,6 +3601,24 @@ func.func @omp_iterator_2d(%s2 : !llvm.struct<(ptr, i64)>) -> () { return } +// CHECK-LABEL: func.func @omp_iterator_negative_step +func.func @omp_iterator_negative_step(%s2 : !llvm.struct<(ptr, i64)>) -> () { + // CHECK: %[[LB:.*]] = arith.constant 4 : index + // CHECK: %[[UB:.*]] = arith.constant 1 : index + // CHECK: %[[ST:.*]] = arith.constant -1 : index + // CHECK: %[[IT:.*]] = omp.iterator(%[[IV:.*]]: index) = (%[[LB]] to %[[UB]] step %[[ST]]) { + // CHECK: omp.yield(%{{.*}} : !llvm.struct<(ptr, i64)>) + // CHECK: } -> !omp.iterated> + %lb = arith.constant 4 : index + %ub = arith.constant 1 : index + %st = arith.constant -1 : index + + %0 = omp.iterator(%iv: index) = (%lb to %ub step %st) { + omp.yield(%s2 : !llvm.struct<(ptr, i64)>) + } -> !omp.iterated> + return +} + // CHECK-LABEL: func.func @omp_task_affinity_iterator_1d func.func @omp_task_affinity_iterator_1d(%lb : index, %ub : index, %step : index, %addr : !llvm.ptr, %len : i64) -> () { diff --git a/mlir/test/Target/LLVMIR/openmp-iterator.mlir b/mlir/test/Target/LLVMIR/openmp-iterator.mlir new file mode 100644 index 000000000000..faadfbdc7202 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-iterator.mlir @@ -0,0 +1,295 @@ +// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s + +llvm.func @task_affinity_iterator_1d(%arr: !llvm.ptr {llvm.nocapture}) { + %c1 = llvm.mlir.constant(1 : i64) : i64 + %c4 = llvm.mlir.constant(4 : i64) : i64 + %c6 = llvm.mlir.constant(6 : i64) : i64 + %len = llvm.mlir.constant(4 : i64) : i64 + + omp.parallel { + omp.single { + %it = omp.iterator(%i: i64, %j: i64) = + (%c1 to %c4 step %c1, %c1 to %c6 step %c1) { + %entry = omp.affinity_entry %arr, %len + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + omp.yield(%entry : !omp.affinity_entry_ty) + } -> !omp.iterated> + + omp.task affinity(%it : !omp.iterated>) { + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @task_affinity_iterator_1d 
+ +// Preheader -> Header +// CHECK: omp_iterator.preheader: +// CHECK: br label %omp_iterator.header +// +// Header has the IV phi and branches to cond +// CHECK: omp_iterator.header: +// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ] +// CHECK: br label %omp_iterator.cond +// +// Cond: IV < 24 and branches to body or exit +// CHECK: omp_iterator.cond: +// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 24 +// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit +// +// Exit -> After -> continuation +// CHECK: omp_iterator.exit: +// CHECK: br label %omp_iterator.after +// CHECK: omp_iterator.after: +// CHECK: br label %omp.it.cont +// +// Body: store into affinity_list[IV] then branch to inc +// CHECK: omp_iterator.body: +// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]] +// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64 +// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0 +// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]] +// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1 +// CHECK: store i64 4, ptr [[LENGEP]] +// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2 +// CHECK: store i32 0, ptr [[FLAGGEP]] +// CHECK: br label %omp_iterator.inc +// +// CHECK: omp_iterator.inc: +// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1 +// CHECK: br label %omp_iterator.header + +llvm.func @task_affinity_iterator_3d(%arr: !llvm.ptr {llvm.nocapture}) { + %c1 = llvm.mlir.constant(1 : i64) : i64 + %c2 = llvm.mlir.constant(2 : i64) : i64 + %c4 = llvm.mlir.constant(4 : i64) : i64 + %c6 = llvm.mlir.constant(6 : i64) : i64 + %len = llvm.mlir.constant(4 : i64) : i64 + + omp.parallel { + omp.single { + // 3-D iterator: i=1..4, j=1..6, k=1..2 => total trips = 48 + %it = omp.iterator(%i: i64, %j: i64, %k: i64) = + (%c1 to 
%c4 step %c1, %c1 to %c6 step %c1, %c1 to %c2 step %c1) { + %entry = omp.affinity_entry %arr, %len + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + omp.yield(%entry : !omp.affinity_entry_ty) + } -> !omp.iterated> + + omp.task affinity(%it : !omp.iterated>) { + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @task_affinity_iterator_3d + +// Preheader -> Header +// CHECK: omp_iterator.preheader: +// CHECK: br label %omp_iterator.header +// +// Header has the IV phi and branches to cond +// CHECK: omp_iterator.header: +// CHECK: [[IV:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT:%.*]], %omp_iterator.inc ] +// CHECK: br label %omp_iterator.cond +// +// Cond: IV < 48 and branches to body or exit +// CHECK: omp_iterator.cond: +// CHECK: [[CMP:%.*]] = icmp ult i64 [[IV]], 48 +// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit +// +// Exit -> After -> continuation +// CHECK: omp_iterator.exit: +// CHECK: br label %omp_iterator.after +// CHECK: omp_iterator.after: +// CHECK: br label %omp.it.cont +// +// Body: store into affinity_list[IV] then branch to inc +// CHECK: omp_iterator.body: +// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr %{{.*affinity_list.*}}, i64 [[IV]] +// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr %loadgep_ to i64 +// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0 +// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]] +// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1 +// CHECK: store i64 4, ptr [[LENGEP]] +// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2 +// CHECK: store i32 0, ptr [[FLAGGEP]] +// CHECK: br label %omp_iterator.inc +// +// CHECK: omp_iterator.inc: +// CHECK: [[NEXT]] = add nuw i64 [[IV]], 1 +// CHECK: br label %omp_iterator.header + +llvm.func 
@task_affinity_iterator_multiple(%arr: !llvm.ptr {llvm.nocapture}) { + %c1 = llvm.mlir.constant(1 : i64) : i64 + %c3 = llvm.mlir.constant(3 : i64) : i64 + %c4 = llvm.mlir.constant(4 : i64) : i64 + %c6 = llvm.mlir.constant(6 : i64) : i64 + %len = llvm.mlir.constant(4 : i64) : i64 + + omp.parallel { + omp.single { + // First iterator: 2-D (4 * 6 = 24) + %it0 = omp.iterator(%i: i64, %j: i64) = + (%c1 to %c4 step %c1, %c1 to %c6 step %c1) { + %entry0 = omp.affinity_entry %arr, %len + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + omp.yield(%entry0 : !omp.affinity_entry_ty) + } -> !omp.iterated> + + // second iterator: 1-D (3) + %it1 = omp.iterator(%k: i64) = (%c1 to %c3 step %c1) { + %entry1 = omp.affinity_entry %arr, %len + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + omp.yield(%entry1 : !omp.affinity_entry_ty) + } -> !omp.iterated> + + // Multiple iterators in a single affinity clause. + omp.task affinity(%it0: !omp.iterated>, + %it1: !omp.iterated>) { + omp.terminator + } + + omp.terminator + } + omp.terminator + } + + llvm.return +} + +// CHECK-LABEL: define internal void @task_affinity_iterator_multiple +// CHECK-DAG: [[AFFLIST0:%.*]] = alloca { i64, i64, i32 }, i64 24, align 8 +// CHECK-DAG: [[AFFLIST1:%.*]] = alloca { i64, i64, i32 }, i64 3, align 8 +// CHECK-DAG: [[AFFINITY_LIST:%.*]] = alloca { i64, i64, i32 }, i32 27, align 8 + +// First iterator header +// CHECK: omp_iterator.preheader: +// CHECK: br label %[[HEADER0:.+]] +// CHECK: [[HEADER0]]: +// CHECK: [[IV0:%.*]] = phi i64 [ 0, %omp_iterator.preheader ], [ [[NEXT0:%.*]], %[[INC0:.+]] ] +// CHECK: br label %[[COND0:.+]] +// CHECK: [[COND0]]: +// CHECK: [[CMP0:%.*]] = icmp ult i64 [[IV0]], 24 +// CHECK: br i1 [[CMP0]], label %[[BODY0:.+]], label %omp_iterator.exit + +// Second iterator header +// CHECK: omp_iterator.preheader{{.*}}: +// CHECK: [[HEADER1:.+]]: +// CHECK: [[IV1:%.*]] = phi i64 [ 0, %omp_iterator.preheader{{.*}} ], [ [[NEXT1:%.*]], %[[INC1:.+]] ] +// CHECK: br label 
%omp_iterator.cond{{.*}} +// CHECK: omp_iterator.cond{{.*}}: +// CHECK: [[CMP1:%.*]] = icmp ult i64 [[IV1]], 3 +// CHECK: br i1 [[CMP1]], label %[[BODY1:.+]], label %omp_iterator.exit{{.*}} + +// CHECK: [[AFFINITY_LIST_1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 0 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_1]], ptr align 1 [[AFFLIST0]], i64 480, i1 false) +// CHECK: [[AFFINITY_LIST_2:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFINITY_LIST]], i64 24 +// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AFFINITY_LIST_2]], ptr align 1 [[AFFLIST1]], i64 60, i1 false) +// CHECK: codeRepl: +// CHECK: call ptr @__kmpc_omp_task_alloc +// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 27{{.*}}ptr [[AFFINITY_LIST]] +// CHECK: call i32 @__kmpc_omp_task + +// Second iterator body +// CHECK: [[BODY1]]: +// CHECK: [[ENTRY1:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST1]] +// CHECK: [[ADDR1:%.*]] = ptrtoint ptr %loadgep_ to i64 +// CHECK: [[ADDRGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 0 +// CHECK: store i64 [[ADDR1]], ptr [[ADDRGEP1]] +// CHECK: [[LENGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 1 +// CHECK: store i64 4, ptr [[LENGEP1]] +// CHECK: [[FLAGGEP1:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY1]], i32 0, i32 2 +// CHECK: store i32 0, ptr [[FLAGGEP1]] +// CHECK: br label %[[INC1]] +// CHECK: [[INC1]]: +// CHECK: [[NEXT1]] = add nuw i64 [[IV1]], 1 +// CHECK: br label %[[HEADER1]] + +// First iterator body +// CHECK: [[BODY0]]: +// CHECK: [[ENTRY0:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST0]], i64 [[IV0]] +// CHECK: [[ADDR0:%.*]] = ptrtoint ptr %loadgep_ to i64 +// CHECK: [[ADDRGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 0 +// CHECK: store i64 [[ADDR0]], ptr [[ADDRGEP0]] +// CHECK: 
[[LENGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 1 +// CHECK: store i64 4, ptr [[LENGEP0]] +// CHECK: [[FLAGGEP0:%.*]] = getelementptr inbounds{{.*}} { i64, i64, i32 }, ptr [[ENTRY0]], i32 0, i32 2 +// CHECK: store i32 0, ptr [[FLAGGEP0]] +// CHECK: br label %[[INC0]] +// CHECK: [[INC0]]: +// CHECK: [[NEXT0]] = add nuw i64 [[IV0]], 1 +// CHECK: br label %[[HEADER0]] + +// Affinity list must only be allocated once the dynamic trip count is known. +llvm.func @task_affinity_iterator_dynamic_tripcount( + %arr: !llvm.ptr {llvm.nocapture}, %lb: i64, %ub: i64, %step: i64, + %len: i64) { + omp.parallel { + omp.single { + %it = omp.iterator(%i: i64) = (%lb to %ub step %step) { + %entry = omp.affinity_entry %arr, %len + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + omp.yield(%entry : !omp.affinity_entry_ty) + } -> !omp.iterated> + + omp.task affinity(%it : !omp.iterated>) { + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @task_affinity_iterator_dynamic_tripcount +// CHECK: [[DIFF:%.*]] = sub i64 {{.*}}, {{.*}} +// CHECK: [[DIV:%.*]] = sdiv i64 [[DIFF]], {{.*}} +// CHECK: [[TRIPS:%.*]] = add i64 [[DIV]], 1 +// CHECK: [[SCALED:%.*]] = mul i64 1, [[TRIPS]] +// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 [[SCALED]] + +llvm.func @task_affinity_iterator_negative_step(%arr: !llvm.ptr {llvm.nocapture}) { + %c4 = llvm.mlir.constant(4 : i64) : i64 + %c1 = llvm.mlir.constant(1 : i64) : i64 + %cn1 = llvm.mlir.constant(-1 : i64) : i64 + + omp.parallel { + omp.single { + %it = omp.iterator(%i: i64) = (%c4 to %c1 step %cn1) { + %entry = omp.affinity_entry %arr, %i + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + omp.yield(%entry : !omp.affinity_entry_ty) + } -> !omp.iterated> + + omp.task affinity(%it : !omp.iterated>) { + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @task_affinity_iterator_negative_step +// 
CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 4, align 8 +// CHECK: omp_iterator.cond: +// CHECK: [[CMP:%.*]] = icmp ult i64 %omp_iterator.iv, 4 +// CHECK: br i1 [[CMP]], label %omp_iterator.body, label %omp_iterator.exit +// CHECK: omp_iterator.body: +// CHECK: [[IDX:%.*]] = urem i64 %omp_iterator.iv, 4 +// CHECK: [[STEPMUL:%.*]] = mul i64 [[IDX]], -1 +// CHECK: [[PHYSIV:%.*]] = add i64 4, [[STEPMUL]] +// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 %omp_iterator.iv +// CHECK: [[LENPTR:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1 +// CHECK: store i64 [[PHYSIV]], ptr [[LENPTR]] diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index fcb937dbc186..c5cdecd09177 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -3589,3 +3589,37 @@ llvm.func @nested_task_with_deps() { // CHECK: ret void // CHECK: } + +llvm.func @task_affinity_plain(%arr: !llvm.ptr {llvm.nocapture}) { + %len = llvm.mlir.constant(4 : i64) : i64 + + omp.parallel { + omp.single { + %ae = omp.affinity_entry %arr, %len + : (!llvm.ptr, i64) -> !omp.affinity_entry_ty + + omp.task affinity(%ae : !omp.affinity_entry_ty) { + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define internal void @task_affinity_plain +// CHECK: [[BASE:%.*]] = load ptr, ptr %gep_, align 8 +// CHECK: [[AFFLIST:%.*]] = alloca { i64, i64, i32 }, i64 1, align 8 +// CHECK: [[ENTRY:%.*]] = getelementptr inbounds { i64, i64, i32 }, ptr [[AFFLIST]], i64 0 +// base_addr (struct field 0): the base pointer stored as an i64 +// CHECK: [[ADDRI64:%.*]] = ptrtoint ptr [[BASE]] to i64 +// CHECK: [[ADDRGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 0 +// CHECK: store i64 [[ADDRI64]], ptr [[ADDRGEP]] +// len (struct field 1): the constant length operand +// CHECK: [[LENGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 1 +// CHECK: store i64 4, ptr 
[[LENGEP]] +// flags is always 0 +// CHECK: [[FLAGGEP:%.*]] = getelementptr inbounds nuw { i64, i64, i32 }, ptr [[ENTRY]], i32 0, i32 2 +// CHECK: store i32 0, ptr [[FLAGGEP]] +// CHECK: call i32 @__kmpc_omp_reg_task_with_affinity{{.*}}i32 1, ptr [[AFFLIST]] diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index 2500d546fcf4..8fb66cb4dd0e 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -462,15 +462,3 @@ llvm.func @wsloop_order(%lb : i32, %ub : i32, %step : i32) { } llvm.return } - -// ----- -llvm.func @task_affinity(%ptr : !llvm.ptr, %len : i64) { - // expected-error@below {{not yet implemented: omp.affinity_entry}} - // expected-error@below {{LLVM Translation failed for operation: omp.affinity_entry}} - %ae = omp.affinity_entry %ptr, %len - : (!llvm.ptr, i64) -> !omp.affinity_entry_ty - omp.task affinity(%ae : !omp.affinity_entry_ty) { - omp.terminator - } - llvm.return -} diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index 37836fb45753..ae2d617c3ea4 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -1505,6 +1505,18 @@ kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins, kmp_task_affinity_info_t *affin_list) { + if (naffins > 0) + KMP_DEBUG_ASSERT(affin_list != NULL); + + for (kmp_int32 i = 0; i < naffins; ++i) { + KA_TRACE(30, ("__kmpc_omp_reg_task_with_affinity: T#%d aff[%d] " + "base_addr=0x%llx len=%zu flags={%d,%d,%d}\n", + gtid, i, (unsigned long long)affin_list[i].base_addr, + affin_list[i].len, (int)affin_list[i].flags.flag1, + (int)affin_list[i].flags.flag2, + (int)affin_list[i].flags.reserved)); + } + return 0; }