diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst index 9fa403e68e13..d1c4ffe4c6f3 100644 --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -873,6 +873,8 @@ the coroutine destroy function. Otherwise it is replaced with an indirect call based on the function pointer for the destroy function stored in the coroutine frame. Destroying a coroutine that is not suspended leads to undefined behavior. +This intrinsic implies `coro.dead`. + .. _coro.resume: 'llvm.coro.resume' Intrinsic @@ -1169,6 +1171,48 @@ Example (standard deallocation functions): call void @free(ptr %mem) ret void +.. _coro.dead: + +'llvm.coro.dead' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:: + + declare void @llvm.coro.dead(ptr ) + +Overview: +""""""""" + +The 'llvm.coro.dead' intrinsic is an optimization hint to help Heap Allocation eLision Optimization (HALO) +mark the end of lifetime of the coroutine frame. + +Arguments: +"""""""""" + +The argument is a pointer to the coroutine frame. This should be the same +pointer that was returned by prior `coro.begin` call. + +Semantics: +"""""""""" + +A frontend can delegate this intrinsic to indicate that the coroutine frame is dead, allowing +coroutines that are not explicitly destroyed via `coro.destroy` to be elided. + +Example: +""""""""""""""""""""""""""""""""""""""" + +.. code-block:: llvm + + cleanup: + %mem = call ptr @llvm.coro.free(token %id, ptr %frame) + %mem_not_null = icmp ne ptr %mem, null + br i1 %mem_not_null, label %if.then, label %if.end + if.then: + call void @CustomFree(ptr %mem) + br label %if.end + if.end: + call void @llvm.coro.dead(ptr %frame) + ret void + .. _coro.alloc: 'llvm.coro.alloc' Intrinsic diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 6d4b9bd4415a..b51ac732914b 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1849,6 +1849,7 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_coro_dead : Intrinsic<[], [llvm_ptr_ty], [IntrNoMem]>; def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty, llvm_token_ty], []>; def int_coro_end_results : Intrinsic<[llvm_token_ty], [llvm_vararg_ty]>; def int_coro_end_async diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h index 38daf25cacd8..cfdf831709ad 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroInstr.h @@ -456,6 +456,20 @@ public: } }; +/// This represents the llvm.coro.dead instruction. +class CoroDeadInst : public IntrinsicInst { +public: + Value *getFrame() const { return getArgOperand(0); } + + // Methods to support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_dead; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + /// This class represents the llvm.coro.begin or llvm.coro.begin.custom.abi /// instructions. class CoroBeginInst : public IntrinsicInst { diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index 01b5a0a24ced..e06d913b794f 100644 --- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -108,6 +108,8 @@ bool Lowerer::lower(Function &F) { case Intrinsic::coro_free: II->replaceAllUsesWith(II->getArgOperand(1)); break; + case Intrinsic::coro_dead: + break; case Intrinsic::coro_alloc: II->replaceAllUsesWith(ConstantInt::getTrue(Context)); break; @@ -258,12 +260,12 @@ void NoopCoroElider::eraseFromWorklist(Instruction *I) { static bool declaresCoroCleanupIntrinsics(const Module &M) { return coro::declaresIntrinsics( - M, - {Intrinsic::coro_alloc, Intrinsic::coro_begin, Intrinsic::coro_subfn_addr, - Intrinsic::coro_free, Intrinsic::coro_id, Intrinsic::coro_id_retcon, - Intrinsic::coro_id_async, Intrinsic::coro_id_retcon_once, - Intrinsic::coro_noop, Intrinsic::coro_async_size_replace, - Intrinsic::coro_async_resume, Intrinsic::coro_begin_custom_abi}); + M, {Intrinsic::coro_alloc, Intrinsic::coro_begin, + Intrinsic::coro_subfn_addr, Intrinsic::coro_free, + Intrinsic::coro_dead, Intrinsic::coro_id, Intrinsic::coro_id_retcon, + Intrinsic::coro_id_async, Intrinsic::coro_id_retcon_once, + Intrinsic::coro_noop, Intrinsic::coro_async_size_replace, + Intrinsic::coro_async_resume, Intrinsic::coro_begin_custom_abi}); } PreservedAnalyses CoroCleanupPass::run(Module &M, diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 3e260d48a5d8..e6ea4393252a 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -73,7 +73,8 @@ private: SmallVector CoroBegins; SmallVector CoroAllocs; SmallVector ResumeAddr; - DenseMap> DestroyAddr; + SmallVector DestroyAddr; + DenseMap> BeginDeadMap; }; } // end anonymous namespace @@ -177,23 +178,28 @@ CoroIdElider::CoroIdElider(CoroIdInst *CoroId, FunctionElideInfo &FEI, CoroAllocs.push_back(CA); } - // Collect all coro.subfn.addrs associated with coro.begin. - // Note, we only devirtualize the calls if their coro.subfn.addr refers to - // coro.begin directly. If we run into cases where this check is too - // conservative, we can consider relaxing the check. for (CoroBeginInst *CB : CoroBegins) { - for (User *U : CB->users()) - if (auto *II = dyn_cast(U)) + for (User *U : CB->users()) { + auto &CoroDeads = BeginDeadMap[CB]; + // Collect all coro.subfn.addrs associated with coro.begin. + // Note, we only devirtualize the calls if their coro.subfn.addr refers to + // coro.begin directly. If we run into cases where this check is too + // conservative, we can consider relaxing the check. + if (auto *II = dyn_cast(U)) { switch (II->getIndex()) { case CoroSubFnInst::ResumeIndex: ResumeAddr.push_back(II); break; case CoroSubFnInst::DestroyIndex: - DestroyAddr[CB].push_back(II); + CoroDeads.push_back(II); // coro.destroy implies coro.dead + DestroyAddr.push_back(II); break; default: llvm_unreachable("unexpected coro.subfn.addr constant"); } + } else if (auto *II = dyn_cast(U)) + CoroDeads.push_back(II); + } } } @@ -240,8 +246,8 @@ void CoroIdElider::elideHeapAllocations(uint64_t FrameSize, Align FrameAlign) { bool CoroIdElider::canCoroBeginEscape( const CoroBeginInst *CB, const SmallPtrSetImpl &TIs) const { - const auto &It = DestroyAddr.find(CB); - assert(It != DestroyAddr.end()); + const auto &It = BeginDeadMap.find(CB); + assert(It != BeginDeadMap.end()); // Limit the number of blocks we visit. unsigned Limit = 32 * (1 + It->second.size()); @@ -250,8 +256,8 @@ bool CoroIdElider::canCoroBeginEscape( Worklist.push_back(CB->getParent()); SmallPtrSet Visited; - // Consider basicblock of coro.destroy as visited one, so that we - // skip the path pass through coro.destroy. + // Consider basicblock of coro.dead/destroy as visited one, so that we + // skip the path pass through it. for (auto *DA : It->second) Visited.insert(DA->getParent()); @@ -327,11 +333,11 @@ bool CoroIdElider::lifetimeEligibleForElide() const { if (CoroAllocs.empty()) return false; - // Check that for every coro.begin there is at least one coro.destroy directly - // referencing the SSA value of that coro.begin along each + // Check that for every coro.begin there is at least one coro.dead/destroy + // directly referencing the SSA value of that coro.begin along each // non-exceptional path. // - // If the value escaped, then coro.destroy would have been referencing a + // If the value escaped, then coro.dead/destroy would have been referencing a // memory location storing that value and not the virtual register. SmallPtrSet Terminators; @@ -347,21 +353,16 @@ bool CoroIdElider::lifetimeEligibleForElide() const { Terminators.insert(&B); } - // Filter out the coro.destroy that lie along exceptional paths. + // Filter out the coro.dead/destroy that lie along exceptional paths. for (const auto *CB : CoroBegins) { - auto It = DestroyAddr.find(CB); - - // FIXME: If we have not found any destroys for this coro.begin, we - // disqualify this elide. - if (It == DestroyAddr.end()) + auto It = BeginDeadMap.find(CB); + if (It == BeginDeadMap.end()) return false; - const auto &CorrespondingDestroyAddrs = It->second; - - // If every terminators is dominated by coro.destroy, we could know the + // If every terminators is dominated by coro.dead/destroy, we could know the // corresponding coro.begin wouldn't escape. auto DominatesTerminator = [&](auto *TI) { - return llvm::any_of(CorrespondingDestroyAddrs, [&](auto *Destroy) { + return llvm::any_of(It->second, [&](auto *Destroy) { return DT.dominates(Destroy, TI->getTerminator()); }); }; @@ -371,7 +372,7 @@ bool CoroIdElider::lifetimeEligibleForElide() const { // Otherwise canCoroBeginEscape would decide whether there is any paths from // coro.begin to Terminators which not pass through any of the - // coro.destroys. This is a slower analysis. + // coro.dead/destroy. This is a slower analysis. // // canCoroBeginEscape is relatively slow, so we avoid to run it as much as // possible. @@ -401,8 +402,7 @@ bool CoroIdElider::attemptElide() { EligibleForElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex); - for (auto &It : DestroyAddr) - replaceWithConstant(DestroyAddrConstant, It.second); + replaceWithConstant(DestroyAddrConstant, DestroyAddr); auto FrameSizeAndAlign = getFrameLayout(cast(ResumeAddrConstant)); diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index a68a5bf1623b..a9f96e0309d2 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -118,15 +118,17 @@ bool coro::declaresIntrinsics(const Module &M, ArrayRef List) { return false; } -// Replace all coro.frees associated with the provided frame with 'null' +// Replace all coro.frees associated with the provided frame with 'null' and +// erase all associated coro.deads void coro::elideCoroFree(Value *FramePtr) { SmallVector CoroFrees; - for (User *U : FramePtr->users()) - if (auto CF = dyn_cast(U)) + SmallVector CoroDeads; + for (User *U : FramePtr->users()) { + if (auto *CF = dyn_cast(U)) CoroFrees.push_back(CF); - - if (CoroFrees.empty()) - return; + else if (auto *CD = dyn_cast(U)) + CoroDeads.push_back(CD); + } Value *Replacement = ConstantPointerNull::get(PointerType::get(FramePtr->getContext(), 0)); @@ -134,6 +136,9 @@ void coro::elideCoroFree(Value *FramePtr) { CF->replaceAllUsesWith(Replacement); CF->eraseFromParent(); } + + for (auto *CD : CoroDeads) + CD->eraseFromParent(); } void coro::suppressCoroAllocs(CoroIdInst *CoroId) { diff --git a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll index 633045d4c9b8..2be1fbaa4845 100644 --- a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll @@ -337,6 +337,33 @@ entry: ret void } +; Test that the coroutine is elided if marked by coro.dead +; CHECK-LABEL: @callResume_implicit_destroy( +define void @callResume_implicit_destroy() { +entry: +; CHECK: alloca [4 x i8], align 4 +; CHECK-NOT: coro.begin +; CHECK-NOT: CustomAlloc +; CHECK: call void @may_throw() + %hdl = call ptr @f() + +; Need to remove 'tail' from the first call to @bar +; CHECK-NOT: tail call void @bar( +; CHECK: call void @bar( + tail call void @bar(ptr %hdl) +; CHECK: tail call void @bar( + tail call void @bar(ptr null) + +; CHECK-NEXT: call fastcc void @f.resume(ptr %0) + %0 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0) + call fastcc void %0(ptr %hdl) + + call void @llvm.coro.dead(ptr %hdl) + +; CHECK-NEXT: ret void + ret void +} + declare token @llvm.coro.id(i32, ptr, ptr, ptr) declare i1 @llvm.coro.alloc(token) declare ptr @llvm.coro.free(token, ptr) diff --git a/llvm/test/Transforms/Coroutines/no-suspend.ll b/llvm/test/Transforms/Coroutines/no-suspend.ll index c08423d6053f..b683291f5e2b 100644 --- a/llvm/test/Transforms/Coroutines/no-suspend.ll +++ b/llvm/test/Transforms/Coroutines/no-suspend.ll @@ -27,9 +27,12 @@ body: cleanup: %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) %need.dyn.free = icmp ne ptr %mem, null - br i1 %need.dyn.free, label %dyn.free, label %suspend + br i1 %need.dyn.free, label %dyn.free, label %after.free dyn.free: call void @free(ptr %mem) + br label %after.free +after.free: + call void @llvm.coro.dead(ptr %hdl) br label %suspend suspend: call void @llvm.coro.end(ptr %hdl, i1 false, token none)