[CoroElide][IR] Add llvm.coro.dead intrinsic (#188899)

Part 3/4: Implement HALO for coroutines that flow off final suspend.
Parent PR approved in https://github.com/llvm/llvm-project/pull/185336.
This version also elides `coro.dead` for coroutines that never suspend,
with additional tests.

This patch adds an optimization hint, `llvm.coro.dead`, so that HALO still works
in cases where `llvm.coro.destroy` is not available.
This commit is contained in:
Weibo He 2026-03-27 16:19:13 +08:00 committed by GitHub
parent 8430e9e8f0
commit a3d9708a41
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 137 additions and 41 deletions

View File

@ -873,6 +873,8 @@ the coroutine destroy function. Otherwise it is replaced with an indirect call
based on the function pointer for the destroy function stored in the coroutine
frame. Destroying a coroutine that is not suspended leads to undefined behavior.
This intrinsic implies `coro.dead`.
.. _coro.resume:
'llvm.coro.resume' Intrinsic
@ -1169,6 +1171,48 @@ Example (standard deallocation functions):
call void @free(ptr %mem)
ret void
.. _coro.dead:
'llvm.coro.dead' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
declare void @llvm.coro.dead(ptr <frame>)
Overview:
"""""""""
The 'llvm.coro.dead' intrinsic is an optimization hint that marks the end of the
coroutine frame's lifetime for Heap Allocation eLision Optimization (HALO).
Arguments:
""""""""""
The argument is a pointer to the coroutine frame. This should be the same
pointer that was returned by a prior `coro.begin` call.
Semantics:
""""""""""
A frontend can emit this intrinsic to indicate that the coroutine frame is dead, allowing
the heap allocation of coroutines that are not explicitly destroyed via `coro.destroy` to be elided.
Example:
"""""""""""""""""""""""""""""""""""""""
.. code-block:: llvm
cleanup:
%mem = call ptr @llvm.coro.free(token %id, ptr %frame)
%mem_not_null = icmp ne ptr %mem, null
br i1 %mem_not_null, label %if.then, label %if.end
if.then:
call void @CustomFree(ptr %mem)
br label %if.end
if.end:
call void @llvm.coro.dead(ptr %frame)
ret void
.. _coro.alloc:
'llvm.coro.alloc' Intrinsic

View File

@ -1849,6 +1849,7 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly,
ReadOnly<ArgIndex<1>>,
NoCapture<ArgIndex<1>>]>;
// llvm.coro.dead: takes a single pointer argument and returns nothing.
// Documented as an optimization hint marking the coroutine frame as dead.
def int_coro_dead : Intrinsic<[], [llvm_ptr_ty], [IntrNoMem]>;
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty, llvm_token_ty], []>;
def int_coro_end_results : Intrinsic<[llvm_token_ty], [llvm_vararg_ty]>;
def int_coro_end_async

View File

@ -456,6 +456,20 @@ public:
}
};
/// This represents the llvm.coro.dead instruction.
class CoroDeadInst : public IntrinsicInst {
public:
Value *getFrame() const { return getArgOperand(0); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_dead;
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
/// This class represents the llvm.coro.begin or llvm.coro.begin.custom.abi
/// instructions.
class CoroBeginInst : public IntrinsicInst {

View File

@ -108,6 +108,8 @@ bool Lowerer::lower(Function &F) {
case Intrinsic::coro_free:
II->replaceAllUsesWith(II->getArgOperand(1));
break;
case Intrinsic::coro_dead:
break;
case Intrinsic::coro_alloc:
II->replaceAllUsesWith(ConstantInt::getTrue(Context));
break;
@ -258,12 +260,12 @@ void NoopCoroElider::eraseFromWorklist(Instruction *I) {
static bool declaresCoroCleanupIntrinsics(const Module &M) {
return coro::declaresIntrinsics(
M,
{Intrinsic::coro_alloc, Intrinsic::coro_begin, Intrinsic::coro_subfn_addr,
Intrinsic::coro_free, Intrinsic::coro_id, Intrinsic::coro_id_retcon,
Intrinsic::coro_id_async, Intrinsic::coro_id_retcon_once,
Intrinsic::coro_noop, Intrinsic::coro_async_size_replace,
Intrinsic::coro_async_resume, Intrinsic::coro_begin_custom_abi});
M, {Intrinsic::coro_alloc, Intrinsic::coro_begin,
Intrinsic::coro_subfn_addr, Intrinsic::coro_free,
Intrinsic::coro_dead, Intrinsic::coro_id, Intrinsic::coro_id_retcon,
Intrinsic::coro_id_async, Intrinsic::coro_id_retcon_once,
Intrinsic::coro_noop, Intrinsic::coro_async_size_replace,
Intrinsic::coro_async_resume, Intrinsic::coro_begin_custom_abi});
}
PreservedAnalyses CoroCleanupPass::run(Module &M,

View File

@ -73,7 +73,8 @@ private:
SmallVector<CoroBeginInst *, 1> CoroBegins;
SmallVector<CoroAllocInst *, 1> CoroAllocs;
SmallVector<CoroSubFnInst *, 4> ResumeAddr;
DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
SmallVector<CoroSubFnInst *, 4> DestroyAddr;
DenseMap<CoroBeginInst *, SmallVector<IntrinsicInst *, 4>> BeginDeadMap;
};
} // end anonymous namespace
@ -177,23 +178,28 @@ CoroIdElider::CoroIdElider(CoroIdInst *CoroId, FunctionElideInfo &FEI,
CoroAllocs.push_back(CA);
}
// Collect all coro.subfn.addrs associated with coro.begin.
// Note, we only devirtualize the calls if their coro.subfn.addr refers to
// coro.begin directly. If we run into cases where this check is too
// conservative, we can consider relaxing the check.
for (CoroBeginInst *CB : CoroBegins) {
for (User *U : CB->users())
if (auto *II = dyn_cast<CoroSubFnInst>(U))
for (User *U : CB->users()) {
auto &CoroDeads = BeginDeadMap[CB];
// Collect all coro.subfn.addrs associated with coro.begin.
// Note, we only devirtualize the calls if their coro.subfn.addr refers to
// coro.begin directly. If we run into cases where this check is too
// conservative, we can consider relaxing the check.
if (auto *II = dyn_cast<CoroSubFnInst>(U)) {
switch (II->getIndex()) {
case CoroSubFnInst::ResumeIndex:
ResumeAddr.push_back(II);
break;
case CoroSubFnInst::DestroyIndex:
DestroyAddr[CB].push_back(II);
CoroDeads.push_back(II); // coro.destroy implies coro.dead
DestroyAddr.push_back(II);
break;
default:
llvm_unreachable("unexpected coro.subfn.addr constant");
}
} else if (auto *II = dyn_cast<CoroDeadInst>(U))
CoroDeads.push_back(II);
}
}
}
@ -240,8 +246,8 @@ void CoroIdElider::elideHeapAllocations(uint64_t FrameSize, Align FrameAlign) {
bool CoroIdElider::canCoroBeginEscape(
const CoroBeginInst *CB, const SmallPtrSetImpl<BasicBlock *> &TIs) const {
const auto &It = DestroyAddr.find(CB);
assert(It != DestroyAddr.end());
const auto &It = BeginDeadMap.find(CB);
assert(It != BeginDeadMap.end());
// Limit the number of blocks we visit.
unsigned Limit = 32 * (1 + It->second.size());
@ -250,8 +256,8 @@ bool CoroIdElider::canCoroBeginEscape(
Worklist.push_back(CB->getParent());
SmallPtrSet<const BasicBlock *, 32> Visited;
// Consider basicblock of coro.destroy as visited one, so that we
// skip the path pass through coro.destroy.
// Consider basicblock of coro.dead/destroy as visited one, so that we
// skip the path pass through it.
for (auto *DA : It->second)
Visited.insert(DA->getParent());
@ -327,11 +333,11 @@ bool CoroIdElider::lifetimeEligibleForElide() const {
if (CoroAllocs.empty())
return false;
// Check that for every coro.begin there is at least one coro.destroy directly
// referencing the SSA value of that coro.begin along each
// Check that for every coro.begin there is at least one coro.dead/destroy
// directly referencing the SSA value of that coro.begin along each
// non-exceptional path.
//
// If the value escaped, then coro.destroy would have been referencing a
// If the value escaped, then coro.dead/destroy would have been referencing a
// memory location storing that value and not the virtual register.
SmallPtrSet<BasicBlock *, 8> Terminators;
@ -347,21 +353,16 @@ bool CoroIdElider::lifetimeEligibleForElide() const {
Terminators.insert(&B);
}
// Filter out the coro.destroy that lie along exceptional paths.
// Filter out the coro.dead/destroy that lie along exceptional paths.
for (const auto *CB : CoroBegins) {
auto It = DestroyAddr.find(CB);
// FIXME: If we have not found any destroys for this coro.begin, we
// disqualify this elide.
if (It == DestroyAddr.end())
auto It = BeginDeadMap.find(CB);
if (It == BeginDeadMap.end())
return false;
const auto &CorrespondingDestroyAddrs = It->second;
// If every terminators is dominated by coro.destroy, we could know the
// If every terminators is dominated by coro.dead/destroy, we could know the
// corresponding coro.begin wouldn't escape.
auto DominatesTerminator = [&](auto *TI) {
return llvm::any_of(CorrespondingDestroyAddrs, [&](auto *Destroy) {
return llvm::any_of(It->second, [&](auto *Destroy) {
return DT.dominates(Destroy, TI->getTerminator());
});
};
@ -371,7 +372,7 @@ bool CoroIdElider::lifetimeEligibleForElide() const {
// Otherwise canCoroBeginEscape would decide whether there is any paths from
// coro.begin to Terminators which not pass through any of the
// coro.destroys. This is a slower analysis.
// coro.dead/destroy. This is a slower analysis.
//
// canCoroBeginEscape is relatively slow, so we avoid to run it as much as
// possible.
@ -401,8 +402,7 @@ bool CoroIdElider::attemptElide() {
EligibleForElide ? CoroSubFnInst::CleanupIndex
: CoroSubFnInst::DestroyIndex);
for (auto &It : DestroyAddr)
replaceWithConstant(DestroyAddrConstant, It.second);
replaceWithConstant(DestroyAddrConstant, DestroyAddr);
auto FrameSizeAndAlign = getFrameLayout(cast<Function>(ResumeAddrConstant));

View File

@ -118,15 +118,17 @@ bool coro::declaresIntrinsics(const Module &M, ArrayRef<Intrinsic::ID> List) {
return false;
}
// Replace all coro.frees associated with the provided frame with 'null'
// Replace all coro.frees associated with the provided frame with 'null' and
// erase all associated coro.deads
void coro::elideCoroFree(Value *FramePtr) {
SmallVector<CoroFreeInst *, 4> CoroFrees;
for (User *U : FramePtr->users())
if (auto CF = dyn_cast<CoroFreeInst>(U))
SmallVector<CoroDeadInst *, 4> CoroDeads;
for (User *U : FramePtr->users()) {
if (auto *CF = dyn_cast<CoroFreeInst>(U))
CoroFrees.push_back(CF);
if (CoroFrees.empty())
return;
else if (auto *CD = dyn_cast<CoroDeadInst>(U))
CoroDeads.push_back(CD);
}
Value *Replacement =
ConstantPointerNull::get(PointerType::get(FramePtr->getContext(), 0));
@ -134,6 +136,9 @@ void coro::elideCoroFree(Value *FramePtr) {
CF->replaceAllUsesWith(Replacement);
CF->eraseFromParent();
}
for (auto *CD : CoroDeads)
CD->eraseFromParent();
}
void coro::suppressCoroAllocs(CoroIdInst *CoroId) {

View File

@ -337,6 +337,33 @@ entry:
ret void
}
; Test that the coroutine is elided if marked by coro.dead.
; The caller only resumes the handle and never calls coro.destroy; the
; expectations below require a stack alloca in place of the custom allocation.
; CHECK-LABEL: @callResume_implicit_destroy(
define void @callResume_implicit_destroy() {
entry:
; CHECK: alloca [4 x i8], align 4
; CHECK-NOT: coro.begin
; CHECK-NOT: CustomAlloc
; CHECK: call void @may_throw()
%hdl = call ptr @f()
; Need to remove 'tail' from the first call to @bar
; CHECK-NOT: tail call void @bar(
; CHECK: call void @bar(
tail call void @bar(ptr %hdl)
; CHECK: tail call void @bar(
tail call void @bar(ptr null)
; CHECK-NEXT: call fastcc void @f.resume(ptr %0)
%0 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
call fastcc void %0(ptr %hdl)
; The frame is marked dead here, directly on the coro.begin result; the
; expected output goes straight from the resume call to the return.
call void @llvm.coro.dead(ptr %hdl)
; CHECK-NEXT: ret void
ret void
}
declare token @llvm.coro.id(i32, ptr, ptr, ptr)
declare i1 @llvm.coro.alloc(token)
declare ptr @llvm.coro.free(token, ptr)

View File

@ -27,9 +27,12 @@ body:
cleanup:
%mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
%need.dyn.free = icmp ne ptr %mem, null
br i1 %need.dyn.free, label %dyn.free, label %suspend
br i1 %need.dyn.free, label %dyn.free, label %after.free
dyn.free:
call void @free(ptr %mem)
br label %after.free
after.free:
call void @llvm.coro.dead(ptr %hdl)
br label %suspend
suspend:
call void @llvm.coro.end(ptr %hdl, i1 false, token none)