[CoroElide][IR] Add llvm.coro.dead intrinsic (#188899)
Part 3/4: Implement HALO for coroutines that flow off final suspend. Parent PR approved in https://github.com/llvm/llvm-project/pull/185336. This version also elides `coro.dead` for coroutines that never suspend, with additional tests. This patch add an optimization hint `llvm.coro.dead` for cases where `llvm.coro.destroy` is not available so that HALO works.
This commit is contained in:
parent
8430e9e8f0
commit
a3d9708a41
@ -873,6 +873,8 @@ the coroutine destroy function. Otherwise it is replaced with an indirect call
|
||||
based on the function pointer for the destroy function stored in the coroutine
|
||||
frame. Destroying a coroutine that is not suspended leads to undefined behavior.
|
||||
|
||||
This intrinsic implies `coro.dead`.
|
||||
|
||||
.. _coro.resume:
|
||||
|
||||
'llvm.coro.resume' Intrinsic
|
||||
@ -1169,6 +1171,48 @@ Example (standard deallocation functions):
|
||||
call void @free(ptr %mem)
|
||||
ret void
|
||||
|
||||
.. _coro.dead:
|
||||
|
||||
'llvm.coro.dead' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
::
|
||||
|
||||
declare void @llvm.coro.dead(ptr <frame>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The 'llvm.coro.dead' intrinsic is an optimization hint to help Heap Allocation eLision Optimization (HALO)
|
||||
mark the end of lifetime of the coroutine frame.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The argument is a pointer to the coroutine frame. This should be the same
|
||||
pointer that was returned by prior `coro.begin` call.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
A frontend can delegate this intrinsic to indicate that the coroutine frame is dead, allowing
|
||||
coroutines that are not explicitly destroyed via `coro.destroy` to be elided.
|
||||
|
||||
Example:
|
||||
"""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
cleanup:
|
||||
%mem = call ptr @llvm.coro.free(token %id, ptr %frame)
|
||||
%mem_not_null = icmp ne ptr %mem, null
|
||||
br i1 %mem_not_null, label %if.then, label %if.end
|
||||
if.then:
|
||||
call void @CustomFree(ptr %mem)
|
||||
br label %if.end
|
||||
if.end:
|
||||
call void @llvm.coro.dead(ptr %frame)
|
||||
ret void
|
||||
|
||||
.. _coro.alloc:
|
||||
|
||||
'llvm.coro.alloc' Intrinsic
|
||||
|
||||
@ -1849,6 +1849,7 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
|
||||
[IntrReadMem, IntrArgMemOnly,
|
||||
ReadOnly<ArgIndex<1>>,
|
||||
NoCapture<ArgIndex<1>>]>;
|
||||
def int_coro_dead : Intrinsic<[], [llvm_ptr_ty], [IntrNoMem]>;
|
||||
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty, llvm_token_ty], []>;
|
||||
def int_coro_end_results : Intrinsic<[llvm_token_ty], [llvm_vararg_ty]>;
|
||||
def int_coro_end_async
|
||||
|
||||
@ -456,6 +456,20 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
/// This represents the llvm.coro.dead instruction.
|
||||
class CoroDeadInst : public IntrinsicInst {
|
||||
public:
|
||||
Value *getFrame() const { return getArgOperand(0); }
|
||||
|
||||
// Methods to support type inquiry through isa, cast, and dyn_cast:
|
||||
static bool classof(const IntrinsicInst *I) {
|
||||
return I->getIntrinsicID() == Intrinsic::coro_dead;
|
||||
}
|
||||
static bool classof(const Value *V) {
|
||||
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
|
||||
}
|
||||
};
|
||||
|
||||
/// This class represents the llvm.coro.begin or llvm.coro.begin.custom.abi
|
||||
/// instructions.
|
||||
class CoroBeginInst : public IntrinsicInst {
|
||||
|
||||
@ -108,6 +108,8 @@ bool Lowerer::lower(Function &F) {
|
||||
case Intrinsic::coro_free:
|
||||
II->replaceAllUsesWith(II->getArgOperand(1));
|
||||
break;
|
||||
case Intrinsic::coro_dead:
|
||||
break;
|
||||
case Intrinsic::coro_alloc:
|
||||
II->replaceAllUsesWith(ConstantInt::getTrue(Context));
|
||||
break;
|
||||
@ -258,12 +260,12 @@ void NoopCoroElider::eraseFromWorklist(Instruction *I) {
|
||||
|
||||
static bool declaresCoroCleanupIntrinsics(const Module &M) {
|
||||
return coro::declaresIntrinsics(
|
||||
M,
|
||||
{Intrinsic::coro_alloc, Intrinsic::coro_begin, Intrinsic::coro_subfn_addr,
|
||||
Intrinsic::coro_free, Intrinsic::coro_id, Intrinsic::coro_id_retcon,
|
||||
Intrinsic::coro_id_async, Intrinsic::coro_id_retcon_once,
|
||||
Intrinsic::coro_noop, Intrinsic::coro_async_size_replace,
|
||||
Intrinsic::coro_async_resume, Intrinsic::coro_begin_custom_abi});
|
||||
M, {Intrinsic::coro_alloc, Intrinsic::coro_begin,
|
||||
Intrinsic::coro_subfn_addr, Intrinsic::coro_free,
|
||||
Intrinsic::coro_dead, Intrinsic::coro_id, Intrinsic::coro_id_retcon,
|
||||
Intrinsic::coro_id_async, Intrinsic::coro_id_retcon_once,
|
||||
Intrinsic::coro_noop, Intrinsic::coro_async_size_replace,
|
||||
Intrinsic::coro_async_resume, Intrinsic::coro_begin_custom_abi});
|
||||
}
|
||||
|
||||
PreservedAnalyses CoroCleanupPass::run(Module &M,
|
||||
|
||||
@ -73,7 +73,8 @@ private:
|
||||
SmallVector<CoroBeginInst *, 1> CoroBegins;
|
||||
SmallVector<CoroAllocInst *, 1> CoroAllocs;
|
||||
SmallVector<CoroSubFnInst *, 4> ResumeAddr;
|
||||
DenseMap<CoroBeginInst *, SmallVector<CoroSubFnInst *, 4>> DestroyAddr;
|
||||
SmallVector<CoroSubFnInst *, 4> DestroyAddr;
|
||||
DenseMap<CoroBeginInst *, SmallVector<IntrinsicInst *, 4>> BeginDeadMap;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -177,23 +178,28 @@ CoroIdElider::CoroIdElider(CoroIdInst *CoroId, FunctionElideInfo &FEI,
|
||||
CoroAllocs.push_back(CA);
|
||||
}
|
||||
|
||||
// Collect all coro.subfn.addrs associated with coro.begin.
|
||||
// Note, we only devirtualize the calls if their coro.subfn.addr refers to
|
||||
// coro.begin directly. If we run into cases where this check is too
|
||||
// conservative, we can consider relaxing the check.
|
||||
for (CoroBeginInst *CB : CoroBegins) {
|
||||
for (User *U : CB->users())
|
||||
if (auto *II = dyn_cast<CoroSubFnInst>(U))
|
||||
for (User *U : CB->users()) {
|
||||
auto &CoroDeads = BeginDeadMap[CB];
|
||||
// Collect all coro.subfn.addrs associated with coro.begin.
|
||||
// Note, we only devirtualize the calls if their coro.subfn.addr refers to
|
||||
// coro.begin directly. If we run into cases where this check is too
|
||||
// conservative, we can consider relaxing the check.
|
||||
if (auto *II = dyn_cast<CoroSubFnInst>(U)) {
|
||||
switch (II->getIndex()) {
|
||||
case CoroSubFnInst::ResumeIndex:
|
||||
ResumeAddr.push_back(II);
|
||||
break;
|
||||
case CoroSubFnInst::DestroyIndex:
|
||||
DestroyAddr[CB].push_back(II);
|
||||
CoroDeads.push_back(II); // coro.destroy implies coro.dead
|
||||
DestroyAddr.push_back(II);
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unexpected coro.subfn.addr constant");
|
||||
}
|
||||
} else if (auto *II = dyn_cast<CoroDeadInst>(U))
|
||||
CoroDeads.push_back(II);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -240,8 +246,8 @@ void CoroIdElider::elideHeapAllocations(uint64_t FrameSize, Align FrameAlign) {
|
||||
|
||||
bool CoroIdElider::canCoroBeginEscape(
|
||||
const CoroBeginInst *CB, const SmallPtrSetImpl<BasicBlock *> &TIs) const {
|
||||
const auto &It = DestroyAddr.find(CB);
|
||||
assert(It != DestroyAddr.end());
|
||||
const auto &It = BeginDeadMap.find(CB);
|
||||
assert(It != BeginDeadMap.end());
|
||||
|
||||
// Limit the number of blocks we visit.
|
||||
unsigned Limit = 32 * (1 + It->second.size());
|
||||
@ -250,8 +256,8 @@ bool CoroIdElider::canCoroBeginEscape(
|
||||
Worklist.push_back(CB->getParent());
|
||||
|
||||
SmallPtrSet<const BasicBlock *, 32> Visited;
|
||||
// Consider basicblock of coro.destroy as visited one, so that we
|
||||
// skip the path pass through coro.destroy.
|
||||
// Consider basicblock of coro.dead/destroy as visited one, so that we
|
||||
// skip the path pass through it.
|
||||
for (auto *DA : It->second)
|
||||
Visited.insert(DA->getParent());
|
||||
|
||||
@ -327,11 +333,11 @@ bool CoroIdElider::lifetimeEligibleForElide() const {
|
||||
if (CoroAllocs.empty())
|
||||
return false;
|
||||
|
||||
// Check that for every coro.begin there is at least one coro.destroy directly
|
||||
// referencing the SSA value of that coro.begin along each
|
||||
// Check that for every coro.begin there is at least one coro.dead/destroy
|
||||
// directly referencing the SSA value of that coro.begin along each
|
||||
// non-exceptional path.
|
||||
//
|
||||
// If the value escaped, then coro.destroy would have been referencing a
|
||||
// If the value escaped, then coro.dead/destroy would have been referencing a
|
||||
// memory location storing that value and not the virtual register.
|
||||
|
||||
SmallPtrSet<BasicBlock *, 8> Terminators;
|
||||
@ -347,21 +353,16 @@ bool CoroIdElider::lifetimeEligibleForElide() const {
|
||||
Terminators.insert(&B);
|
||||
}
|
||||
|
||||
// Filter out the coro.destroy that lie along exceptional paths.
|
||||
// Filter out the coro.dead/destroy that lie along exceptional paths.
|
||||
for (const auto *CB : CoroBegins) {
|
||||
auto It = DestroyAddr.find(CB);
|
||||
|
||||
// FIXME: If we have not found any destroys for this coro.begin, we
|
||||
// disqualify this elide.
|
||||
if (It == DestroyAddr.end())
|
||||
auto It = BeginDeadMap.find(CB);
|
||||
if (It == BeginDeadMap.end())
|
||||
return false;
|
||||
|
||||
const auto &CorrespondingDestroyAddrs = It->second;
|
||||
|
||||
// If every terminators is dominated by coro.destroy, we could know the
|
||||
// If every terminators is dominated by coro.dead/destroy, we could know the
|
||||
// corresponding coro.begin wouldn't escape.
|
||||
auto DominatesTerminator = [&](auto *TI) {
|
||||
return llvm::any_of(CorrespondingDestroyAddrs, [&](auto *Destroy) {
|
||||
return llvm::any_of(It->second, [&](auto *Destroy) {
|
||||
return DT.dominates(Destroy, TI->getTerminator());
|
||||
});
|
||||
};
|
||||
@ -371,7 +372,7 @@ bool CoroIdElider::lifetimeEligibleForElide() const {
|
||||
|
||||
// Otherwise canCoroBeginEscape would decide whether there is any paths from
|
||||
// coro.begin to Terminators which not pass through any of the
|
||||
// coro.destroys. This is a slower analysis.
|
||||
// coro.dead/destroy. This is a slower analysis.
|
||||
//
|
||||
// canCoroBeginEscape is relatively slow, so we avoid to run it as much as
|
||||
// possible.
|
||||
@ -401,8 +402,7 @@ bool CoroIdElider::attemptElide() {
|
||||
EligibleForElide ? CoroSubFnInst::CleanupIndex
|
||||
: CoroSubFnInst::DestroyIndex);
|
||||
|
||||
for (auto &It : DestroyAddr)
|
||||
replaceWithConstant(DestroyAddrConstant, It.second);
|
||||
replaceWithConstant(DestroyAddrConstant, DestroyAddr);
|
||||
|
||||
auto FrameSizeAndAlign = getFrameLayout(cast<Function>(ResumeAddrConstant));
|
||||
|
||||
|
||||
@ -118,15 +118,17 @@ bool coro::declaresIntrinsics(const Module &M, ArrayRef<Intrinsic::ID> List) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Replace all coro.frees associated with the provided frame with 'null'
|
||||
// Replace all coro.frees associated with the provided frame with 'null' and
|
||||
// erase all associated coro.deads
|
||||
void coro::elideCoroFree(Value *FramePtr) {
|
||||
SmallVector<CoroFreeInst *, 4> CoroFrees;
|
||||
for (User *U : FramePtr->users())
|
||||
if (auto CF = dyn_cast<CoroFreeInst>(U))
|
||||
SmallVector<CoroDeadInst *, 4> CoroDeads;
|
||||
for (User *U : FramePtr->users()) {
|
||||
if (auto *CF = dyn_cast<CoroFreeInst>(U))
|
||||
CoroFrees.push_back(CF);
|
||||
|
||||
if (CoroFrees.empty())
|
||||
return;
|
||||
else if (auto *CD = dyn_cast<CoroDeadInst>(U))
|
||||
CoroDeads.push_back(CD);
|
||||
}
|
||||
|
||||
Value *Replacement =
|
||||
ConstantPointerNull::get(PointerType::get(FramePtr->getContext(), 0));
|
||||
@ -134,6 +136,9 @@ void coro::elideCoroFree(Value *FramePtr) {
|
||||
CF->replaceAllUsesWith(Replacement);
|
||||
CF->eraseFromParent();
|
||||
}
|
||||
|
||||
for (auto *CD : CoroDeads)
|
||||
CD->eraseFromParent();
|
||||
}
|
||||
|
||||
void coro::suppressCoroAllocs(CoroIdInst *CoroId) {
|
||||
|
||||
@ -337,6 +337,33 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test that the coroutine is elided if marked by coro.dead
|
||||
; CHECK-LABEL: @callResume_implicit_destroy(
|
||||
define void @callResume_implicit_destroy() {
|
||||
entry:
|
||||
; CHECK: alloca [4 x i8], align 4
|
||||
; CHECK-NOT: coro.begin
|
||||
; CHECK-NOT: CustomAlloc
|
||||
; CHECK: call void @may_throw()
|
||||
%hdl = call ptr @f()
|
||||
|
||||
; Need to remove 'tail' from the first call to @bar
|
||||
; CHECK-NOT: tail call void @bar(
|
||||
; CHECK: call void @bar(
|
||||
tail call void @bar(ptr %hdl)
|
||||
; CHECK: tail call void @bar(
|
||||
tail call void @bar(ptr null)
|
||||
|
||||
; CHECK-NEXT: call fastcc void @f.resume(ptr %0)
|
||||
%0 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
|
||||
call fastcc void %0(ptr %hdl)
|
||||
|
||||
call void @llvm.coro.dead(ptr %hdl)
|
||||
|
||||
; CHECK-NEXT: ret void
|
||||
ret void
|
||||
}
|
||||
|
||||
declare token @llvm.coro.id(i32, ptr, ptr, ptr)
|
||||
declare i1 @llvm.coro.alloc(token)
|
||||
declare ptr @llvm.coro.free(token, ptr)
|
||||
|
||||
@ -27,9 +27,12 @@ body:
|
||||
cleanup:
|
||||
%mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
|
||||
%need.dyn.free = icmp ne ptr %mem, null
|
||||
br i1 %need.dyn.free, label %dyn.free, label %suspend
|
||||
br i1 %need.dyn.free, label %dyn.free, label %after.free
|
||||
dyn.free:
|
||||
call void @free(ptr %mem)
|
||||
br label %after.free
|
||||
after.free:
|
||||
call void @llvm.coro.dead(ptr %hdl)
|
||||
br label %suspend
|
||||
suspend:
|
||||
call void @llvm.coro.end(ptr %hdl, i1 false, token none)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user