[orc-rt] Fix some Session::shutdown bugs. (#177528)

All calls to Session::shutdown were enquing their on-shutdown-complete
callbacks in Session's ShutdownInfo struct, but this queue is only
drained once by the thread that initiates shutdown. After the queue is
drained, subsequent calls to Session::shutdown were enquing their
callbacks in a queue that would never be drained.

This patch updates Session::shutdown to check whether shutdown has
completed already and, if so, run the on-shutdown-complete immediately.

This patch also fixes a concurrency bug: Session::shutdownComplete was
accessing SI->OnCompletes outside the session mutex, but this could lead
to corruption of SI->OnCompletes if a concurrent call to
Session::shutdown tried to enqueue a new callback to SI->OnCompletes
concurrently. This has been fixed by moving the SI->OnCompletes queue to
a new variable under the Session mutex, then draining the new queue
outside the mutex. (No testcase yet: this was discovered by observation,
and replicating the bug would depend on timing).
This commit is contained in:
Lang Hames 2026-01-23 17:19:16 +11:00 committed by GitHub
parent dcd6468893
commit b043479e0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 8 deletions

View File

@ -19,21 +19,40 @@ Session::ControllerAccess::~ControllerAccess() = default;
Session::~Session() { waitForShutdown(); }
void Session::shutdown(OnShutdownCompleteFn OnShutdownComplete) {
assert(OnShutdownComplete && "OnShutdownComplete must be set");
// Safe to call concurrently / redundantly.
detachFromController();
{
std::scoped_lock<std::mutex> Lock(M);
if (SI) {
// SI exists: someone called shutdown already. If the shutdown is not yet
// complete then just add OnShutdownComplete to the list of pending
// callbacks for the in-progress shutdown, then return.
// (If the shutdown is already complete then we'll run the handler
// directly below).
if (!SI->Complete)
return SI->OnCompletes.push_back(std::move(OnShutdownComplete));
} else {
// SI does not exist: We're the first to call shutdown. Create a
// ShutdownInfo struct and add OnShutdownComplete to the list of pending
// callbacks, then call shutdownNext below (outside the lock).
SI = std::make_unique<ShutdownInfo>();
SI->OnCompletes.push_back(std::move(OnShutdownComplete));
return;
std::swap(SI->ResourceMgrs, ResourceMgrs);
}
SI = std::make_unique<ShutdownInfo>();
SI->OnCompletes.push_back(std::move(OnShutdownComplete));
std::swap(SI->ResourceMgrs, ResourceMgrs);
}
// OnShutdownComplete is set (i.e. not moved into the list of pending
// callbacks). This can only happen if shutdown is already complete. Call
// OnComplete directly and return.
if (OnShutdownComplete)
return OnShutdownComplete();
// OnShutdownComplete is _not_ set (i.e. was moved into the list of pending
// handlers), and we didn't return under the lock above, so we must be
// responsible for the shutdown. Call shutdownNext.
shutdownNext(Error::success());
}
@ -87,14 +106,16 @@ void Session::shutdownComplete() {
TmpDispatcher->shutdown();
for (auto &OnShutdownComplete : SI->OnCompletes)
OnShutdownComplete();
std::vector<OnShutdownCompleteFn> OnCompletes;
{
std::lock_guard<std::mutex> Lock(M);
SI->Complete = true;
OnCompletes = std::move(SI->OnCompletes);
}
for (auto &OnShutdownComplete : OnCompletes)
OnShutdownComplete();
SI->CompleteCV.notify_all();
}

View File

@ -464,3 +464,14 @@ TEST(ControllerAccessTest, CallFromController) {
S.waitForShutdown();
}
TEST(ControllerAccessTest, RedundantAsyncShutdown) {
// Check that redundant calls to shutdown have their callbacks run.
std::deque<std::unique_ptr<Task>> Tasks;
Session S(std::make_unique<EnqueueingDispatcher>(Tasks), noErrors);
S.waitForShutdown();
bool RedundantCallbackRan = false;
S.shutdown([&]() { RedundantCallbackRan = true; });
EXPECT_TRUE(RedundantCallbackRan);
}