[clang][modules] Prevent deadlock in module cache (#182722)

When there's a dependency cycle between modules, the dependency scanner
may encounter a deadlock. This was caused by not respecting the lock
timeout. But even with the timeout implemented, leaving
`unsafeMaybeUnlock()` unimplemented means trying to take a lock after a
timeout would still fail and prevent making progress. This PR implements
this API in a way to avoid UB on `std::mutex` (when it's unlocked by
someone else than the owner). Lastly, this PR makes sure that
`unsafeUnlock()` ends the wait of existing threads, so that they don't
need to hit the full timeout amount.

This PR also implements `-fimplicit-modules-lock-timeout=<seconds>` that
allows tweaking the default 90-second lock timeout, and adds `#pragma
clang __debug sleep` that makes it easier to achieve desired execution
ordering.

rdar://170738600
This commit is contained in:
Jan Svoboda 2026-02-27 09:36:14 -08:00 committed by GitHub
parent 0617623858
commit de4a1a77e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 148 additions and 34 deletions

View File

@ -13,14 +13,18 @@
#include "llvm/ADT/StringMap.h"
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <shared_mutex>
namespace clang {
namespace dependencies {
struct ModuleCacheEntry {
std::shared_mutex CompilationMutex;
std::mutex Mutex;
std::condition_variable CondVar;
bool Locked = false;
unsigned Generation = 0;
std::atomic<std::time_t> Timestamp = 0;
};

View File

@ -495,6 +495,9 @@ public:
/// The list of files to embed into the compiled module file.
std::vector<std::string> ModulesEmbedFiles;
/// The time in seconds to wait on an implicit module lock before timing out.
unsigned ImplicitModulesLockTimeoutSeconds = 90;
/// The list of AST files to merge.
std::vector<std::string> ASTMergeFiles;

View File

@ -3498,6 +3498,12 @@ def fmodule_output : Flag<["-"], "fmodule-output">, Flags<[NoXarchOption]>,
Visibility<[ClangOption, CLOption, CC1Option]>,
HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">;
def fimplicit_modules_lock_timeout_EQ : Joined<["-"], "fimplicit-modules-lock-timeout=">,
Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>,
MetaVarName<"<seconds>">,
HelpText<"Time to wait on an implicit module lock before timing out">,
MarshallingInfoInt<FrontendOpts<"ImplicitModulesLockTimeoutSeconds">, "90">;
defm skip_odr_check_in_gmf : BoolOption<"f", "skip-odr-check-in-gmf",
LangOpts<"SkipODRCheckInGMF">, DefaultFalse,
PosFlag<SetTrue, [], [CC1Option],

View File

@ -12,41 +12,62 @@
#include "llvm/Support/AdvisoryLock.h"
#include "llvm/Support/Chrono.h"
#include <mutex>
using namespace clang;
using namespace dependencies;
namespace {
class ReaderWriterLock : public llvm::AdvisoryLock {
// TODO: Consider using std::atomic::{wait,notify_all} when we move to C++20.
std::unique_lock<std::shared_mutex> OwningLock;
ModuleCacheEntry &Entry;
std::optional<unsigned> OwnedGeneration;
public:
ReaderWriterLock(std::shared_mutex &Mutex)
: OwningLock(Mutex, std::defer_lock) {}
ReaderWriterLock(ModuleCacheEntry &Entry) : Entry(Entry) {}
Expected<bool> tryLock() override { return OwningLock.try_lock(); }
Expected<bool> tryLock() override {
std::lock_guard<std::mutex> Lock(Entry.Mutex);
if (Entry.Locked)
return false;
Entry.Locked = true;
OwnedGeneration = Entry.Generation;
return true;
}
llvm::WaitForUnlockResult
waitForUnlockFor(std::chrono::seconds MaxSeconds) override {
assert(!OwningLock);
// We do not respect the timeout here. It's very generous for implicit
// modules, so we'd typically only reach it if the owner crashed (but so did
// we, since we run in the same process), or encountered deadlock.
(void)MaxSeconds;
std::shared_lock<std::shared_mutex> Lock(*OwningLock.mutex());
return llvm::WaitForUnlockResult::Success;
assert(!OwnedGeneration);
std::unique_lock<std::mutex> Lock(Entry.Mutex);
unsigned CurrentGeneration = Entry.Generation;
bool Success = Entry.CondVar.wait_for(Lock, MaxSeconds, [&] {
// We check not only Locked, but also Generation to break the wait in case
// of unsafeUnlock() and successful tryLock().
return !Entry.Locked || Entry.Generation != CurrentGeneration;
});
return Success ? llvm::WaitForUnlockResult::Success
: llvm::WaitForUnlockResult::Timeout;
}
std::error_code unsafeMaybeUnlock() override {
// Unlocking the mutex here would trigger UB and we don't expect this to be
// actually called when compiling scanning modules due to the no-timeout
// guarantee above.
std::error_code unsafeUnlock() override {
{
std::lock_guard<std::mutex> Lock(Entry.Mutex);
Entry.Generation += 1;
Entry.Locked = false;
}
Entry.CondVar.notify_all();
return {};
}
~ReaderWriterLock() override = default;
~ReaderWriterLock() override {
if (OwnedGeneration) {
{
std::lock_guard<std::mutex> Lock(Entry.Mutex);
// Avoid stomping over the state managed by someone else after
// unsafeUnlock() and successful tryLock().
if (*OwnedGeneration == Entry.Generation)
Entry.Locked = false;
}
Entry.CondVar.notify_all();
}
}
};
class InProcessModuleCache : public ModuleCache {
@ -64,14 +85,14 @@ public:
void prepareForGetLock(StringRef Filename) override {}
std::unique_ptr<llvm::AdvisoryLock> getLock(StringRef Filename) override {
auto &CompilationMutex = [&]() -> std::shared_mutex & {
auto &Entry = [&]() -> ModuleCacheEntry & {
std::lock_guard<std::mutex> Lock(Entries.Mutex);
auto &Entry = Entries.Map[Filename];
if (!Entry)
Entry = std::make_unique<ModuleCacheEntry>();
return Entry->CompilationMutex;
return *Entry;
}();
return std::make_unique<ReaderWriterLock>(CompilationMutex);
return std::make_unique<ReaderWriterLock>(Entry);
}
std::time_t getModuleTimestamp(StringRef Filename) override {

View File

@ -3912,6 +3912,8 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D,
Path.insert(Path.begin(), Arg, Arg + strlen(Arg));
CmdArgs.push_back(Args.MakeArgString(Path));
}
Args.AddLastArg(CmdArgs, options::OPT_fimplicit_modules_lock_timeout_EQ);
}
if (HaveModules) {

View File

@ -1514,7 +1514,9 @@ static bool compileModuleAndReadASTBehindLock(
// Someone else is responsible for building the module. Wait for them to
// finish.
switch (Lock->waitForUnlockFor(std::chrono::seconds(90))) {
unsigned Timeout =
ImportingInstance.getFrontendOpts().ImplicitModulesLockTimeoutSeconds;
switch (Lock->waitForUnlockFor(std::chrono::seconds(Timeout))) {
case llvm::WaitForUnlockResult::Success:
break; // The interesting case.
case llvm::WaitForUnlockResult::OwnerDied:
@ -1522,11 +1524,11 @@ static bool compileModuleAndReadASTBehindLock(
case llvm::WaitForUnlockResult::Timeout:
// Since the InMemoryModuleCache takes care of correctness, we try waiting
// for someone else to complete the build so that it does not happen
// twice. In case of timeout, build it ourselves.
// twice. In case of timeout, try to build it ourselves again.
Diags.Report(ModuleNameLoc, diag::remark_module_lock_timeout)
<< Module->Name;
// Clear the lock file so that future invocations can make progress.
Lock->unsafeMaybeUnlock();
Lock->unsafeUnlock();
continue;
}

View File

@ -46,6 +46,7 @@
#include <cstdint>
#include <optional>
#include <string>
#include <thread>
#include <utility>
#include <vector>
@ -1079,6 +1080,8 @@ struct PragmaDebugHandler : public PragmaHandler {
Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
PP.EnterToken(Crasher, /*IsReinject*/ false);
}
} else if (II->isStr("sleep")) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
} else if (II->isStr("dump")) {
Token DumpAnnot;
DumpAnnot.startToken();

View File

@ -0,0 +1,37 @@
// This test checks that implicit modules do not encounter a deadlock on a dependency cycle.
// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
// RUN: not clang-scan-deps -mode preprocess -format experimental-full \
// RUN: -compilation-database %t/cdb.json -j 2 -o %t/out 2> %t/err
// RUN: FileCheck %s --input-file %t/err
// CHECK-DAG: fatal error: cyclic dependency in module 'M': M -> N -> M
// CHECK-DAG: fatal error: cyclic dependency in module 'N': N -> M -> N
//--- cdb.json.in
[
{
"file": "DIR/tu1.c",
"directory": "DIR",
"command": "clang -fmodules -fmodules-cache-path=DIR/cache -fimplicit-modules-lock-timeout=3 DIR/tu1.c -o DIR/tu1.o"
},
{
"file": "DIR/tu2.c",
"directory": "DIR",
"command": "clang -fmodules -fmodules-cache-path=DIR/cache -fimplicit-modules-lock-timeout=3 DIR/tu2.c -o DIR/tu2.o"
}
]
//--- tu1.c
#include "m.h"
//--- tu2.c
#include "n.h"
//--- module.modulemap
module M { header "m.h" }
module N { header "n.h" }
//--- m.h
#pragma clang __debug sleep // Give enough time for tu2.c to start building N.
#include "n.h"
//--- n.h
#pragma clang __debug sleep // Give enough time for tu1.c to start building M.
#include "m.h"

View File

@ -0,0 +1,36 @@
// This test checks that implicit modules do not encounter a deadlock on a dependency cycle.
// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: %python %t/run_concurrently.py \
// RUN: "not %clang -fsyntax-only -fmodules -fmodules-cache-path=%t/cache -fimplicit-modules-lock-timeout=3 %t/tu1.c 2> %t/err1" \
// RUN: "not %clang -fsyntax-only -fmodules -fmodules-cache-path=%t/cache -fimplicit-modules-lock-timeout=3 %t/tu2.c 2> %t/err2"
// RUN: FileCheck %s --input-file %t/err1 --check-prefix=CHECK1
// RUN: FileCheck %s --input-file %t/err2 --check-prefix=CHECK2
// CHECK1: fatal error: cyclic dependency in module 'M': M -> N -> M
// CHECK2: fatal error: cyclic dependency in module 'N': N -> M -> N
//--- run_concurrently.py
import subprocess, sys, threading
def run(cmd):
subprocess.run(cmd, shell=True)
threads = [threading.Thread(target=run, args=(cmd,)) for cmd in sys.argv[1:]]
for t in threads: t.start()
for t in threads: t.join()
//--- tu1.c
#include "m.h"
//--- tu2.c
#include "n.h"
//--- module.modulemap
module M { header "m.h" }
module N { header "n.h" }
//--- m.h
#pragma clang __debug sleep // Give enough time for tu2.c to start building N.
#include "n.h"
//--- n.h
#pragma clang __debug sleep // Give enough time for tu1.c to start building M.
#include "m.h"

View File

@ -48,7 +48,7 @@ public:
/// For a lock owned by someone else, unlock it. A permitted side-effect is
/// that another thread/process may acquire ownership of the lock before the
/// existing owner unlocks it. This is an unsafe operation.
virtual std::error_code unsafeMaybeUnlock() = 0;
virtual std::error_code unsafeUnlock() = 0;
/// Unlocks the lock if its ownership was previously acquired by \c tryLock().
virtual ~AdvisoryLock() = default;

View File

@ -61,7 +61,7 @@ public:
/// Remove the lock file. This may delete a different lock file than
/// the one previously read if there is a race.
std::error_code unsafeMaybeUnlock() override;
std::error_code unsafeUnlock() override;
/// Unlocks the lock if previously acquired by \c tryLock().
~LockFileManager() override;

View File

@ -294,7 +294,7 @@ LockFileManager::waitForUnlockFor(std::chrono::seconds MaxSeconds) {
return WaitForUnlockResult::Timeout;
}
std::error_code LockFileManager::unsafeMaybeUnlock() {
std::error_code LockFileManager::unsafeUnlock() {
auto BypassSandbox = sys::sandbox::scopedDisable();
return sys::fs::remove(LockFileName);

View File

@ -496,7 +496,7 @@ Error OnDiskOutputFile::keep() {
if (Error Err = Lock.tryLock().moveInto(Owned)) {
// If we error acquiring a lock, we cannot ensure appends
// to the trace file are atomic - cannot ensure output correctness.
Lock.unsafeMaybeUnlock();
Lock.unsafeUnlock();
return convertToOutputError(
OutputPath, std::make_error_code(std::errc::no_lock_available));
}
@ -508,7 +508,7 @@ Error OnDiskOutputFile::keep() {
return convertToOutputError(OutputPath, EC);
Out << (*Content)->getBuffer();
Out.close();
Lock.unsafeMaybeUnlock();
Lock.unsafeUnlock();
if (Out.has_error())
return convertToOutputError(OutputPath, Out.error());
// Remove temp file and done.
@ -528,7 +528,7 @@ Error OnDiskOutputFile::keep() {
// the lock, causing other waiting processes to time-out. Let's clear
// the lock and try again right away. If we do start seeing compiler
// hangs in this location, we will need to re-consider.
Lock.unsafeMaybeUnlock();
Lock.unsafeUnlock();
continue;
}
}

View File

@ -1575,7 +1575,7 @@ PreservedAnalyses AMDGPUSplitModulePass::run(Module &M,
dbgs()
<< "[amdgpu-split-module] unable to acquire lockfile, debug "
"output may be mangled by other processes\n");
Lock.unsafeMaybeUnlock();
Lock.unsafeUnlock();
break; // give up
}
}