[clang][modules] Prevent deadlock in module cache (#182722)
When there's a dependency cycle between modules, the dependency scanner may encounter a deadlock. This was caused by not respecting the lock timeout. But even with the timeout implemented, leaving `unsafeMaybeUnlock()` unimplemented means trying to take a lock after a timeout would still fail and prevent making progress. This PR implements this API in a way to avoid UB on `std::mutex` (when it's unlocked by someone else than the owner). Lastly, this PR makes sure that `unsafeUnlock()` ends the wait of existing threads, so that they don't need to hit the full timeout amount. This PR also implements `-fimplicit-modules-lock-timeout=<seconds>` that allows tweaking the default 90-second lock timeout, and adds `#pragma clang __debug sleep` that makes it easier to achieve desired execution ordering. rdar://170738600
This commit is contained in:
parent
0617623858
commit
de4a1a77e1
@ -13,14 +13,18 @@
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace clang {
|
||||
namespace dependencies {
|
||||
|
||||
struct ModuleCacheEntry {
|
||||
std::shared_mutex CompilationMutex;
|
||||
std::mutex Mutex;
|
||||
std::condition_variable CondVar;
|
||||
bool Locked = false;
|
||||
unsigned Generation = 0;
|
||||
|
||||
std::atomic<std::time_t> Timestamp = 0;
|
||||
};
|
||||
|
||||
|
||||
@ -495,6 +495,9 @@ public:
|
||||
/// The list of files to embed into the compiled module file.
|
||||
std::vector<std::string> ModulesEmbedFiles;
|
||||
|
||||
/// The time in seconds to wait on an implicit module lock before timing out.
|
||||
unsigned ImplicitModulesLockTimeoutSeconds = 90;
|
||||
|
||||
/// The list of AST files to merge.
|
||||
std::vector<std::string> ASTMergeFiles;
|
||||
|
||||
|
||||
@ -3498,6 +3498,12 @@ def fmodule_output : Flag<["-"], "fmodule-output">, Flags<[NoXarchOption]>,
|
||||
Visibility<[ClangOption, CLOption, CC1Option]>,
|
||||
HelpText<"Save intermediate module file results when compiling a standard C++ module unit.">;
|
||||
|
||||
def fimplicit_modules_lock_timeout_EQ : Joined<["-"], "fimplicit-modules-lock-timeout=">,
|
||||
Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, CC1Option]>,
|
||||
MetaVarName<"<seconds>">,
|
||||
HelpText<"Time to wait on an implicit module lock before timing out">,
|
||||
MarshallingInfoInt<FrontendOpts<"ImplicitModulesLockTimeoutSeconds">, "90">;
|
||||
|
||||
defm skip_odr_check_in_gmf : BoolOption<"f", "skip-odr-check-in-gmf",
|
||||
LangOpts<"SkipODRCheckInGMF">, DefaultFalse,
|
||||
PosFlag<SetTrue, [], [CC1Option],
|
||||
|
||||
@ -12,41 +12,62 @@
|
||||
#include "llvm/Support/AdvisoryLock.h"
|
||||
#include "llvm/Support/Chrono.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
using namespace clang;
|
||||
using namespace dependencies;
|
||||
|
||||
namespace {
|
||||
class ReaderWriterLock : public llvm::AdvisoryLock {
|
||||
// TODO: Consider using std::atomic::{wait,notify_all} when we move to C++20.
|
||||
std::unique_lock<std::shared_mutex> OwningLock;
|
||||
ModuleCacheEntry &Entry;
|
||||
std::optional<unsigned> OwnedGeneration;
|
||||
|
||||
public:
|
||||
ReaderWriterLock(std::shared_mutex &Mutex)
|
||||
: OwningLock(Mutex, std::defer_lock) {}
|
||||
ReaderWriterLock(ModuleCacheEntry &Entry) : Entry(Entry) {}
|
||||
|
||||
Expected<bool> tryLock() override { return OwningLock.try_lock(); }
|
||||
Expected<bool> tryLock() override {
|
||||
std::lock_guard<std::mutex> Lock(Entry.Mutex);
|
||||
if (Entry.Locked)
|
||||
return false;
|
||||
Entry.Locked = true;
|
||||
OwnedGeneration = Entry.Generation;
|
||||
return true;
|
||||
}
|
||||
|
||||
llvm::WaitForUnlockResult
|
||||
waitForUnlockFor(std::chrono::seconds MaxSeconds) override {
|
||||
assert(!OwningLock);
|
||||
// We do not respect the timeout here. It's very generous for implicit
|
||||
// modules, so we'd typically only reach it if the owner crashed (but so did
|
||||
// we, since we run in the same process), or encountered deadlock.
|
||||
(void)MaxSeconds;
|
||||
std::shared_lock<std::shared_mutex> Lock(*OwningLock.mutex());
|
||||
return llvm::WaitForUnlockResult::Success;
|
||||
assert(!OwnedGeneration);
|
||||
std::unique_lock<std::mutex> Lock(Entry.Mutex);
|
||||
unsigned CurrentGeneration = Entry.Generation;
|
||||
bool Success = Entry.CondVar.wait_for(Lock, MaxSeconds, [&] {
|
||||
// We check not only Locked, but also Generation to break the wait in case
|
||||
// of unsafeUnlock() and successful tryLock().
|
||||
return !Entry.Locked || Entry.Generation != CurrentGeneration;
|
||||
});
|
||||
return Success ? llvm::WaitForUnlockResult::Success
|
||||
: llvm::WaitForUnlockResult::Timeout;
|
||||
}
|
||||
|
||||
std::error_code unsafeMaybeUnlock() override {
|
||||
// Unlocking the mutex here would trigger UB and we don't expect this to be
|
||||
// actually called when compiling scanning modules due to the no-timeout
|
||||
// guarantee above.
|
||||
std::error_code unsafeUnlock() override {
|
||||
{
|
||||
std::lock_guard<std::mutex> Lock(Entry.Mutex);
|
||||
Entry.Generation += 1;
|
||||
Entry.Locked = false;
|
||||
}
|
||||
Entry.CondVar.notify_all();
|
||||
return {};
|
||||
}
|
||||
|
||||
~ReaderWriterLock() override = default;
|
||||
~ReaderWriterLock() override {
|
||||
if (OwnedGeneration) {
|
||||
{
|
||||
std::lock_guard<std::mutex> Lock(Entry.Mutex);
|
||||
// Avoid stomping over the state managed by someone else after
|
||||
// unsafeUnlock() and successful tryLock().
|
||||
if (*OwnedGeneration == Entry.Generation)
|
||||
Entry.Locked = false;
|
||||
}
|
||||
Entry.CondVar.notify_all();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class InProcessModuleCache : public ModuleCache {
|
||||
@ -64,14 +85,14 @@ public:
|
||||
void prepareForGetLock(StringRef Filename) override {}
|
||||
|
||||
std::unique_ptr<llvm::AdvisoryLock> getLock(StringRef Filename) override {
|
||||
auto &CompilationMutex = [&]() -> std::shared_mutex & {
|
||||
auto &Entry = [&]() -> ModuleCacheEntry & {
|
||||
std::lock_guard<std::mutex> Lock(Entries.Mutex);
|
||||
auto &Entry = Entries.Map[Filename];
|
||||
if (!Entry)
|
||||
Entry = std::make_unique<ModuleCacheEntry>();
|
||||
return Entry->CompilationMutex;
|
||||
return *Entry;
|
||||
}();
|
||||
return std::make_unique<ReaderWriterLock>(CompilationMutex);
|
||||
return std::make_unique<ReaderWriterLock>(Entry);
|
||||
}
|
||||
|
||||
std::time_t getModuleTimestamp(StringRef Filename) override {
|
||||
|
||||
@ -3912,6 +3912,8 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D,
|
||||
Path.insert(Path.begin(), Arg, Arg + strlen(Arg));
|
||||
CmdArgs.push_back(Args.MakeArgString(Path));
|
||||
}
|
||||
|
||||
Args.AddLastArg(CmdArgs, options::OPT_fimplicit_modules_lock_timeout_EQ);
|
||||
}
|
||||
|
||||
if (HaveModules) {
|
||||
|
||||
@ -1514,7 +1514,9 @@ static bool compileModuleAndReadASTBehindLock(
|
||||
|
||||
// Someone else is responsible for building the module. Wait for them to
|
||||
// finish.
|
||||
switch (Lock->waitForUnlockFor(std::chrono::seconds(90))) {
|
||||
unsigned Timeout =
|
||||
ImportingInstance.getFrontendOpts().ImplicitModulesLockTimeoutSeconds;
|
||||
switch (Lock->waitForUnlockFor(std::chrono::seconds(Timeout))) {
|
||||
case llvm::WaitForUnlockResult::Success:
|
||||
break; // The interesting case.
|
||||
case llvm::WaitForUnlockResult::OwnerDied:
|
||||
@ -1522,11 +1524,11 @@ static bool compileModuleAndReadASTBehindLock(
|
||||
case llvm::WaitForUnlockResult::Timeout:
|
||||
// Since the InMemoryModuleCache takes care of correctness, we try waiting
|
||||
// for someone else to complete the build so that it does not happen
|
||||
// twice. In case of timeout, build it ourselves.
|
||||
// twice. In case of timeout, try to build it ourselves again.
|
||||
Diags.Report(ModuleNameLoc, diag::remark_module_lock_timeout)
|
||||
<< Module->Name;
|
||||
// Clear the lock file so that future invocations can make progress.
|
||||
Lock->unsafeMaybeUnlock();
|
||||
Lock->unsafeUnlock();
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@ -46,6 +46,7 @@
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@ -1079,6 +1080,8 @@ struct PragmaDebugHandler : public PragmaHandler {
|
||||
Crasher.setAnnotationRange(SourceRange(Tok.getLocation()));
|
||||
PP.EnterToken(Crasher, /*IsReinject*/ false);
|
||||
}
|
||||
} else if (II->isStr("sleep")) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
} else if (II->isStr("dump")) {
|
||||
Token DumpAnnot;
|
||||
DumpAnnot.startToken();
|
||||
|
||||
37
clang/test/ClangScanDeps/modules-cycle-deadlock.c
Normal file
37
clang/test/ClangScanDeps/modules-cycle-deadlock.c
Normal file
@ -0,0 +1,37 @@
|
||||
// This test checks that implicit modules do not encounter a deadlock on a dependency cycle.
|
||||
|
||||
// RUN: rm -rf %t
|
||||
// RUN: split-file %s %t
|
||||
// RUN: sed "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
|
||||
// RUN: not clang-scan-deps -mode preprocess -format experimental-full \
|
||||
// RUN: -compilation-database %t/cdb.json -j 2 -o %t/out 2> %t/err
|
||||
// RUN: FileCheck %s --input-file %t/err
|
||||
// CHECK-DAG: fatal error: cyclic dependency in module 'M': M -> N -> M
|
||||
// CHECK-DAG: fatal error: cyclic dependency in module 'N': N -> M -> N
|
||||
|
||||
//--- cdb.json.in
|
||||
[
|
||||
{
|
||||
"file": "DIR/tu1.c",
|
||||
"directory": "DIR",
|
||||
"command": "clang -fmodules -fmodules-cache-path=DIR/cache -fimplicit-modules-lock-timeout=3 DIR/tu1.c -o DIR/tu1.o"
|
||||
},
|
||||
{
|
||||
"file": "DIR/tu2.c",
|
||||
"directory": "DIR",
|
||||
"command": "clang -fmodules -fmodules-cache-path=DIR/cache -fimplicit-modules-lock-timeout=3 DIR/tu2.c -o DIR/tu2.o"
|
||||
}
|
||||
]
|
||||
//--- tu1.c
|
||||
#include "m.h"
|
||||
//--- tu2.c
|
||||
#include "n.h"
|
||||
//--- module.modulemap
|
||||
module M { header "m.h" }
|
||||
module N { header "n.h" }
|
||||
//--- m.h
|
||||
#pragma clang __debug sleep // Give enough time for tu2.c to start building N.
|
||||
#include "n.h"
|
||||
//--- n.h
|
||||
#pragma clang __debug sleep // Give enough time for tu1.c to start building M.
|
||||
#include "m.h"
|
||||
36
clang/test/Modules/implicit-cycle-deadlock.c
Normal file
36
clang/test/Modules/implicit-cycle-deadlock.c
Normal file
@ -0,0 +1,36 @@
|
||||
// This test checks that implicit modules do not encounter a deadlock on a dependency cycle.
|
||||
|
||||
// RUN: rm -rf %t
|
||||
// RUN: split-file %s %t
|
||||
|
||||
// RUN: %python %t/run_concurrently.py \
|
||||
// RUN: "not %clang -fsyntax-only -fmodules -fmodules-cache-path=%t/cache -fimplicit-modules-lock-timeout=3 %t/tu1.c 2> %t/err1" \
|
||||
// RUN: "not %clang -fsyntax-only -fmodules -fmodules-cache-path=%t/cache -fimplicit-modules-lock-timeout=3 %t/tu2.c 2> %t/err2"
|
||||
|
||||
// RUN: FileCheck %s --input-file %t/err1 --check-prefix=CHECK1
|
||||
// RUN: FileCheck %s --input-file %t/err2 --check-prefix=CHECK2
|
||||
// CHECK1: fatal error: cyclic dependency in module 'M': M -> N -> M
|
||||
// CHECK2: fatal error: cyclic dependency in module 'N': N -> M -> N
|
||||
|
||||
//--- run_concurrently.py
|
||||
import subprocess, sys, threading
|
||||
|
||||
def run(cmd):
|
||||
subprocess.run(cmd, shell=True)
|
||||
|
||||
threads = [threading.Thread(target=run, args=(cmd,)) for cmd in sys.argv[1:]]
|
||||
for t in threads: t.start()
|
||||
for t in threads: t.join()
|
||||
//--- tu1.c
|
||||
#include "m.h"
|
||||
//--- tu2.c
|
||||
#include "n.h"
|
||||
//--- module.modulemap
|
||||
module M { header "m.h" }
|
||||
module N { header "n.h" }
|
||||
//--- m.h
|
||||
#pragma clang __debug sleep // Give enough time for tu2.c to start building N.
|
||||
#include "n.h"
|
||||
//--- n.h
|
||||
#pragma clang __debug sleep // Give enough time for tu1.c to start building M.
|
||||
#include "m.h"
|
||||
@ -48,7 +48,7 @@ public:
|
||||
/// For a lock owned by someone else, unlock it. A permitted side-effect is
|
||||
/// that another thread/process may acquire ownership of the lock before the
|
||||
/// existing owner unlocks it. This is an unsafe operation.
|
||||
virtual std::error_code unsafeMaybeUnlock() = 0;
|
||||
virtual std::error_code unsafeUnlock() = 0;
|
||||
|
||||
/// Unlocks the lock if its ownership was previously acquired by \c tryLock().
|
||||
virtual ~AdvisoryLock() = default;
|
||||
|
||||
@ -61,7 +61,7 @@ public:
|
||||
|
||||
/// Remove the lock file. This may delete a different lock file than
|
||||
/// the one previously read if there is a race.
|
||||
std::error_code unsafeMaybeUnlock() override;
|
||||
std::error_code unsafeUnlock() override;
|
||||
|
||||
/// Unlocks the lock if previously acquired by \c tryLock().
|
||||
~LockFileManager() override;
|
||||
|
||||
@ -294,7 +294,7 @@ LockFileManager::waitForUnlockFor(std::chrono::seconds MaxSeconds) {
|
||||
return WaitForUnlockResult::Timeout;
|
||||
}
|
||||
|
||||
std::error_code LockFileManager::unsafeMaybeUnlock() {
|
||||
std::error_code LockFileManager::unsafeUnlock() {
|
||||
auto BypassSandbox = sys::sandbox::scopedDisable();
|
||||
|
||||
return sys::fs::remove(LockFileName);
|
||||
|
||||
@ -496,7 +496,7 @@ Error OnDiskOutputFile::keep() {
|
||||
if (Error Err = Lock.tryLock().moveInto(Owned)) {
|
||||
// If we error acquiring a lock, we cannot ensure appends
|
||||
// to the trace file are atomic - cannot ensure output correctness.
|
||||
Lock.unsafeMaybeUnlock();
|
||||
Lock.unsafeUnlock();
|
||||
return convertToOutputError(
|
||||
OutputPath, std::make_error_code(std::errc::no_lock_available));
|
||||
}
|
||||
@ -508,7 +508,7 @@ Error OnDiskOutputFile::keep() {
|
||||
return convertToOutputError(OutputPath, EC);
|
||||
Out << (*Content)->getBuffer();
|
||||
Out.close();
|
||||
Lock.unsafeMaybeUnlock();
|
||||
Lock.unsafeUnlock();
|
||||
if (Out.has_error())
|
||||
return convertToOutputError(OutputPath, Out.error());
|
||||
// Remove temp file and done.
|
||||
@ -528,7 +528,7 @@ Error OnDiskOutputFile::keep() {
|
||||
// the lock, causing other waiting processes to time-out. Let's clear
|
||||
// the lock and try again right away. If we do start seeing compiler
|
||||
// hangs in this location, we will need to re-consider.
|
||||
Lock.unsafeMaybeUnlock();
|
||||
Lock.unsafeUnlock();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1575,7 +1575,7 @@ PreservedAnalyses AMDGPUSplitModulePass::run(Module &M,
|
||||
dbgs()
|
||||
<< "[amdgpu-split-module] unable to acquire lockfile, debug "
|
||||
"output may be mangled by other processes\n");
|
||||
Lock.unsafeMaybeUnlock();
|
||||
Lock.unsafeUnlock();
|
||||
break; // give up
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user