[clang] Introduce ModuleCache::read() (#188876)

This PR introduces a new `ModuleCache` API for reading PCM files, so that
these reads no longer go through the `FileManager` and VFS. Going through
the VFS is problematic downstream: we interpose a VFS that unintentionally
shuffles implicitly-built modules in and out of the CAS database, leading to
unnecessary storage and runtime overhead. Moreover, this (together with
a writing API) will enable adding a caching layer into the
`InProcessModuleCache` implementation, hopefully reducing IO cost.
This commit is contained in:
Jan Svoboda 2026-04-01 12:18:06 -07:00 committed by GitHub
parent 75333a0981
commit 8ea475fdf9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 75 additions and 22 deletions

View File

@ -12,9 +12,11 @@
#include "clang/Basic/LLVM.h"
#include <ctime>
#include <memory>
namespace llvm {
class AdvisoryLock;
class MemoryBuffer;
} // namespace llvm
namespace clang {
@ -52,7 +54,10 @@ public:
virtual InMemoryModuleCache &getInMemoryModuleCache() = 0;
virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0;
// TODO: Virtualize writing/reading PCM files, etc.
// TODO: Virtualize writing PCM files.
/// Reads the module file \p FileName through the module cache and returns
/// its contents as a memory buffer, or an error if the file could not be
/// read.
///
/// \param FileName path of the file to read.
/// \param[out] Size receives the size of the file.
/// \param[out] ModTime receives the modification time of the file.
///
/// NOTE(review): the shared `readImpl()` helper only writes \p Size and
/// \p ModTime on success; confirm that every override keeps that contract.
virtual Expected<std::unique_ptr<llvm::MemoryBuffer>>
read(StringRef FileName, off_t &Size, time_t &ModTime) = 0;
virtual ~ModuleCache() = default;
};
@ -65,6 +70,10 @@ std::shared_ptr<ModuleCache> createCrossProcessModuleCache();
/// Shared implementation of `ModuleCache::maybePrune()`.
void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter);
/// Shared implementation of `ModuleCache::read()`.
///
/// Opens \p FileName for reading, queries the status of the opened file and
/// loads its contents into a memory buffer (no null terminator is
/// requested).
///
/// \param FileName path of the file to read.
/// \param[out] Size set to the size reported by the status query; written
///             only when the read succeeds.
/// \param[out] ModTime set to the last modification time converted to
///             `time_t`; written only when the read succeeds.
/// \returns the buffer, or the error produced by opening, querying or
///          reading the file.
Expected<std::unique_ptr<llvm::MemoryBuffer>>
readImpl(StringRef FileName, off_t &Size, time_t &ModTime);
} // namespace clang
#endif

View File

@ -11,6 +11,8 @@
#include "clang/Serialization/InMemoryModuleCache.h"
#include "llvm/Support/AdvisoryLock.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/IOSandbox.h"
using namespace clang;
using namespace dependencies;
@ -131,6 +133,16 @@ public:
const InMemoryModuleCache &getInMemoryModuleCache() const override {
return InMemory;
}
/// Reads \p FileName via the shared `readImpl()` helper, reporting the file
/// size and modification time through \p Size and \p ModTime.
Expected<std::unique_ptr<llvm::MemoryBuffer>>
read(StringRef FileName, off_t &Size, time_t &ModTime) override {
  // Module files are inputs/outputs internal to the compiler, so the IO
  // sandbox is disabled for the duration of this call.
  auto SandboxBypass = llvm::sys::sandbox::scopedDisable();

  // FIXME: Hitting the disk is only necessary once per build rather than in
  // every compilation. Introduce an in-memory cache.
  return readImpl(FileName, Size, ModTime);
}
};
} // namespace

View File

@ -10,6 +10,7 @@
#include "clang/Serialization/InMemoryModuleCache.h"
#include "clang/Serialization/ModuleFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/IOSandbox.h"
#include "llvm/Support/LockFileManager.h"
@ -101,6 +102,25 @@ void clang::maybePruneImpl(StringRef Path, time_t PruneInterval,
}
}
/// Shared implementation of `ModuleCache::read()`.
///
/// Opens \p FileName directly through `llvm::sys::fs` (not through the
/// `FileManager`), queries the status of the opened handle and loads the
/// contents into a memory buffer.
///
/// \param FileName path of the file to read.
/// \param[out] Size set to the file size reported by the status query;
///             written only on success.
/// \param[out] ModTime set to the last modification time converted to
///             `time_t`; written only on success.
/// \returns the buffer, or the error produced by opening, querying or
///          reading the file.
Expected<std::unique_ptr<llvm::MemoryBuffer>>
clang::readImpl(StringRef FileName, off_t &Size, time_t &ModTime) {
  Expected<llvm::sys::fs::file_t> FD =
      llvm::sys::fs::openNativeFileForRead(FileName);
  if (!FD)
    return FD.takeError();

  // Keep everything that uses the open handle in one place so that the
  // handle can be closed exactly once below, regardless of which path is
  // taken.
  auto Result = [&]() -> Expected<std::unique_ptr<llvm::MemoryBuffer>> {
    llvm::sys::fs::file_status Status;
    if (std::error_code EC = llvm::sys::fs::status(*FD, Status))
      return llvm::errorCodeToError(EC);

    // NOTE(review): IsVolatile is left at its default here; confirm that is
    // intended for module files that other compiler processes may rebuild.
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buf =
        llvm::MemoryBuffer::getOpenFile(*FD, FileName, Status.getSize(),
                                        /*RequiresNullTerminator=*/false);
    if (!Buf)
      return llvm::errorCodeToError(Buf.getError());

    // Only report the metadata once the read itself has succeeded.
    Size = Status.getSize();
    ModTime = llvm::sys::toTimeT(Status.getLastModificationTime());
    return std::move(*Buf);
  }();

  // getOpenFile() does not take ownership of the handle, so it must be
  // closed here; otherwise every module read leaks a file descriptor. The
  // buffer contents stay valid after the handle is closed.
  (void)llvm::sys::fs::closeFile(*FD);
  return Result;
}
namespace {
class CrossProcessModuleCache : public ModuleCache {
InMemoryModuleCache InMemory;
@ -161,6 +181,14 @@ public:
const InMemoryModuleCache &getInMemoryModuleCache() const override {
return InMemory;
}
/// Reads \p FileName via the shared `readImpl()` helper, reporting the file
/// size and modification time through \p Size and \p ModTime.
Expected<std::unique_ptr<llvm::MemoryBuffer>>
read(StringRef FileName, off_t &Size, time_t &ModTime) override {
  // Module files are inputs/outputs internal to the compiler, so the IO
  // sandbox is disabled for the duration of this call.
  auto SandboxBypass = llvm::sys::sandbox::scopedDisable();

  return readImpl(FileName, Size, ModTime);
}
};
} // namespace

View File

@ -28,6 +28,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
@ -188,33 +189,36 @@ ModuleManager::AddModuleResult ModuleManager::addModule(
// import it earlier.
return OutOfDate;
} else {
OptionalFileEntryRef Entry =
expectedToOptional(FileName == StringRef("-")
? FileMgr.getSTDIN()
: FileMgr.getFileRef(FileName, /*OpenFile=*/true,
/*CacheFailure=*/false));
if (!Entry) {
ErrorStr = "module file not found";
return Missing;
}
auto Buf = [&]() -> Expected<std::unique_ptr<llvm::MemoryBuffer>> {
// Implicit modules live in the module cache.
if (FileName.getImplicitModuleSuffixLength())
return ModCache.read(FileName, Size, ModTime);
// Get a buffer of the file and close the file descriptor when done.
// The file is volatile because in a parallel build we expect multiple
// compiler processes to use the same module file rebuilding it if needed.
//
// RequiresNullTerminator is false because module files don't need it, and
// this allows the file to still be mmapped.
auto Buf = FileMgr.getBufferForFile(*Entry,
/*IsVolatile=*/true,
/*RequiresNullTerminator=*/false);
// Explicit modules are treated as any other compiler input file, load
// them via FileManager.
Expected<FileEntryRef> Entry =
FileName == StringRef("-")
? FileMgr.getSTDIN()
: FileMgr.getFileRef(FileName, /*OpenFile=*/true,
/*CacheFailure=*/false);
if (!Entry)
return Entry.takeError();
Size = Entry->getSize();
ModTime = Entry->getModificationTime();
// RequiresNullTerminator is false because module files don't need it, and
// this allows the file to still be mmapped.
return llvm::errorOrToExpected(
FileMgr.getBufferForFile(*Entry, /*IsVolatile=*/false,
/*RequiresNullTerminator=*/false));
}();
if (!Buf) {
ErrorStr = Buf.getError().message();
ErrorStr = llvm::toString(Buf.takeError());
return Missing;
}
Size = Entry->getSize();
ModTime = Entry->getModificationTime();
NewFileBuffer = std::move(*Buf);
ModuleBuffer = NewFileBuffer.get();
}