From 75c6f4791c8c1031e5ab170a9987c4d300026e1e Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 1 Apr 2026 13:00:46 -0700 Subject: [PATCH] [clang] Introduce `ModuleCache::write()` (#188877) This introduces new `ModuleCache` interface for writing PCM files. Together with #188876, this will enable adding a caching layer into the `InProcessModuleCache` implementation, hopefully reducing IO cost. Moreover, this makes it super explicit that the PCM is written before its timestamp, which is an important invariant that we've broken before. --- .../clang/Basic/DiagnosticCommonKinds.td | 1 + .../include/clang/Frontend/CompilerInstance.h | 10 +-- .../include/clang/Frontend/FrontendActions.h | 12 ++++ .../include/clang/Serialization/ModuleCache.h | 9 ++- .../InProcessModuleCache.cpp | 9 +++ clang/lib/Frontend/CompilerInstance.cpp | 61 ++++++++++++++----- clang/lib/Frontend/FrontendActions.cpp | 3 +- clang/lib/Serialization/ModuleCache.cpp | 40 ++++++++++++ 8 files changed, 123 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index cb267e3ee05c..b5f99606789f 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -103,6 +103,7 @@ def err_deleted_non_function : Error< "only functions can have deleted definitions">; def err_module_not_found : Error<"module '%0' not found">, DefaultFatal; def err_module_not_built : Error<"could not build module '%0'">, DefaultFatal; +def err_module_not_written : Error<"could not write module file for '%0' to '%1': %2">, DefaultFatal; def err_module_build_disabled: Error< "module '%0' is needed but has not been provided, and implicit use of module " "files is disabled">, DefaultFatal; diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index 0d684d5c7f9f..01f83498d8c8 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -937,12 +937,14 @@ public: std::optional ThreadSafeConfig = std::nullopt); /// Compile a module file for the given module, using the options - /// provided by the importing compiler instance. Returns true if the module - /// was built without errors. + /// provided by the importing compiler instance. Returns the PCM file in + /// a buffer. // FIXME: This should be private, but it's called from static non-member // functions in the implementation file. - bool compileModule(SourceLocation ImportLoc, StringRef ModuleName, - StringRef ModuleFileName, CompilerInstance &Instance); + std::unique_ptr compileModule(SourceLocation ImportLoc, + StringRef ModuleName, + StringRef ModuleFileName, + CompilerInstance &Instance); ModuleLoadResult loadModule(SourceLocation ImportLoc, ModuleIdPath Path, Module::NameVisibilityKind Visibility, diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h index 87a9f0d4cb06..c5aff7ae1a71 100644 --- a/clang/include/clang/Frontend/FrontendActions.h +++ b/clang/include/clang/Frontend/FrontendActions.h @@ -114,6 +114,15 @@ public: }; class GenerateModuleAction : public ASTFrontendAction { +public: + /// When \c OS is non-null, uses it for outputting the PCM file instead of + /// automatically creating an output file. + explicit GenerateModuleAction(std::unique_ptr OS = nullptr) + : OS(std::move(OS)) {} + +private: + std::unique_ptr OS; + virtual std::unique_ptr CreateOutputFile(CompilerInstance &CI, StringRef InFile) = 0; @@ -145,6 +154,9 @@ protected: }; class GenerateModuleFromModuleMapAction : public GenerateModuleAction { +public: + using GenerateModuleAction::GenerateModuleAction; + private: bool BeginSourceFileAction(CompilerInstance &CI) override; diff --git a/clang/include/clang/Serialization/ModuleCache.h b/clang/include/clang/Serialization/ModuleCache.h index a945135b4c42..107f87b32600 100644 --- a/clang/include/clang/Serialization/ModuleCache.h +++ b/clang/include/clang/Serialization/ModuleCache.h @@ -13,10 +13,12 @@ #include #include +#include namespace llvm { class AdvisoryLock; class MemoryBuffer; +class MemoryBufferRef; } // namespace llvm namespace clang { @@ -54,7 +56,9 @@ public: virtual InMemoryModuleCache &getInMemoryModuleCache() = 0; virtual const InMemoryModuleCache &getInMemoryModuleCache() const = 0; - // TODO: Virtualize writing PCM files. + /// Write the PCM contents to the given path in the module cache. + virtual std::error_code write(StringRef Path, + llvm::MemoryBufferRef Buffer) = 0; virtual Expected> read(StringRef FileName, off_t &Size, time_t &ModTime) = 0; @@ -71,6 +75,9 @@ std::shared_ptr createCrossProcessModuleCache(); /// Shared implementation of `ModuleCache::maybePrune()`. void maybePruneImpl(StringRef Path, time_t PruneInterval, time_t PruneAfter); +/// Shared implementation of `ModuleCache::write()`. +std::error_code writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer); + /// Shared implementation of `ModuleCache::read()`. Expected> readImpl(StringRef FileName, off_t &Size, time_t &ModTime); diff --git a/clang/lib/DependencyScanning/InProcessModuleCache.cpp b/clang/lib/DependencyScanning/InProcessModuleCache.cpp index 9f46f32bf331..0565f5eebfe0 100644 --- a/clang/lib/DependencyScanning/InProcessModuleCache.cpp +++ b/clang/lib/DependencyScanning/InProcessModuleCache.cpp @@ -134,6 +134,15 @@ public: return InMemory; } + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + // This is a compiler-internal input/output, let's bypass the sandbox. + auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); + + // FIXME: This could use an in-memory cache to avoid IO, and only write to + // disk at the end of the scan. + return writeImpl(Path, Buffer); + } + Expected> read(StringRef FileName, off_t &Size, time_t &ModTime) override { // This is a compiler-internal input/output, let's bypass the sandbox. diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 33919bc1c463..dbeafaea19ba 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/Timer.h" #include "llvm/Support/VirtualFileSystem.h" @@ -1236,10 +1237,10 @@ public: }; } // namespace -bool CompilerInstance::compileModule(SourceLocation ImportLoc, - StringRef ModuleName, - StringRef ModuleFileName, - CompilerInstance &Instance) { +std::unique_ptr +CompilerInstance::compileModule(SourceLocation ImportLoc, StringRef ModuleName, + StringRef ModuleFileName, + CompilerInstance &Instance) { PrettyStackTraceBuildModule CrashInfo(ModuleName, ModuleFileName); llvm::TimeTraceScope TimeScope("Module Compile", ModuleName); @@ -1248,18 +1249,22 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc, if (getModuleCache().getInMemoryModuleCache().isPCMFinal(ModuleFileName)) { getDiagnostics().Report(ImportLoc, diag::err_module_rebuild_finalized) << ModuleName; - return false; + return nullptr; } getDiagnostics().Report(ImportLoc, diag::remark_module_build) << ModuleName << ModuleFileName; + SmallString<0> Buffer; + // Execute the action to actually build the module in-place. Use a separate // thread so that we get a stack large enough. bool Crashed = !llvm::CrashRecoveryContext().RunSafelyOnNewStack( [&]() { + auto OS = std::make_unique(Buffer); + std::unique_ptr Action = - std::make_unique(); + std::make_unique(std::move(OS)); if (auto WrapGenModuleAction = Instance.getGenModuleActionWrapper()) Action = WrapGenModuleAction(Instance.getFrontendOpts(), @@ -1295,10 +1300,17 @@ bool CompilerInstance::compileModule(SourceLocation ImportLoc, setBuildGlobalModuleIndex(true); } - // If \p AllowPCMWithCompilerErrors is set return 'success' even if errors + if (Crashed) + return nullptr; + + // Unless \p AllowPCMWithCompilerErrors is set, return 'failure' if errors // occurred. - return !Instance.getDiagnostics().hasErrorOccurred() || - Instance.getFrontendOpts().AllowPCMWithCompilerErrors; + if (Instance.getDiagnostics().hasErrorOccurred() && + !Instance.getFrontendOpts().AllowPCMWithCompilerErrors) + return nullptr; + + return std::make_unique( + std::move(Buffer), Instance.getFrontendOpts().OutputFile); } static OptionalFileEntryRef getPublicModuleMap(FileEntryRef File, @@ -1440,13 +1452,17 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, SourceLocation ModuleNameLoc, Module *Module, ModuleFileName ModuleFileName) { + std::unique_ptr Buffer; + { auto Instance = ImportingInstance.cloneForModuleCompile( ModuleNameLoc, Module, ModuleFileName); - if (!ImportingInstance.compileModule(ModuleNameLoc, - Module->getTopLevelModuleName(), - ModuleFileName, *Instance)) { + Buffer = ImportingInstance.compileModule(ModuleNameLoc, + Module->getTopLevelModuleName(), + ModuleFileName, *Instance); + + if (!Buffer) { ImportingInstance.getDiagnostics().Report(ModuleNameLoc, diag::err_module_not_built) << Module->Name << SourceRange(ImportLoc, ModuleNameLoc); @@ -1454,6 +1470,16 @@ static bool compileModuleImpl(CompilerInstance &ImportingInstance, } } + std::error_code EC = + ImportingInstance.getModuleCache().write(ModuleFileName, *Buffer); + if (EC) { + ImportingInstance.getDiagnostics().Report(ModuleNameLoc, + diag::err_module_not_written) + << Module->Name << ModuleFileName << EC.message() + << SourceRange(ImportLoc, ModuleNameLoc); + return false; + } + // The module is built successfully, we can update its timestamp now. if (ImportingInstance.getPreprocessor() .getHeaderSearchInfo() @@ -2195,8 +2221,9 @@ void CompilerInstance::createModuleFromSource(SourceLocation ImportLoc, // output is nondeterministic (as .pcm files refer to each other by name). // Can this affect the output in any way? SmallString<128> ModuleFileName; + int FD; if (std::error_code EC = llvm::sys::fs::createTemporaryFile( - CleanModuleName, "pcm", ModuleFileName)) { + CleanModuleName, "pcm", FD, ModuleFileName)) { getDiagnostics().Report(ImportLoc, diag::err_fe_unable_to_open_output) << ModuleFileName << EC.message(); return; @@ -2224,12 +2251,14 @@ void CompilerInstance::createModuleFromSource(SourceLocation ImportLoc, Other->DeleteBuiltModules = false; // Build the module, inheriting any modules that we've built locally. - bool Success = compileModule(ImportLoc, ModuleName, ModuleFileName, *Other); - + std::unique_ptr Buffer = + compileModule(ImportLoc, ModuleName, ModuleFileName, *Other); BuiltModules = std::move(Other->BuiltModules); - if (Success) { + if (Buffer) { + llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true); BuiltModules[std::string(ModuleName)] = std::string(ModuleFileName); + OS << Buffer->getBuffer(); llvm::sys::RemoveFileOnSignal(ModuleFileName); } } diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index e5eaab0da7ad..42f1ae3d83ed 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -188,7 +188,8 @@ bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) { std::vector> GenerateModuleAction::CreateMultiplexConsumer(CompilerInstance &CI, StringRef InFile) { - std::unique_ptr OS = CreateOutputFile(CI, InFile); + if (!OS) + OS = CreateOutputFile(CI, InFile); if (!OS) return {}; diff --git a/clang/lib/Serialization/ModuleCache.cpp b/clang/lib/Serialization/ModuleCache.cpp index 5e618a3da08d..bd57f4322669 100644 --- a/clang/lib/Serialization/ModuleCache.cpp +++ b/clang/lib/Serialization/ModuleCache.cpp @@ -102,6 +102,39 @@ void clang::maybePruneImpl(StringRef Path, time_t PruneInterval, } } +std::error_code clang::writeImpl(StringRef Path, llvm::MemoryBufferRef Buffer) { + StringRef Extension = llvm::sys::path::extension(Path); + SmallString<128> ModelPath = StringRef(Path).drop_back(Extension.size()); + ModelPath += "-%%%%%%%%"; + ModelPath += Extension; + ModelPath += ".tmp"; + + std::error_code EC; + int FD; + SmallString<128> TmpPath; + if ((EC = llvm::sys::fs::createUniqueFile(ModelPath, FD, TmpPath))) { + if (EC != std::errc::no_such_file_or_directory) + return EC; + + StringRef Dir = llvm::sys::path::parent_path(Path); + if (std::error_code InnerEC = llvm::sys::fs::create_directories(Dir)) + return InnerEC; + + if ((EC = llvm::sys::fs::createUniqueFile(ModelPath, FD, TmpPath))) + return EC; + } + + { + llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true); + OS << Buffer.getBuffer(); + } + + if ((EC = llvm::sys::fs::rename(TmpPath, Path))) + return EC; + + return {}; +} + Expected> clang::readImpl(StringRef FileName, off_t &Size, time_t &ModTime) { Expected FD = @@ -182,6 +215,13 @@ public: return InMemory; } + std::error_code write(StringRef Path, llvm::MemoryBufferRef Buffer) override { + // This is a compiler-internal input/output, let's bypass the sandbox. + auto BypassSandbox = llvm::sys::sandbox::scopedDisable(); + + return writeImpl(Path, Buffer); + } + Expected> read(StringRef FileName, off_t &Size, time_t &ModTime) override { // This is a compiler-internal input/output, let's bypass the sandbox.