llvm-project/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
Jan Svoboda 8cc2a13727 [clang][deps] Handle symlinks in minimizing FS
The minimizing and caching filesystem used by the dependency scanner can be configured to **not** minimize some files. That's necessary when scanning a TU with prebuilt inputs (i.e. PCH) that refer to the original (non-minimized) files. Minimizing such files in the dependency scanner would cause discrepancy between the current perceived state of the filesystem and the file sizes stored in the AST file. By not minimizing such files, we avoid creating the discrepancy.

The problem with the current approach is that files that should not be minimized are identified by their path. This breaks down when the prebuilt input (PCH) and the current TU refer to the same file via different paths (i.e. symlinks). This patch switches from paths to `llvm::sys::fs::UniqueID` when identifying ignored files. This is consistent with how the rest of Clang treats files.

Depends on D114966.

Reviewed By: dexonsmith, arphaman

Differential Revision: https://reviews.llvm.org/D114971
2022-01-21 13:04:25 +01:00

362 lines
14 KiB
C++

//===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h"
#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/Threading.h"
using namespace clang;
using namespace tooling;
using namespace dependencies;
/// Opens \p Filename on the underlying file system and bundles its status and
/// contents into a \c TentativeEntry.
///
/// \returns the first error reported while opening the file, querying its
/// status, or reading its buffer; otherwise the populated entry.
llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
  // Step 1: open the file.
  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> OpenResult =
      getUnderlyingFS().openFileForRead(Filename);
  if (!OpenResult)
    return OpenResult.getError();
  std::unique_ptr<llvm::vfs::File> Handle = std::move(*OpenResult);

  // Step 2: query the status of the opened file.
  llvm::ErrorOr<llvm::vfs::Status> StatResult = Handle->status();
  if (!StatResult)
    return StatResult.getError();
  llvm::vfs::Status Stat = std::move(*StatResult);

  // Step 3: read the contents, using the name reported by the status.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferResult =
      Handle->getBuffer(Stat.getName());
  if (!BufferResult)
    return BufferResult.getError();
  std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*BufferResult);

  // The file may have changed size between the stat and the read. Make the
  // status agree with what was actually read.
  const auto BytesRead = Buffer->getBufferSize();
  if (Stat.getSize() != BytesRead)
    Stat = llvm::vfs::Status::copyWithNewSize(Stat, BytesRead);

  return TentativeEntry(Stat, std::move(Buffer));
}
/// Returns a reference to \p Entry, minimizing its contents first if needed.
///
/// The entry is handed back un-minimized when it is an error or a directory,
/// when \p Disable is set, or when `shouldMinimize()` rejects \p Filename /
/// the entry's unique ID. Otherwise this function computes the minimized
/// contents while holding `ValueLock` and publishes them through
/// `MinimizedAccess`; once that pointer is non-null, later calls return
/// without redoing the work.
EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
// Nothing to minimize: hand out the entry as-is.
if (Entry.isError() || Entry.isDirectory() || Disable ||
!shouldMinimize(Filename, Entry.getUniqueID()))
return EntryRef(/*Minimized=*/false, Filename, Entry);
CachedFileContents *Contents = Entry.getContents();
assert(Contents && "contents not initialized");
// Double-checked locking, first check: if the minimized buffer has already
// been published, return without taking the lock.
if (Contents->MinimizedAccess.load())
return EntryRef(/*Minimized=*/true, Filename, Entry);
std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
// Double-checked locking, second check: the buffer may have been published
// while this thread was waiting for the lock.
if (Contents->MinimizedAccess.load())
return EntryRef(/*Minimized=*/true, Filename, Entry);
llvm::SmallString<1024> MinimizedFileContents;
// Minimize the file down to directives that might affect the dependencies.
SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(),
MinimizedFileContents, Tokens)) {
// FIXME: Propagate the diagnostic if desired by the client.
// Use the original file if the minimization failed. The entry is still
// reported as minimized, but the "minimized" storage is a buffer created
// from the original contents, and `PPSkippedRangeMapping` is left untouched
// on this path.
Contents->MinimizedStorage =
llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
return EntryRef(/*Minimized=*/true, Filename, Entry);
}
// The contents produced by the minimizer must be null terminated.
assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
"not null terminated contents");
// Compute the skipped PP ranges that speedup skipping over inactive
// preprocessor blocks.
llvm::SmallVector<minimize_source_to_dependency_directives::SkippedRange, 32>
SkippedRanges;
minimize_source_to_dependency_directives::computeSkippedRanges(Tokens,
SkippedRanges);
// Record each sufficiently long skipped range as Offset -> Length.
PreprocessorSkippedRangeMapping Mapping;
for (const auto &Range : SkippedRanges) {
if (Range.Length < 16) {
// Ignore small ranges as non-profitable.
// FIXME: This is a heuristic, it's worth investigating the tradeoffs
// when it should be applied.
continue;
}
Mapping[Range.Offset] = Range.Length;
}
Contents->PPSkippedRangeMapping = std::move(Mapping);
Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
std::move(MinimizedFileContents));
// This function performed double-checked locking using `MinimizedAccess`.
// Assigning it must be the last thing this function does. If we were to
// assign it before `PPSkippedRangeMapping`, other threads may skip the
// critical section (`MinimizedAccess != nullptr`) and access the mappings
// that are about to be initialized, leading to a data race.
Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
return EntryRef(/*Minimized=*/true, Filename, Entry);
}
/// Sizes the shard array from the available hardware concurrency.
DependencyScanningFilesystemSharedCache::
    DependencyScanningFilesystemSharedCache() {
  // Sharding the cache reduces lock contention. The shard count (a quarter of
  // the compute thread count, but never fewer than two) was chosen through
  // empirical testing on a reasonably high core machine (iMacPro 18 cores /
  // 36 threads).
  // FIXME: A better heuristic might also consider the OS to account for
  // the different cost of lock contention on different OSes.
  const unsigned ThreadCount =
      llvm::hardware_concurrency().compute_thread_count();
  NumShards = std::max(2u, ThreadCount / 4);
  CacheShards = std::make_unique<CacheShard[]>(NumShards);
}
/// Selects the shard responsible for \p Filename by hashing the name and
/// reducing the hash modulo the number of shards.
DependencyScanningFilesystemSharedCache::CacheShard &
DependencyScanningFilesystemSharedCache::getShardForFilename(
    StringRef Filename) const {
  const auto ShardIndex = llvm::hash_value(Filename) % NumShards;
  return CacheShards[ShardIndex];
}
/// Selects the shard responsible for \p UID by hashing its device and file
/// components together and reducing the hash modulo the number of shards.
DependencyScanningFilesystemSharedCache::CacheShard &
DependencyScanningFilesystemSharedCache::getShardForUID(
    llvm::sys::fs::UniqueID UID) const {
  return CacheShards[llvm::hash_combine(UID.getDevice(), UID.getFile()) %
                     NumShards];
}
/// Looks up the entry cached under \p Filename while holding the shard lock.
///
/// \returns the cached entry, or nullptr if this shard has none for the name.
const CachedFileSystemEntry *
DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
    StringRef Filename) const {
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  auto Found = EntriesByFilename.find(Filename);
  if (Found != EntriesByFilename.end())
    return Found->getValue();
  return nullptr;
}
/// Looks up the entry cached under \p UID while holding the shard lock.
///
/// \returns the cached entry, or nullptr if this shard has none for the ID.
const CachedFileSystemEntry *
DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
    llvm::sys::fs::UniqueID UID) const {
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  auto Found = EntriesByUID.find(UID);
  if (Found != EntriesByUID.end())
    return Found->getSecond();
  return nullptr;
}
/// Returns the entry cached for \p Filename, creating one from \p Stat if the
/// name is not in the map yet. If an entry already exists it is returned
/// unchanged and \p Stat is not used.
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
    getOrEmplaceEntryForFilename(StringRef Filename,
                                 llvm::ErrorOr<llvm::vfs::Status> Stat) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);

  // Reserve the slot with a null placeholder; it is filled in below only when
  // the insertion actually took place.
  auto Result = EntriesByFilename.insert({Filename, nullptr});
  const bool WasInserted = Result.second;
  auto &Slot = Result.first->second;
  if (WasInserted)
    Slot = new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
  return *Slot;
}
/// Returns the entry cached for \p UID, creating one from \p Stat and
/// \p Contents if the ID is not in the map yet. If an entry already exists it
/// is returned unchanged and the arguments are not used.
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
    llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
    std::unique_ptr<llvm::MemoryBuffer> Contents) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);

  auto Result = EntriesByUID.insert({UID, nullptr});
  auto &Slot = Result.first->second;
  if (!Result.second)
    return *Slot;

  // A null buffer (e.g. the caller passed no contents) yields an entry
  // without stored contents.
  CachedFileContents *StoredContents =
      Contents ? new (ContentsStorage.Allocate())
                     CachedFileContents(std::move(Contents))
               : nullptr;
  Slot = new (EntryStorage.Allocate())
      CachedFileSystemEntry(std::move(Stat), StoredContents);
  return *Slot;
}
/// Attempts to associate \p Filename with the already existing \p Entry and
/// returns whichever entry the filename maps to after the insertion attempt.
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
    getOrInsertEntryForFilename(StringRef Filename,
                                const CachedFileSystemEntry &Entry) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  auto Result = EntriesByFilename.insert({Filename, &Entry});
  return *Result.first->getValue();
}
/// Decides from the file extension alone whether \p Filename should be
/// minimized. Only a fixed list of source-like extensions (matched
/// case-insensitively) qualifies; a file without any extension is also
/// treated as a source file that should be minimized.
///
/// This is kinda hacky, it would be better if we knew what kind of file Clang
/// was expecting instead.
static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
  const StringRef Extension = llvm::sys::path::extension(Filename);

  if (!Extension.empty()) {
    const bool IsSourceLike =
        llvm::StringSwitch<bool>(Extension)
            .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
            .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
            .CasesLower(".m", ".mm", true)
            .CasesLower(".i", ".ii", ".mi", ".mmi", true)
            .CasesLower(".def", ".inc", true)
            .Default(false);
    return IsSourceLike;
  }

  // No extension at all: C++ standard library.
  return true;
}
/// Decides whether a failed stat of \p Filename may be cached.
///
/// Failures are cached only for source files, i.e. names with an extension
/// that `shouldMinimizeBasedOnExtension()` accepts. Names without an
/// extension are never cached because they may be the module cache directory.
static bool shouldCacheStatFailures(StringRef Filename) {
  const bool HasExtension = !llvm::sys::path::extension(Filename).empty();
  return HasExtension && shouldMinimizeBasedOnExtension(Filename);
}
/// Records the unique ID of \p Filename in `NotToBeMinimized`. Does nothing
/// if no file system entry can be obtained for the file.
void DependencyScanningWorkerFilesystem::disableMinimization(
    StringRef Filename) {
  // `NotToBeMinimized` is still being populated at this point, so the lookup
  // itself must be told explicitly not to minimize the file.
  llvm::ErrorOr<EntryRef> Result =
      getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true);
  if (!Result)
    return;
  NotToBeMinimized.insert(Result->getStatus().getUniqueID());
}
/// Returns true when \p Filename has a minimizable extension and \p UID has
/// not been registered in `NotToBeMinimized`.
bool DependencyScanningWorkerFilesystem::shouldMinimize(
    StringRef Filename, llvm::sys::fs::UniqueID UID) {
  if (!shouldMinimizeBasedOnExtension(Filename))
    return false;
  return !NotToBeMinimized.contains(UID);
}
/// Stores \p TEntry in the shared cache shard selected by its unique ID, or
/// returns the entry that is already cached under that ID.
const CachedFileSystemEntry &
DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
    TentativeEntry TEntry) {
  // Read the ID once, before the status is moved into the cache.
  const llvm::sys::fs::UniqueID UID = TEntry.Status.getUniqueID();
  auto &Shard = SharedCache.getShardForUID(UID);
  return Shard.getOrEmplaceEntryForUID(UID, std::move(TEntry.Status),
                                       std::move(TEntry.Contents));
}
/// Finds the cached entry for \p Filename, consulting the local cache first
/// and the shared cache second. A hit in the shared cache is also written
/// into the local cache.
///
/// \returns the entry, or nullptr if neither cache knows the filename.
const CachedFileSystemEntry *
DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
    StringRef Filename) {
  const auto *LocalHit = LocalCache.findEntryByFilename(Filename);
  if (LocalHit)
    return LocalHit;

  const auto *SharedHit =
      SharedCache.getShardForFilename(Filename).findEntryByFilename(Filename);
  if (!SharedHit)
    return nullptr;

  return &LocalCache.insertEntryForFilename(Filename, *SharedHit);
}
/// Computes the cache entry for \p Filename from the underlying file system
/// and stores it in both the shared and the local cache.
///
/// \returns the stat error directly when the stat fails and the failure is
/// not to be cached; otherwise the result of inserting the entry (which may
/// itself represent an error) into the local cache.
llvm::ErrorOr<const CachedFileSystemEntry &>
DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);

  // Failed stat: either report it as-is or remember it as an error entry.
  if (!Stat) {
    if (!shouldCacheStatFailures(Filename))
      return Stat.getError();
    const auto &ErrorEntry =
        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
    return insertLocalEntryForFilename(Filename, ErrorEntry);
  }

  // The shared cache may already hold an entry for this status; if so, only
  // the filename needs to be recorded locally.
  if (const auto *Known = findSharedEntryByUID(*Stat))
    return insertLocalEntryForFilename(Filename, *Known);

  // Directories need no contents; regular files are read from disk.
  llvm::ErrorOr<TentativeEntry> TEntry =
      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);

  const CachedFileSystemEntry *SharedEntry = nullptr;
  if (TEntry) {
    const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
    SharedEntry = &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
  } else {
    // Reading failed: store the error under the filename.
    SharedEntry =
        &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
  }

  return insertLocalEntryForFilename(Filename, *SharedEntry);
}
/// Returns the (possibly minimized) entry for \p Filename, using the caches
/// when they already know the file and computing a fresh entry otherwise.
///
/// \param DisableMinimization forwarded to `minimizeIfNecessary()`.
/// \returns the error from computing the entry, or the result of unwrapping
/// the entry reference.
llvm::ErrorOr<EntryRef>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
    StringRef Filename, bool DisableMinimization) {
  const CachedFileSystemEntry *Entry =
      findEntryByFilenameWithWriteThrough(Filename);

  if (!Entry) {
    // Cache miss: go to the underlying file system.
    auto MaybeEntry = computeAndStoreResult(Filename);
    if (!MaybeEntry)
      return MaybeEntry.getError();
    Entry = &*MaybeEntry;
  }

  return minimizeIfNecessary(*Entry, Filename, DisableMinimization)
      .unwrapError();
}
/// Returns the status of \p Path as recorded in its file system entry, or the
/// error produced while obtaining that entry.
llvm::ErrorOr<llvm::vfs::Status>
DependencyScanningWorkerFilesystem::status(const Twine &Path) {
  // Backing storage in case the Twine has to be rendered into a string.
  SmallString<256> PathStorage;
  StringRef Filename = Path.toStringRef(PathStorage);

  llvm::ErrorOr<EntryRef> Entry = getOrCreateFileSystemEntry(Filename);
  if (Entry)
    return Entry->getStatus();
  return Entry.getError();
}
namespace {

/// \c llvm::vfs::File implementation through which the VFS used by clang
/// consumes a \c CachedFileSystemEntry. It owns a memory buffer and a status
/// and serves both without further file system access.
class MinimizedVFSFile final : public llvm::vfs::File {
public:
  MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Contents,
                   llvm::vfs::Status FileStat)
      : Buffer(std::move(Contents)), Stat(std::move(FileStat)) {}

  /// Builds a file object for \p Entry and, when applicable, registers the
  /// entry's skipped-range mapping in \p PPSkipMappings.
  static llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
  create(EntryRef Entry,
         ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings);

  /// Returns the status captured at construction.
  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }

  /// Hands ownership of the stored buffer to the caller; all parameters are
  /// ignored. The buffer is moved out, so this object no longer holds it
  /// afterwards.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
            bool IsVolatile) override {
    return std::move(Buffer);
  }

  /// Nothing to release; always reports success.
  std::error_code close() override { return {}; }

private:
  std::unique_ptr<llvm::MemoryBuffer> Buffer;
  llvm::vfs::Status Stat;
};

} // end anonymous namespace
/// Wraps \p Entry in a \c MinimizedVFSFile.
///
/// \p Entry must not be an error entry (asserted). Directories are rejected
/// with `std::errc::is_a_directory`. If \p PPSkipMappings is non-null and the
/// entry has a non-empty skipped-range mapping, that mapping is registered
/// under the start address of the newly created buffer.
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
    EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
  assert(!Entry.isError() && "error");

  if (Entry.isDirectory())
    return std::make_error_code(std::errc::is_a_directory);

  // Build the buffer over the entry's contents, named after the entry.
  std::unique_ptr<llvm::MemoryBuffer> ContentsBuffer =
      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
                                       Entry.getStatus().getName(),
                                       /*RequiresNullTerminator=*/false);
  auto Result = std::make_unique<MinimizedVFSFile>(std::move(ContentsBuffer),
                                                   Entry.getStatus());

  const auto *RangeMapping = Entry.getPPSkippedRangeMapping();
  const bool HasUsefulMapping = RangeMapping && !RangeMapping->empty();
  if (HasUsefulMapping && PPSkipMappings)
    (*PPSkipMappings)[Result->Buffer->getBufferStart()] = RangeMapping;

  // Convert to the base-class pointer expected by callers.
  std::unique_ptr<llvm::vfs::File> AsBase(std::move(Result));
  return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(std::move(AsBase));
}
/// Opens \p Path by wrapping its file system entry in a \c MinimizedVFSFile.
///
/// \returns the error produced while obtaining the entry, or whatever
/// `MinimizedVFSFile::create()` returns for it.
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
  // Backing storage in case the Twine has to be rendered into a string.
  SmallString<256> PathStorage;
  StringRef Filename = Path.toStringRef(PathStorage);

  llvm::ErrorOr<EntryRef> Entry = getOrCreateFileSystemEntry(Filename);
  if (Entry)
    return MinimizedVFSFile::create(*Entry, PPSkipMappings);
  return Entry.getError();
}