Sam McCall 5574a5894f
[include-cleaner] don't consider the associated header unused (#67228)
Loosely, the "associated header" of `foo.cpp` is `foo.h`.
It should be included, many styles include it first.

So far we haven't special cased it in any way, and require this include
to be justified. e.g. if foo.cpp defines a function declared in foo.h,
then the #include is allowed to check these declarations match.

However this doesn't really align with what users want:
- people reasonably want to include the associated header for the
  side-effect of validating that it compiles.
  In the degenerate case, `lib.cpp` is just `#include "lib.h"` (see bug)
- That `void foo(){}` IWYU-uses `void foo();` is a bit artificial, and
  most users won't internalize this. Instead they'll stick with the
  simpler model "include the header that defines your API".
  In the rare cases where these give different answers[1], our current
  behavior is a puzzling special case from the user POV.
  It is more user-friendly to accept both models.
- even where this diagnostic is a true positive (defs don't match header
  decls) the diagnostic does not communicate this usefully.

Fixes https://github.com/llvm/llvm-project/issues/67140

[1] Example of an associated header that's not IWYU-used:
```
// http.h
inline URL buildHttpURL(string host, int port, string path) {
  return "http://" + host + ":" + port + "/" + path;
}
// http.cpp
class HTTPURLHandler : URLHandler { ... };
REGISTER_URL_HANDLER("http", HTTPURLHandler);
```
2024-06-19 15:29:05 +02:00

500 lines
18 KiB
C++

//===--- Record.cpp - Record compiler events ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclGroup.h"
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/DirectoryLookup.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem/UniqueID.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
#include <algorithm>
#include <assert.h>
#include <memory>
#include <optional>
#include <set>
#include <utility>
#include <vector>
namespace clang::include_cleaner {
namespace {
class PPRecorder : public PPCallbacks {
public:
PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
: Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {
for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range())
if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName());
}
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override {
Active = SM.isWrittenInMainFile(Loc);
}
void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
StringRef SpelledFilename, bool IsAngled,
CharSourceRange FilenameRange,
OptionalFileEntryRef File, StringRef SearchPath,
StringRef RelativePath, const Module *SuggestedModule,
bool ModuleImported,
SrcMgr::CharacteristicKind) override {
if (!Active)
return;
Include I;
I.HashLocation = Hash;
I.Resolved = File;
I.Line = SM.getSpellingLineNumber(Hash);
I.Spelled = SpelledFilename;
I.Angled = IsAngled;
Recorded.Includes.add(I);
}
void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
SourceRange Range, const MacroArgs *Args) override {
if (!Active)
return;
recordMacroRef(MacroName, *MD.getMacroInfo());
}
void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
if (!Active)
return;
const auto *MI = MD->getMacroInfo();
// The tokens of a macro definition could refer to a macro.
// Formally this reference isn't resolved until this macro is expanded,
// but we want to treat it as a reference anyway.
for (const auto &Tok : MI->tokens()) {
auto *II = Tok.getIdentifierInfo();
// Could this token be a reference to a macro? (Not param to this macro).
if (!II || !II->hadMacroDefinition() ||
llvm::is_contained(MI->params(), II))
continue;
if (const MacroInfo *MI = PP.getMacroInfo(II))
recordMacroRef(Tok, *MI);
}
}
void MacroUndefined(const Token &MacroName, const MacroDefinition &MD,
const MacroDirective *) override {
if (!Active)
return;
if (const auto *MI = MD.getMacroInfo())
recordMacroRef(MacroName, *MI);
}
void Ifdef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
if (!Active)
return;
if (const auto *MI = MD.getMacroInfo())
recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
}
void Ifndef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
if (!Active)
return;
if (const auto *MI = MD.getMacroInfo())
recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
}
using PPCallbacks::Elifdef;
using PPCallbacks::Elifndef;
void Elifdef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
if (!Active)
return;
if (const auto *MI = MD.getMacroInfo())
recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
}
void Elifndef(SourceLocation Loc, const Token &MacroNameTok,
const MacroDefinition &MD) override {
if (!Active)
return;
if (const auto *MI = MD.getMacroInfo())
recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
}
void Defined(const Token &MacroNameTok, const MacroDefinition &MD,
SourceRange Range) override {
if (!Active)
return;
if (const auto *MI = MD.getMacroInfo())
recordMacroRef(MacroNameTok, *MI, RefType::Ambiguous);
}
private:
void recordMacroRef(const Token &Tok, const MacroInfo &MI,
RefType RT = RefType::Explicit) {
if (MI.isBuiltinMacro())
return; // __FILE__ is not a reference.
Recorded.MacroReferences.push_back(
SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()},
Tok.getLocation(), RT});
}
bool Active = false;
RecordedPP &Recorded;
const Preprocessor &PP;
const SourceManager &SM;
};
} // namespace
// Preprocessor observer that populates a PragmaIncludes (`Out`).
//
// It is both a PPCallbacks (to see file transitions and #include directives)
// and a CommentHandler (to see IWYU pragma comments). It records:
//  - which files are not self-contained (decided when a file is exited),
//  - export / begin_exports / end_exports relationships,
//  - keep / begin_keep / end_keep / associated / always_keep markers,
//  - private headers and their optional public spelling,
//  - the deduced "associated header" of the main file.
class PragmaIncludes::RecordPragma : public PPCallbacks, public CommentHandler {
public:
// Convenience overload: delegates to the Preprocessor constructor using CI's
// preprocessor.
RecordPragma(const CompilerInstance &CI, PragmaIncludes *Out)
: RecordPragma(CI.getPreprocessor(), Out) {}
// Caches the source manager and header-search info of P, creates the arena
// that backs the interned strings, and precomputes the name stem of the main
// file (the stem of "" when the main file has no non-builtin filename).
RecordPragma(const Preprocessor &P, PragmaIncludes *Out)
: SM(P.getSourceManager()), HeaderInfo(P.getHeaderSearchInfo()), Out(Out),
Arena(std::make_shared<llvm::BumpPtrAllocator>()),
UniqueStrings(*Arena),
MainFileStem(llvm::sys::path::stem(
SM.getNonBuiltinFilenameForID(SM.getMainFileID()).value_or(""))) {}
// Refreshes InMainFile on every file transition. When a file is exited,
// also updates Out->NonSelfContainedFiles for the file that was just left
// (PrevFID): self-contained files are erased from the set, others inserted.
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
FileID PrevFID) override {
InMainFile = SM.isWrittenInMainFile(Loc);
if (Reason == PPCallbacks::ExitFile) {
// At file exit time HeaderSearchInfo is valid and can be used to
// determine whether the file was a self-contained header or not.
if (OptionalFileEntryRef FE = SM.getFileEntryRefForID(PrevFID)) {
if (tooling::isSelfContainedHeader(*FE, SM, HeaderInfo))
Out->NonSelfContainedFiles.erase(FE->getUniqueID());
else
Out->NonSelfContainedFiles.insert(FE->getUniqueID());
}
}
}
// Finalizes the recorded data: each exporter list in Out->IWYUExportBy is
// sorted and deduplicated, then the string arena is moved into Out->Arena
// (presumably so the interned strings stay valid after this recorder is
// gone -- confirm against PragmaIncludes).
// Note: Out->StdIWYUExportBy is not sorted/deduplicated here.
void EndOfMainFile() override {
for (auto &It : Out->IWYUExportBy) {
llvm::sort(It.getSecond());
It.getSecond().erase(
std::unique(It.getSecond().begin(), It.getSecond().end()),
It.getSecond().end());
}
Out->Arena.emplace_back(std::move(Arena));
}
// Called for every #include. Works out which header was included and then
// applies the export, keep and associated-header checks to it.
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
llvm::StringRef FileName, bool IsAngled,
CharSourceRange /*FilenameRange*/,
OptionalFileEntryRef File,
llvm::StringRef /*SearchPath*/,
llvm::StringRef /*RelativePath*/,
const clang::Module * /*SuggestedModule*/,
bool /*ModuleImported*/,
SrcMgr::CharacteristicKind FileKind) override {
FileID HashFID = SM.getFileID(HashLoc);
int HashLine = SM.getLineNumber(HashFID, SM.getFileOffset(HashLoc));
std::optional<Header> IncludedHeader;
// An angled include whose spelling names a known standard library header is
// represented as that standard header ...
if (IsAngled)
if (auto StandardHeader =
tooling::stdlib::Header::named("<" + FileName.str() + ">")) {
IncludedHeader = *StandardHeader;
}
// ... otherwise as the resolved file, if the include resolved at all.
if (!IncludedHeader && File)
IncludedHeader = *File;
checkForExport(HashFID, HashLine, IncludedHeader, File);
checkForKeep(HashLine, File);
checkForDeducedAssociated(IncludedHeader);
}
// Applies the innermost open export pragma (top of ExportStack) to an
// #include found at HashLine of IncludingFile.
//
// Nothing happens unless that pragma was seen in IncludingFile. If the
// include is covered (same line as the pragma, or a later line for a block
// pragma), the pragma's file is recorded as an exporter of the included
// file and/or standard header, and an include written in the main file is
// additionally marked as should-keep. A single-line pragma is popped after
// being considered, whether or not it covered this include.
void checkForExport(FileID IncludingFile, int HashLine,
std::optional<Header> IncludedHeader,
OptionalFileEntryRef IncludedFile) {
if (ExportStack.empty())
return;
auto &Top = ExportStack.back();
if (Top.SeenAtFile != IncludingFile)
return;
// Make sure current include is covered by the export pragma.
if ((Top.Block && HashLine > Top.SeenAtLine) ||
Top.SeenAtLine == HashLine) {
if (IncludedFile)
Out->IWYUExportBy[IncludedFile->getUniqueID()].push_back(Top.Path);
if (IncludedHeader && IncludedHeader->kind() == Header::Standard)
Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path);
// main-file #include with export pragma should never be removed.
if (Top.SeenAtFile == SM.getMainFileID() && IncludedFile)
Out->ShouldKeep.insert(IncludedFile->getUniqueID());
}
if (!Top.Block) // Pop immediately for single-line export pragma.
ExportStack.pop_back();
}
// Applies the innermost open keep pragma (top of KeepStack) to a main-file
// #include at HashLine. A resolved include covered by the pragma (same
// line, or a later line for a block pragma) is added to Out->ShouldKeep.
// A single-line pragma is popped after being considered.
void checkForKeep(int HashLine, OptionalFileEntryRef IncludedFile) {
if (!InMainFile || KeepStack.empty())
return;
KeepPragma &Top = KeepStack.back();
// Check if the current include is covered by a keep pragma.
if (IncludedFile && ((Top.Block && HashLine > Top.SeenAtLine) ||
Top.SeenAtLine == HashLine)) {
Out->ShouldKeep.insert(IncludedFile->getUniqueID());
}
if (!Top.Block)
KeepStack.pop_back(); // Pop immediately for single-line keep pragma.
}
// Consider marking H as the "associated header" of the main file.
//
// Our heuristic:
// - it must be the first #include in the main file
// - it must have the same name stem as the main file (foo.h and foo.cpp)
// (IWYU pragma: associated is also supported, just not by this function).
//
// We consider the associated header as if it had a keep pragma.
// (Unlike IWYU, we don't treat #includes inside the associated header as if
// they were written in the main file.)
void checkForDeducedAssociated(std::optional<Header> H) {
namespace path = llvm::sys::path;
if (!InMainFile || SeenAssociatedCandidate)
return;
SeenAssociatedCandidate = true; // Only the first #include is our candidate.
// Only headers backed by a physical file can be the associated header.
if (!H || H->kind() != Header::Physical)
return;
if (path::stem(H->physical().getName(), path::Style::posix) == MainFileStem)
Out->ShouldKeep.insert(H->physical().getUniqueID());
}
// Called for every comment. Comments that parseIWYUPragma does not recognise
// are ignored; recognised pragmas update the keep/export stacks or `Out`.
// Always returns false.
//
// Keep-style pragmas are handled before the file-entry lookup, so they take
// effect even when the comment's file has no file entry. All other pragmas
// require a file entry for the file containing the comment.
bool HandleComment(Preprocessor &PP, SourceRange Range) override {
auto &SM = PP.getSourceManager();
auto Pragma =
tooling::parseIWYUPragma(SM.getCharacterData(Range.getBegin()));
if (!Pragma)
return false;
auto [CommentFID, CommentOffset] = SM.getDecomposedLoc(Range.getBegin());
int CommentLine = SM.getLineNumber(CommentFID, CommentOffset);
// keep / associated / begin_keep / end_keep only matter in the main file.
if (InMainFile) {
if (Pragma->starts_with("keep") ||
// Limited support for associated headers: never consider unused.
Pragma->starts_with("associated")) {
KeepStack.push_back({CommentLine, false});
} else if (Pragma->starts_with("begin_keep")) {
KeepStack.push_back({CommentLine, true});
} else if (Pragma->starts_with("end_keep") && !KeepStack.empty()) {
assert(KeepStack.back().Block);
KeepStack.pop_back();
}
}
auto FE = SM.getFileEntryRefForID(CommentFID);
if (!FE) {
// This can only happen when the buffer was registered virtually into
// SourceManager and FileManager has no idea about it. In such a scenario,
// that file cannot be discovered by HeaderSearch, therefore no "explicit"
// includes for that file.
return false;
}
auto CommentUID = FE->getUniqueID();
// "private" optionally followed by ", include <spelling>": mark the file
// containing the comment as private, remembering the public spelling if
// one was given (empty otherwise).
if (Pragma->consume_front("private")) {
StringRef PublicHeader;
if (Pragma->consume_front(", include ")) {
// We always insert using the spelling from the pragma.
// A spelling without <> or "" delimiters is wrapped in double quotes.
PublicHeader =
save(Pragma->starts_with("<") || Pragma->starts_with("\"")
? (*Pragma)
: ("\"" + *Pragma + "\"").str());
}
Out->IWYUPublic.insert({CommentUID, PublicHeader});
return false;
}
// "always_keep": the file containing the comment is marked should-keep.
if (Pragma->consume_front("always_keep")) {
Out->ShouldKeep.insert(CommentUID);
return false;
}
auto Filename = FE->getName();
// Record export pragma.
if (Pragma->starts_with("export")) {
ExportStack.push_back({CommentLine, CommentFID, save(Filename), false});
} else if (Pragma->starts_with("begin_exports")) {
ExportStack.push_back({CommentLine, CommentFID, save(Filename), true});
} else if (Pragma->starts_with("end_exports")) {
// FIXME: be robust on unmatching cases. We should only pop the stack if
// the begin_exports and end_exports is in the same file.
if (!ExportStack.empty()) {
assert(ExportStack.back().Block);
ExportStack.pop_back();
}
}
return false;
}
private:
// Interns S through UniqueStrings and returns the saved copy.
StringRef save(llvm::StringRef S) { return UniqueStrings.save(S); }
// Whether the preprocessor is currently in the main file; refreshed by
// FileChanged.
bool InMainFile = false;
const SourceManager &SM;
const HeaderSearch &HeaderInfo;
// Destination for everything this recorder collects.
PragmaIncludes *Out;
// Allocator behind UniqueStrings; moved into Out->Arena at EndOfMainFile.
std::shared_ptr<llvm::BumpPtrAllocator> Arena;
/// Intern table for strings. Contents are on the arena.
llvm::StringSaver UniqueStrings;
// Used when deducing associated header.
llvm::StringRef MainFileStem;
// Set once the first main-file #include has been examined as a candidate.
bool SeenAssociatedCandidate = false;
struct ExportPragma {
// The line number where we saw the begin_exports or export pragma.
int SeenAtLine = 0; // 1-based line number.
// The file where we saw the pragma.
FileID SeenAtFile;
// Name (per FileEntry::getName()) of the file SeenAtFile.
StringRef Path;
// true if it is a block begin/end_exports pragma; false if it is a
// single-line export pragma.
bool Block = false;
};
// A stack for tracking all open begin_exports or single-line export.
std::vector<ExportPragma> ExportStack;
struct KeepPragma {
// The line number where we saw the begin_keep or keep pragma.
int SeenAtLine = 0; // 1-based line number.
// true if it is a block begin/end_keep pragma; false if it is a
// single-line keep pragma.
bool Block = false;
};
// A stack for tracking all open begin_keep pragmas or single-line keeps.
std::vector<KeepPragma> KeepStack;
};
void PragmaIncludes::record(const CompilerInstance &CI) {
auto Record = std::make_unique<RecordPragma>(CI, this);
CI.getPreprocessor().addCommentHandler(Record.get());
CI.getPreprocessor().addPPCallbacks(std::move(Record));
}
// Attaches a RecordPragma to P. The recorder is registered as a comment
// handler through a raw pointer, and then handed over to P as a PPCallbacks.
void PragmaIncludes::record(Preprocessor &P) {
  auto Recorder = std::make_unique<RecordPragma>(P, this);
  RecordPragma *Handler = Recorder.get();
  P.addCommentHandler(Handler);
  P.addPPCallbacks(std::move(Recorder));
}
// Returns the public-header spelling recorded for F in IWYUPublic, or an
// empty string when F has no entry there.
llvm::StringRef PragmaIncludes::getPublic(const FileEntry *F) const {
  const auto Entry = IWYUPublic.find(F->getUniqueID());
  if (Entry != IWYUPublic.end())
    return Entry->getSecond();
  return "";
}
// Looks up each name in FileNames through FM and returns the file references
// that could be obtained, in input order. Names that FM cannot resolve are
// skipped silently.
static llvm::SmallVector<FileEntryRef>
toFileEntries(llvm::ArrayRef<StringRef> FileNames, FileManager &FM) {
  llvm::SmallVector<FileEntryRef> Resolved;
  for (StringRef Name : FileNames) {
    auto Ref = FM.getOptionalFileRef(Name);
    if (!Ref)
      continue; // FIXME: log the failing cases?
    Resolved.push_back(*Ref);
  }
  return Resolved;
}
// Returns the files recorded in IWYUExportBy as exporters of File, resolved
// through FM. Empty when nothing is recorded for File.
llvm::SmallVector<FileEntryRef>
PragmaIncludes::getExporters(const FileEntry *File, FileManager &FM) const {
  const auto Exporters = IWYUExportBy.find(File->getUniqueID());
  if (Exporters != IWYUExportBy.end())
    return toFileEntries(Exporters->getSecond(), FM);
  return {};
}
// Returns the files recorded in StdIWYUExportBy as exporters of the standard
// library header StdHeader, resolved through FM. Empty when nothing is
// recorded for StdHeader.
llvm::SmallVector<FileEntryRef>
PragmaIncludes::getExporters(tooling::stdlib::Header StdHeader,
                             FileManager &FM) const {
  const auto Exporters = StdIWYUExportBy.find(StdHeader);
  if (Exporters != StdIWYUExportBy.end())
    return toFileEntries(Exporters->getSecond(), FM);
  return {};
}
// A file counts as self-contained unless it is listed in
// NonSelfContainedFiles.
bool PragmaIncludes::isSelfContained(const FileEntry *FE) const {
  const bool ListedAsNonSelfContained =
      NonSelfContainedFiles.contains(FE->getUniqueID());
  return !ListedAsNonSelfContained;
}
// A file is private when it has an entry in IWYUPublic.
bool PragmaIncludes::isPrivate(const FileEntry *FE) const {
  const bool HasPublicMapping = IWYUPublic.contains(FE->getUniqueID());
  return HasPublicMapping;
}
// A file should be kept when it is listed in ShouldKeep or in
// NonSelfContainedFiles.
bool PragmaIncludes::shouldKeep(const FileEntry *FE) const {
  if (ShouldKeep.contains(FE->getUniqueID()))
    return true;
  return NonSelfContainedFiles.contains(FE->getUniqueID());
}
namespace {
// True when D is a T whose template specialization kind is
// TSK_ImplicitInstantiation; false for any other kind, or when D is not a T.
template <typename T> bool isImplicitTemplateSpecialization(const Decl *D) {
  const auto *Specialization = dyn_cast<T>(D);
  return Specialization && Specialization->getTemplateSpecializationKind() ==
                               TSK_ImplicitInstantiation;
}
} // namespace
std::unique_ptr<ASTConsumer> RecordedAST::record() {
class Recorder : public ASTConsumer {
RecordedAST *Out;
public:
Recorder(RecordedAST *Out) : Out(Out) {}
void Initialize(ASTContext &Ctx) override { Out->Ctx = &Ctx; }
bool HandleTopLevelDecl(DeclGroupRef DG) override {
const auto &SM = Out->Ctx->getSourceManager();
for (Decl *D : DG) {
if (!SM.isWrittenInMainFile(SM.getExpansionLoc(D->getLocation())))
continue;
if (isImplicitTemplateSpecialization<FunctionDecl>(D) ||
isImplicitTemplateSpecialization<CXXRecordDecl>(D) ||
isImplicitTemplateSpecialization<VarDecl>(D))
continue;
// FIXME: Filter out certain Obj-C as well.
Out->Roots.push_back(D);
}
return ASTConsumer::HandleTopLevelDecl(DG);
}
};
return std::make_unique<Recorder>(this);
}
// Creates the preprocessor callbacks that record main-file includes and macro
// references from PP into this RecordedPP.
std::unique_ptr<PPCallbacks> RecordedPP::record(const Preprocessor &PP) {
  std::unique_ptr<PPCallbacks> Callbacks =
      std::make_unique<PPRecorder>(*this, PP);
  return Callbacks;
}
} // namespace clang::include_cleaner