[include-cleaner] Add include-cleaner tool, with initial HTML report

The immediate goal is to start producing an HTML report to debug and explain
include-cleaner recommendations.
For now, this includes only the lowest-level piece: a list of the references
found in the source code.

How this fits into future ideas:
 - under refs we can also show the headers providing the symbol, and which
   #include directives match those headers, etc.
 - we can also annotate the #include lines with which symbols they cover, and
   add whichever includes we're suggesting too
 - the include-cleaner tool will likely have modes where it emits diagnostics
   and/or applies edits, so the HTML report is behind a flag

Differential Revision: https://reviews.llvm.org/D135956
This commit is contained in:
Sam McCall 2022-10-14 12:56:41 +02:00
parent 34d18fd241
commit 6fa0e026c8
15 changed files with 514 additions and 1 deletions

View File

@ -1,4 +1,6 @@
include_directories(include)
add_subdirectory(lib)
add_subdirectory(tool)
if(CLANG_INCLUDE_TESTS)
add_subdirectory(test)
add_subdirectory(unittests)

View File

@ -0,0 +1,45 @@
//===--- Record.h - Record compiler events ------------------------- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Where Analysis.h analyzes AST nodes and recorded preprocessor events, this
// file defines ways to capture AST and preprocessor information from a parse.
//
// These are the simplest way to connect include-cleaner logic to the parser,
// but other ways are possible (for example clangd records includes separately).
//
//===----------------------------------------------------------------------===//
#ifndef CLANG_INCLUDE_CLEANER_RECORD_H
#define CLANG_INCLUDE_CLEANER_RECORD_H
#include <memory>
#include <vector>
namespace clang {
class ASTConsumer;
class ASTContext;
class Decl;
namespace include_cleaner {
// Contains recorded parser events relevant to include-cleaner.
//
// Typical use: install the consumer returned by record() into a clang parse;
// after the parse has run, Ctx and Roots describe what was seen.
struct RecordedAST {
// The consumer (when installed into clang) tracks declarations in this.
// It stores a pointer to this RecordedAST, so this object must outlive the
// returned consumer.
std::unique_ptr<ASTConsumer> record();
// The ASTContext of the recorded parse. Null until the consumer has been
// initialized by clang.
ASTContext *Ctx = nullptr;
// The set of declarations written at file scope inside the main file.
//
// These are the roots of the subtrees that should be traversed to find uses.
// (Traversing the TranslationUnitDecl would find uses inside headers!)
std::vector<Decl *> Roots;
};
} // namespace include_cleaner
} // namespace clang
#endif

View File

@ -25,6 +25,7 @@
#include "llvm/ADT/STLFunctionalExtras.h"
namespace clang {
class ASTContext;
class Decl;
class NamedDecl;
namespace include_cleaner {
@ -41,6 +42,11 @@ namespace include_cleaner {
/// being analyzed, in order to find all references within it.
void walkAST(Decl &Root, llvm::function_ref<void(SourceLocation, NamedDecl &)>);
/// Write an HTML summary of the analysis to the given stream.
///
/// \param File  the file whose source text is printed and annotated.
/// \param Roots declarations that are walked (with walkAST) to find references.
/// \param Ctx   AST context providing the SourceManager and language options.
/// \param OS    stream that receives the complete HTML document.
///
/// FIXME: Once analysis has a public API, this should be public too.
void writeHTMLReport(FileID File, llvm::ArrayRef<Decl *> Roots, ASTContext &Ctx,
llvm::raw_ostream &OS);
} // namespace include_cleaner
} // namespace clang

View File

@ -1,6 +1,8 @@
set(LLVM_LINK_COMPONENTS Support)
add_clang_library(clangIncludeCleaner
HTMLReport.cpp
Record.cpp
WalkAST.cpp
LINK_LIBS

View File

@ -0,0 +1,198 @@
//===--- HTMLReport.cpp - Explain the analysis for humans -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// If we're debugging this tool or trying to explain its conclusions, we need to
// be able to identify specific facts about the code and the inferences made.
//
// This library prints an annotated version of the code.
//
//===----------------------------------------------------------------------===//
#include "AnalysisInternal.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/Support/raw_ostream.h"
namespace clang::include_cleaner {
namespace {
// Stylesheet embedded verbatim in the report's <style> element.
//  - .ref styles a token that references a declaration.
//  - .sel marks a clickable element; it is position:relative so that the
//    absolutely-positioned #hover popup appears directly below it.
//  - #hover is the popup itself, filled in by the script on click.
constexpr llvm::StringLiteral CSS = R"css(
pre { line-height: 1.5em; }
.ref { text-decoration: underline; color: #008; }
.sel { position: relative; cursor: pointer; }
#hover {
background-color: #aaccff; border: 1px solid black;
z-index: 1;
position: absolute; top: 100%; left: 0;
font-family: sans-serif;
padding: 0.5em;
}
#hover p, #hover pre { margin: 0; }
#hover section header { font-weight: bold; }
#hover section:not(:first-child) { margin-top: 1em; }
)css";
// Script embedded verbatim in the report's <script> element. It implements the
// click-to-inspect popup: clicking a .sel element moves the single #hover div
// under it and fills it from the <template> elements named in data-hover.
constexpr llvm::StringLiteral JS = R"js(
  // Recreate the #hover div inside whichever target .sel element was clicked.
  function select(event) {
    var target = event.target.closest('.sel');
    var hover = document.getElementById('hover');
    if (hover) {
      if (hover.parentElement == target) return;
      hover.parentNode.removeChild(hover);
    }
    if (target == null) return;
    hover = document.createElement('div');
    hover.id = 'hover';
    fillHover(hover, target);
    target.appendChild(hover);
  }
  // Fill the #hover div with the templates named by data-hover in the target.
  function fillHover(hover, target) {
    target.dataset.hover?.split(',').forEach(function(id) {
      // Declare the loop variable: an undeclared `c` would become an implicit
      // global (and is an error in strict mode).
      for (const c of document.getElementById(id).content.childNodes)
        hover.appendChild(c.cloneNode(true));
    })
  }
)js";
// Print the declaration tersely, but enough to identify e.g. which overload.
std::string printDecl(const NamedDecl &ND) {
std::string S;
llvm::raw_string_ostream OS(S);
PrintingPolicy PP = ND.getASTContext().getPrintingPolicy();
PP.FullyQualifiedName = true;
PP.TerseOutput = true;
PP.SuppressInitializers = true;
ND.print(OS, PP);
llvm::erase_value(S, '\n');
return S;
}
// Collects the references found in one file and renders them as an HTML page:
// the file's source text, with every referencing token wrapped in a clickable
// span whose popup describes the referenced declaration.
//
// Usage: call addRef() for each reference, then write() once.
class Reporter {
  llvm::raw_ostream &OS;
  const ASTContext &Ctx;
  const SourceManager &SM;
  FileID File;

  // A declaration that a reference points at.
  struct Target {
    const NamedDecl *D;
  };
  // One entry per recorded reference; the index doubles as the template id.
  std::vector<Target> Targets;
  // (offset within File, index into Targets). Sorted before rendering.
  std::vector<std::pair</*Offset*/ unsigned, /*TargetIndex*/ unsigned>> Refs;

public:
  Reporter(llvm::raw_ostream &OS, ASTContext &Ctx, FileID File)
      : OS(OS), Ctx(Ctx), SM(Ctx.getSourceManager()), File(File) {}

  // Records a reference at Loc to declaration D.
  // References whose file location is not inside File are reported on stderr
  // and dropped: their offset belongs to another buffer, so recording them
  // would annotate an unrelated position in File's text.
  void addRef(SourceLocation Loc, const NamedDecl &D) {
    auto Coords = SM.getDecomposedLoc(SM.getFileLoc(Loc));
    if (Coords.first != File) {
      llvm::errs() << "Ref location outside file!\n";
      return;
    }
    Targets.push_back({&D});
    Refs.push_back({Coords.second, Targets.size() - 1});
  }

  // Emits the complete HTML document: style, script and one <template> per
  // target in the head, then the annotated source in the body.
  void write() {
    OS << "<!doctype html>\n";
    OS << "<html>\n";
    OS << "<head>\n";
    OS << "<style>" << CSS << "</style>\n";
    OS << "<script>" << JS << "</script>\n";
    for (unsigned I = 0; I < Targets.size(); ++I) {
      OS << "<template id='t" << I << "'><section>";
      writeTarget(Targets[I]);
      OS << "</section></template>\n";
    }
    OS << "</head>\n";
    OS << "<body>\n";
    writeCode();
    OS << "</body>\n";
    OS << "</html>\n";
  }

private:
  // Writes C as HTML text content. Only '<' and '&' are escaped.
  void escapeChar(char C) {
    switch (C) {
    case '<':
      OS << "&lt;";
      break;
    case '&':
      OS << "&amp;";
      break;
    default:
      OS << C;
    }
  }

  // Writes S as HTML text content, escaping each character.
  void escapeString(llvm::StringRef S) {
    for (char C : S)
      escapeChar(C);
  }

  // Writes the popup contents for one target: its kind and qualified name,
  // where it was declared, and a terse printout of the declaration.
  void writeTarget(const Target &T) {
    OS << "<header>" << T.D->getDeclKindName() << " ";
    escapeString(T.D->getQualifiedNameAsString());
    OS << "</header>";
    OS << "<p>declared at ";
    escapeString(SM.getFileLoc(T.D->getLocation()).printToString(SM));
    OS << "</p><pre>";
    escapeString(printDecl(*T.D));
    OS << "</pre>";
  }

  // Writes the file's text inside a <pre>, wrapping each referencing token in
  // a span that lists the templates of all targets referenced at that offset.
  void writeCode() {
    llvm::sort(Refs);
    llvm::StringRef Code = SM.getBufferData(File);

    OS << "<pre onclick='select(event)'>";
    // Refs not yet emitted; consumed front-to-back as offsets are reached.
    auto Rest = llvm::makeArrayRef(Refs);
    // Offset at which the currently open span ends; 0 means no span is open.
    unsigned End = 0;
    for (unsigned I = 0; I < Code.size(); ++I) {
      if (End == I && I > 0) {
        OS << "</span>";
        End = 0;
      }
      // Gather every ref that starts at this offset into one id list.
      std::string TargetList;
      Rest = Rest.drop_while([&](auto &R) {
        if (R.first != I)
          return false;
        if (!TargetList.empty())
          TargetList.push_back(',');
        TargetList.push_back('t');
        TargetList.append(std::to_string(R.second));
        return true;
      });
      if (!TargetList.empty()) {
        assert(End == 0 && "Overlapping tokens!");
        OS << "<span class='ref sel' data-hover='" << TargetList << "'>";
        End = I + Lexer::MeasureTokenLength(SM.getComposedLoc(File, I), SM,
                                            Ctx.getLangOpts());
      }
      escapeChar(Code[I]);
    }
    // A token that runs up to the very end of the buffer is never reached by
    // the in-loop check, so its span must be closed here.
    if (End != 0)
      OS << "</span>";
    OS << "</pre>\n";
  }
};
} // namespace
// Gathers every reference reachable from Roots, then emits the HTML report
// for File to OS.
void writeHTMLReport(FileID File, llvm::ArrayRef<Decl *> Roots, ASTContext &Ctx,
                     llvm::raw_ostream &OS) {
  Reporter Report(OS, Ctx, File);
  // Each reference discovered by the walk is handed straight to the reporter.
  auto RecordRef = [&Report](SourceLocation Loc, const NamedDecl &D) {
    Report.addRef(Loc, D);
  };
  for (Decl *Root : Roots)
    walkAST(*Root, RecordRef);
  Report.write();
}
} // namespace clang::include_cleaner

View File

@ -0,0 +1,39 @@
//===--- Record.cpp - Record compiler events ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang-include-cleaner/Record.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/DeclGroup.h"
#include "clang/Basic/SourceManager.h"
namespace clang::include_cleaner {
std::unique_ptr<ASTConsumer> RecordedAST::record() {
class Recorder : public ASTConsumer {
RecordedAST *Out;
public:
Recorder(RecordedAST *Out) : Out(Out) {}
void Initialize(ASTContext &Ctx) override { Out->Ctx = &Ctx; }
bool HandleTopLevelDecl(DeclGroupRef DG) override {
const auto &SM = Out->Ctx->getSourceManager();
for (Decl *D : DG) {
if (!SM.isWrittenInMainFile(SM.getExpansionLoc(D->getLocation())))
continue;
// FIXME: Filter out certain Obj-C and template-related decls.
Out->Roots.push_back(D);
}
return ASTConsumer::HandleTopLevelDecl(DG);
}
};
return std::make_unique<Recorder>(this);
}
} // namespace clang::include_cleaner

View File

@ -0,0 +1,6 @@
#ifndef BAR_H
#define BAR_H
// Test input: declares bar() for tests that include this header.
int bar();
#endif

View File

@ -0,0 +1,6 @@
#ifndef FOO_H
#define FOO_H
// Test input: declares foo() for tests that include this header.
int foo();
#endif

View File

@ -0,0 +1,6 @@
// RUN: clang-include-cleaner -html=- %s -- -I %S/Inputs | FileCheck %s
// The report is piped into FileCheck; the expectation below verifies that the
// call to foo() is rendered as a clickable reference span.
#include "bar.h"
#include "foo.h"
int n = foo();
// CHECK: <span class='ref sel' data-hover='t{{[0-9]+}}'>foo</span>()

View File

@ -0,0 +1,12 @@
# Build rules for the standalone clang-include-cleaner executable.
set(LLVM_LINK_COMPONENTS Support)
# The tool includes a header by its bare name from the library directory
# (presumably AnalysisInternal.h - confirm), hence this extra include path.
include_directories("../lib") # FIXME: use public APIs instead.
add_clang_tool(clang-include-cleaner IncludeCleaner.cpp)
# Clang libraries the tool depends on.
clang_target_link_libraries(clang-include-cleaner PRIVATE
clangBasic
clangTooling
)
# The include-cleaner library itself.
target_link_libraries(clang-include-cleaner PRIVATE
clangIncludeCleaner
)

View File

@ -0,0 +1,100 @@
//===--- IncludeCleaner.cpp - standalone tool for include analysis --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AnalysisInternal.h"
#include "clang-include-cleaner/Record.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
namespace clang {
namespace include_cleaner {
namespace {
namespace cl = llvm::cl;
// Human-readable description of what the tool does.
// The raw string begins and ends with a newline to keep the source readable;
// trim() strips that surrounding whitespace from the resulting StringRef.
llvm::StringRef Overview = llvm::StringLiteral(R"(
clang-include-cleaner analyzes the #include directives in source code.
It suggests removing headers that the code is not using.
It suggests inserting headers that the code relies on, but does not include.
These changes make the file more self-contained and (at scale) make the codebase
easier to reason about and modify.
The tool operates on *working* source code. This means it can suggest including
headers that are only indirectly included, but cannot suggest those that are
missing entirely. (clang-include-fixer can do this).
)")
.trim();
// Category under which this tool's own command-line options are grouped.
cl::OptionCategory IncludeCleaner("clang-include-cleaner");
// -html=<path>: file that receives the HTML report.
// main() checks whether this flag was given to decide which action to run.
cl::opt<std::string> HTMLReportPath{
"html",
cl::desc("Specify an output filename for an HTML report. "
"This describes both recommendations and reasons for changes."),
cl::cat(IncludeCleaner),
};
class HTMLReportAction : public clang::ASTFrontendAction {
RecordedAST AST;
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
StringRef File) override {
return AST.record();
}
void EndSourceFile() override {
std::error_code EC;
llvm::raw_fd_ostream OS(HTMLReportPath, EC);
if (EC) {
llvm::errs() << "Unable to write HTML report to " << HTMLReportPath
<< ": " << EC.message() << "\n";
exit(1);
}
writeHTMLReport(AST.Ctx->getSourceManager().getMainFileID(), AST.Roots,
*AST.Ctx, OS);
}
};
} // namespace
} // namespace include_cleaner
} // namespace clang
int main(int argc, const char **argv) {
using namespace clang::include_cleaner;
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
auto OptionsParser =
clang::tooling::CommonOptionsParser::create(argc, argv, IncludeCleaner);
if (!OptionsParser) {
llvm::errs() << toString(OptionsParser.takeError());
return 1;
}
std::unique_ptr<clang::tooling::FrontendActionFactory> Factory;
if (HTMLReportPath.getNumOccurrences()) {
if (OptionsParser->getSourcePathList().size() != 1) {
llvm::errs() << "-" << HTMLReportPath.ArgStr
<< " requires a single input file";
return 1;
}
Factory = clang::tooling::newFrontendActionFactory<HTMLReportAction>();
} else {
llvm::errs() << "Unimplemented\n";
return 1;
}
return clang::tooling::ClangTool(OptionsParser->getCompilations(),
OptionsParser->getSourcePathList())
.run(Factory.get());
}

View File

@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
add_custom_target(ClangIncludeCleanerUnitTests)
add_unittest(ClangIncludeCleanerUnitTests ClangIncludeCleanerTests
RecordTest.cpp
WalkASTTest.cpp
)

View File

@ -0,0 +1,84 @@
#include "clang-include-cleaner/Record.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Testing/TestAST.h"
#include "llvm/Support/raw_ostream.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace clang::include_cleaner {
namespace {
// Matches a Decl* that is a NamedDecl whose name equals N.
// On a mismatch, a dump of the node is appended to the failure explanation.
MATCHER_P(Named, N, "") {
  const auto *ND = llvm::dyn_cast<NamedDecl>(arg);
  if (ND && N == ND->getNameAsString())
    return true;

  std::string Dump;
  llvm::raw_string_ostream DumpStream(Dump);
  arg->dump(DumpStream);
  *result_listener << Dump;
  return false;
}
class RecordASTTest : public ::testing::Test {
protected:
TestInputs Inputs;
RecordedAST Recorded;
RecordASTTest() {
struct RecordAction : public ASTFrontendAction {
RecordedAST &Out;
RecordAction(RecordedAST &Out) : Out(Out) {}
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
StringRef) override {
return Out.record();
}
};
Inputs.MakeAction = [this] {
return std::make_unique<RecordAction>(Recorded);
};
}
TestAST build() { return TestAST(Inputs); }
};
// Top-level decl from the main file is a root, nested ones aren't.
// Only the outer namespace `ns` is expected in Roots; `x`, the anonymous
// namespace and `y` are all nested inside it.
TEST_F(RecordASTTest, Namespace) {
Inputs.Code =
R"cpp(
namespace ns {
int x;
namespace {
int y;
}
}
)cpp";
// Parsing uses the fixture's recording action, which fills Recorded.
auto AST = build();
EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("ns")));
}
// Decl in included file is not a root.
// headerFunc() comes from header.h, so only mainFunc() is expected in Roots.
TEST_F(RecordASTTest, Inclusion) {
Inputs.ExtraFiles["header.h"] = "void headerFunc();";
Inputs.Code = R"cpp(
#include "header.h"
void mainFunc();
)cpp";
// Parsing uses the fixture's recording action, which fills Recorded.
auto AST = build();
EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("mainFunc")));
}
// Decl from macro expanded into the main file is a root.
// The macro X is defined in header.h, but it is used (expanded) in the main
// file, so the declaration of x() it produces is expected in Roots.
TEST_F(RecordASTTest, Macros) {
Inputs.ExtraFiles["header.h"] = "#define X void x();";
Inputs.Code = R"cpp(
#include "header.h"
X
)cpp";
// Parsing uses the fixture's recording action, which fills Recorded.
auto AST = build();
EXPECT_THAT(Recorded.Roots, testing::ElementsAre(Named("x")));
}
} // namespace
} // namespace clang::include_cleaner

View File

@ -53,6 +53,10 @@ struct TestInputs {
/// To suppress this, set ErrorOK or include "error-ok" in a comment in Code.
/// In either case, all diagnostics appear in TestAST::diagnostics().
bool ErrorOK = false;
/// The action used to parse the code.
/// By default, a SyntaxOnlyAction is used.
std::function<std::unique_ptr<FrontendAction>()> MakeAction;
};
/// The result of parsing a file specified by TestInputs.
@ -78,6 +82,7 @@ public:
SourceManager &sourceManager() { return Clang->getSourceManager(); }
FileManager &fileManager() { return Clang->getFileManager(); }
Preprocessor &preprocessor() { return Clang->getPreprocessor(); }
FrontendAction &action() { return *Action; }
/// Returns diagnostics emitted during parsing.
/// (By default, errors cause test failures, see TestInputs::ErrorOK).

View File

@ -114,7 +114,8 @@ TestAST::TestAST(const TestInputs &In) {
// Running the FrontendAction creates the other components: SourceManager,
// Preprocessor, ASTContext, Sema. Preprocessor needs TargetInfo to be set.
EXPECT_TRUE(Clang->createTarget());
Action = std::make_unique<SyntaxOnlyAction>();
Action =
In.MakeAction ? In.MakeAction() : std::make_unique<SyntaxOnlyAction>();
const FrontendInputFile &Main = Clang->getFrontendOpts().Inputs.front();
if (!Action->BeginSourceFile(*Clang, Main)) {
ADD_FAILURE() << "Failed to BeginSourceFile()";