
The cleanup was manual, but assisted by "include-what-you-use". It consists of: 1. Removing unused forward declarations. No impact expected. 2. Removing unused headers in .cpp files. No impact expected. 3. Removing unused headers in .h files. This removes implicit dependencies and is generally considered a good thing, but this may break downstream builds. I've updated llvm, clang, lld, lldb and mlir deps, and included a list of the modifications in the second part of the commit. 4. Replacing header inclusions with forward declarations. This has the same impact as 3. Notable changes: - llvm/Support/TargetParser.h no longer includes llvm/Support/AArch64TargetParser.h or llvm/Support/ARMTargetParser.h - llvm/Support/TypeSize.h no longer includes llvm/Support/WithColor.h - llvm/Support/YAMLTraits.h no longer includes llvm/Support/Regex.h - llvm/ADT/SmallVector.h no longer includes llvm/Support/MemAlloc.h or llvm/Support/ErrorHandling.h You may need to add some of these headers to your compilation units, if need be. As a hint of the impact of the cleanup, running clang++ -E -Iinclude -I../llvm/include ../llvm/lib/Support/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l before: 8000919 lines after: 7917500 lines Reduced dependencies also help incremental rebuilds and are more ccache friendly, something not shown by the above metric :-) Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831
331 lines
12 KiB
C++
331 lines
12 KiB
C++
//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements a hierarchical time profiler.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Support/TimeProfiler.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/Support/JSON.h"
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/Process.h"
|
|
#include "llvm/Support/Threading.h"
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <chrono>
|
|
#include <mutex>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
using namespace std::chrono;
|
|
using namespace llvm;
|
|
|
|
static std::mutex Mu;
|
|
// List of all instances
|
|
static ManagedStatic<std::vector<TimeTraceProfiler *>>
|
|
ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
|
|
// Per Thread instance
|
|
static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
|
|
|
|
/// Returns the calling thread's profiler, or null if the thread has none.
TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
  TimeTraceProfiler *Current = TimeTraceProfilerInstance;
  return Current;
}
|
|
|
|
// Full-precision duration and time point of the steady clock.
using DurationType = duration<steady_clock::rep, steady_clock::period>;
using TimePointType = time_point<steady_clock>;
// (number of occurrences, accumulated duration).
using CountAndDurationType = std::pair<size_t, DurationType>;
// (section name, (number of occurrences, accumulated duration)).
using NameAndCountAndDurationType =
    std::pair<std::string, CountAndDurationType>;
|
|
|
|
namespace {
|
|
struct Entry {
|
|
const TimePointType Start;
|
|
TimePointType End;
|
|
const std::string Name;
|
|
const std::string Detail;
|
|
|
|
Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
|
|
: Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
|
|
Detail(std::move(Dt)) {}
|
|
|
|
// Calculate timings for FlameGraph. Cast time points to microsecond precision
|
|
// rather than casting duration. This avoid truncation issues causing inner
|
|
// scopes overruning outer scopes.
|
|
steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
|
|
return (time_point_cast<microseconds>(Start) -
|
|
time_point_cast<microseconds>(StartTime))
|
|
.count();
|
|
}
|
|
|
|
steady_clock::rep getFlameGraphDurUs() const {
|
|
return (time_point_cast<microseconds>(End) -
|
|
time_point_cast<microseconds>(Start))
|
|
.count();
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
/// Records nested timed sections for one thread and serializes them, together
/// with the sections of every profiler stored in
/// ThreadTimeTraceProfilerInstances, as a JSON document.
struct llvm::TimeTraceProfiler {
  /// \param TimeTraceGranularity minimum duration, in microseconds, a section
  ///        must reach to be kept as an individual event.
  /// \param ProcName name reported in the "process_name" metadata event.
  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
      : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
        ProcName(ProcName), Pid(sys::Process::getProcessId()),
        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
    llvm::get_thread_name(ThreadName);
  }

  /// Opens a new section named \p Name on top of the stack. \p Detail is
  /// invoked right away to produce the section's detail string.
  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
    Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
                       Detail());
  }

  /// Closes the most recently opened section. begin() must have been called
  /// before.
  void end() {
    assert(!Stack.empty() && "Must call begin() first");
    Entry &E = Stack.back();
    E.End = steady_clock::now();

    // Check that end times monotonically increase.
    assert((Entries.empty() ||
            (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
             Entries.back().getFlameGraphStartUs(StartTime) +
                 Entries.back().getFlameGraphDurUs())) &&
           "TimeProfiler scope ended earlier than previous scope");

    // Calculate duration at full precision for overall counts.
    DurationType Duration = E.End - E.Start;

    // Only include sections longer or equal to TimeTraceGranularity
    // microseconds.
    if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
      Entries.emplace_back(E);

    // Track total time taken by each "name", but only the topmost levels of
    // them; e.g. if there's a template instantiation that instantiates other
    // templates from within, we only want to add the topmost one. "topmost"
    // happens to be the ones that don't have any currently open entries above
    // itself.
    if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
                      [&](const Entry &Val) { return Val.Name == E.Name; })) {
      auto &CountAndTotal = CountAndTotalPerName[E.Name];
      CountAndTotal.first++;
      CountAndTotal.second += Duration;
    }

    // E refers into Stack, so it must not be used past this point.
    Stack.pop_back();
  }

  // Write events from this TimeTraceProfilerInstance and
  // ThreadTimeTraceProfilerInstances.
  //
  // The output is one JSON object holding a "traceEvents" array followed by a
  // "beginningOfTime" attribute. All sections must have been ended before
  // this is called.
  void write(raw_pwrite_stream &OS) {
    // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
    std::lock_guard<std::mutex> Lock(Mu);
    assert(Stack.empty() &&
           "All profiler sections should be ended when calling write");
    assert(llvm::all_of(*ThreadTimeTraceProfilerInstances,
                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
           "All profiler sections should be ended when calling write");

    json::OStream J(OS);
    J.objectBegin();
    J.attributeBegin("traceEvents");
    J.arrayBegin();

    // Emit all events for the main flame graph. Timestamps of every event,
    // including those of other threads, are relative to this profiler's
    // StartTime.
    auto writeEvent = [&](const auto &E, uint64_t Tid) {
      auto StartUs = E.getFlameGraphStartUs(StartTime);
      auto DurUs = E.getFlameGraphDurUs();

      J.object([&] {
        J.attribute("pid", Pid);
        J.attribute("tid", int64_t(Tid));
        J.attribute("ph", "X");
        J.attribute("ts", StartUs);
        J.attribute("dur", DurUs);
        J.attribute("name", E.Name);
        if (!E.Detail.empty()) {
          J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
        }
      });
    };
    for (const Entry &E : Entries)
      writeEvent(E, this->Tid);
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      for (const Entry &E : TTP->Entries)
        writeEvent(E, TTP->Tid);

    // Emit totals by section name as additional "thread" events, sorted from
    // longest one.
    // Find highest used thread id.
    uint64_t MaxTid = this->Tid;
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      MaxTid = std::max(MaxTid, TTP->Tid);

    // Combine all CountAndTotalPerName from threads into one.
    StringMap<CountAndDurationType> AllCountAndTotalPerName;
    auto combineStat = [&](const auto &Stat) {
      StringRef Key = Stat.getKey();
      const auto &Value = Stat.getValue();
      auto &CountAndTotal = AllCountAndTotalPerName[Key];
      CountAndTotal.first += Value.first;
      CountAndTotal.second += Value.second;
    };
    for (const auto &Stat : CountAndTotalPerName)
      combineStat(Stat);
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      for (const auto &Stat : TTP->CountAndTotalPerName)
        combineStat(Stat);

    std::vector<NameAndCountAndDurationType> SortedTotals;
    SortedTotals.reserve(AllCountAndTotalPerName.size());
    for (const auto &Total : AllCountAndTotalPerName)
      SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());

    llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
                                const NameAndCountAndDurationType &B) {
      return A.second.second > B.second.second;
    });

    // Report totals on separate threads of tracing file.
    uint64_t TotalTid = MaxTid + 1;
    for (const NameAndCountAndDurationType &Total : SortedTotals) {
      auto DurUs = duration_cast<microseconds>(Total.second.second).count();
      // The count was copied into SortedTotals along with the duration, so
      // there is no need to look it up in the map again.
      auto Count = Total.second.first;

      J.object([&] {
        J.attribute("pid", Pid);
        J.attribute("tid", int64_t(TotalTid));
        J.attribute("ph", "X");
        J.attribute("ts", 0);
        J.attribute("dur", DurUs);
        J.attribute("name", "Total " + Total.first);
        J.attributeObject("args", [&] {
          J.attribute("count", int64_t(Count));
          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
        });
      });

      ++TotalTid;
    }

    auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
                                  StringRef arg) {
      J.object([&] {
        J.attribute("cat", "");
        J.attribute("pid", Pid);
        J.attribute("tid", int64_t(Tid));
        J.attribute("ts", 0);
        J.attribute("ph", "M");
        J.attribute("name", Name);
        J.attributeObject("args", [&] { J.attribute("name", arg); });
      });
    };

    writeMetadataEvent("process_name", Tid, ProcName);
    writeMetadataEvent("thread_name", Tid, ThreadName);
    for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
      writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);

    J.arrayEnd();
    J.attributeEnd();

    // Emit the absolute time when this TimeProfiler started.
    // This can be used to combine the profiling data from
    // multiple processes and preserve actual time intervals.
    J.attribute("beginningOfTime",
                time_point_cast<microseconds>(BeginningOfTime)
                    .time_since_epoch()
                    .count());

    J.objectEnd();
  }

  // Sections that are currently open, innermost last.
  SmallVector<Entry, 16> Stack;
  // Finished sections that met the granularity threshold.
  SmallVector<Entry, 128> Entries;
  // Per-name occurrence count and accumulated duration of topmost sections.
  StringMap<CountAndDurationType> CountAndTotalPerName;
  // System-clock time at construction, emitted as "beginningOfTime".
  const time_point<system_clock> BeginningOfTime;
  // Steady-clock time at construction; event timestamps are relative to it.
  const TimePointType StartTime;
  const std::string ProcName;
  const sys::Process::Pid Pid;
  SmallString<0> ThreadName;
  const uint64_t Tid;

  // Minimum time granularity (in microseconds)
  const unsigned TimeTraceGranularity;
};
|
|
|
|
void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
|
|
StringRef ProcName) {
|
|
assert(TimeTraceProfilerInstance == nullptr &&
|
|
"Profiler should not be initialized");
|
|
TimeTraceProfilerInstance = new TimeTraceProfiler(
|
|
TimeTraceGranularity, llvm::sys::path::filename(ProcName));
|
|
}
|
|
|
|
// Removes all TimeTraceProfilerInstances.
|
|
// Called from main thread.
|
|
void llvm::timeTraceProfilerCleanup() {
|
|
delete TimeTraceProfilerInstance;
|
|
TimeTraceProfilerInstance = nullptr;
|
|
std::lock_guard<std::mutex> Lock(Mu);
|
|
for (auto *TTP : *ThreadTimeTraceProfilerInstances)
|
|
delete TTP;
|
|
ThreadTimeTraceProfilerInstances->clear();
|
|
}
|
|
|
|
// Finish TimeTraceProfilerInstance on a worker thread.
|
|
// This doesn't remove the instance, just moves the pointer to global vector.
|
|
void llvm::timeTraceProfilerFinishThread() {
|
|
std::lock_guard<std::mutex> Lock(Mu);
|
|
ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance);
|
|
TimeTraceProfilerInstance = nullptr;
|
|
}
|
|
|
|
/// Serializes the calling thread's profiler to \p OS. The thread must have a
/// profiler.
void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
  TimeTraceProfiler *Profiler = TimeTraceProfilerInstance;
  assert(Profiler != nullptr && "Profiler object can't be null");
  Profiler->write(OS);
}
|
|
|
|
/// Writes the calling thread's profile to a file.
///
/// The file is \p PreferredFileName when that is non-empty. Otherwise the
/// name is \p FallbackFileName (or "out" if the fallback is "-") with
/// ".time-trace" appended. Returns an error if the file cannot be opened.
Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
                                   StringRef FallbackFileName) {
  assert(TimeTraceProfilerInstance != nullptr &&
         "Profiler object can't be null");

  std::string Path = PreferredFileName.str();
  if (Path.empty()) {
    // No explicit name: derive one from the fallback.
    if (FallbackFileName == "-")
      Path = "out";
    else
      Path = FallbackFileName.str();
    Path += ".time-trace";
  }

  std::error_code EC;
  raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
  if (EC)
    return createStringError(EC, "Could not open " + Path);

  timeTraceProfilerWrite(OS);
  return Error::success();
}
|
|
|
|
/// Opens a section named \p Name with detail text \p Detail on the calling
/// thread's profiler. Does nothing if the thread has no profiler.
void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  auto MakeDetail = [&]() { return std::string(Detail); };
  TimeTraceProfilerInstance->begin(std::string(Name), MakeDetail);
}
|
|
|
|
/// Opens a section named \p Name on the calling thread's profiler, passing
/// \p Detail on as the producer of the detail text. Does nothing (and does
/// not invoke \p Detail) if the thread has no profiler.
void llvm::timeTraceProfilerBegin(StringRef Name,
                                  llvm::function_ref<std::string()> Detail) {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  TimeTraceProfilerInstance->begin(std::string(Name), Detail);
}
|
|
|
|
void llvm::timeTraceProfilerEnd() {
|
|
if (TimeTraceProfilerInstance != nullptr)
|
|
TimeTraceProfilerInstance->end();
|
|
}
|