
Only using that change in StringRef already decreases the number of preoprocessed lines from 7837621 to 7776151 for LLVMSupport Perhaps more interestingly, it shows that many files were relying on the inclusion of StringRef.h to have the declaration from STLExtras.h. This patch tries hard to patch relevant part of llvm-project impacted by this hidden dependency removal. Potential impact: - "llvm/ADT/StringRef.h" no longer includes <memory>, "llvm/ADT/Optional.h" nor "llvm/ADT/STLExtras.h" Related Discourse thread: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831
331 lines
12 KiB
C++
331 lines
12 KiB
C++
//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements hierarchical time profiler.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Support/TimeProfiler.h"
|
|
#include "llvm/ADT/STLFunctionalExtras.h"
|
|
#include "llvm/ADT/StringMap.h"
|
|
#include "llvm/Support/JSON.h"
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/Process.h"
|
|
#include "llvm/Support/Threading.h"
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <chrono>
|
|
#include <mutex>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
using namespace std::chrono;
|
|
using namespace llvm;
|
|
|
|
static std::mutex Mu;
|
|
// List of all instances
|
|
static ManagedStatic<std::vector<TimeTraceProfiler *>>
|
|
ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
|
|
// Per Thread instance
|
|
static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
|
|
|
|
TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
|
|
return TimeTraceProfilerInstance;
|
|
}
|
|
|
|
typedef duration<steady_clock::rep, steady_clock::period> DurationType;
|
|
typedef time_point<steady_clock> TimePointType;
|
|
typedef std::pair<size_t, DurationType> CountAndDurationType;
|
|
typedef std::pair<std::string, CountAndDurationType>
|
|
NameAndCountAndDurationType;
|
|
|
|
namespace {
|
|
struct Entry {
|
|
const TimePointType Start;
|
|
TimePointType End;
|
|
const std::string Name;
|
|
const std::string Detail;
|
|
|
|
Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
|
|
: Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
|
|
Detail(std::move(Dt)) {}
|
|
|
|
// Calculate timings for FlameGraph. Cast time points to microsecond precision
|
|
// rather than casting duration. This avoid truncation issues causing inner
|
|
// scopes overruning outer scopes.
|
|
steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
|
|
return (time_point_cast<microseconds>(Start) -
|
|
time_point_cast<microseconds>(StartTime))
|
|
.count();
|
|
}
|
|
|
|
steady_clock::rep getFlameGraphDurUs() const {
|
|
return (time_point_cast<microseconds>(End) -
|
|
time_point_cast<microseconds>(Start))
|
|
.count();
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
struct llvm::TimeTraceProfiler {
|
|
TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
|
|
: BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
|
|
ProcName(ProcName), Pid(sys::Process::getProcessId()),
|
|
Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
|
|
llvm::get_thread_name(ThreadName);
|
|
}
|
|
|
|
void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
|
|
Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
|
|
Detail());
|
|
}
|
|
|
|
void end() {
|
|
assert(!Stack.empty() && "Must call begin() first");
|
|
Entry &E = Stack.back();
|
|
E.End = steady_clock::now();
|
|
|
|
// Check that end times monotonically increase.
|
|
assert((Entries.empty() ||
|
|
(E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
|
|
Entries.back().getFlameGraphStartUs(StartTime) +
|
|
Entries.back().getFlameGraphDurUs())) &&
|
|
"TimeProfiler scope ended earlier than previous scope");
|
|
|
|
// Calculate duration at full precision for overall counts.
|
|
DurationType Duration = E.End - E.Start;
|
|
|
|
// Only include sections longer or equal to TimeTraceGranularity msec.
|
|
if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
|
|
Entries.emplace_back(E);
|
|
|
|
// Track total time taken by each "name", but only the topmost levels of
|
|
// them; e.g. if there's a template instantiation that instantiates other
|
|
// templates from within, we only want to add the topmost one. "topmost"
|
|
// happens to be the ones that don't have any currently open entries above
|
|
// itself.
|
|
if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
|
|
[&](const Entry &Val) { return Val.Name == E.Name; })) {
|
|
auto &CountAndTotal = CountAndTotalPerName[E.Name];
|
|
CountAndTotal.first++;
|
|
CountAndTotal.second += Duration;
|
|
}
|
|
|
|
Stack.pop_back();
|
|
}
|
|
|
|
// Write events from this TimeTraceProfilerInstance and
|
|
// ThreadTimeTraceProfilerInstances.
|
|
void write(raw_pwrite_stream &OS) {
|
|
// Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
|
|
std::lock_guard<std::mutex> Lock(Mu);
|
|
assert(Stack.empty() &&
|
|
"All profiler sections should be ended when calling write");
|
|
assert(llvm::all_of(*ThreadTimeTraceProfilerInstances,
|
|
[](const auto &TTP) { return TTP->Stack.empty(); }) &&
|
|
"All profiler sections should be ended when calling write");
|
|
|
|
json::OStream J(OS);
|
|
J.objectBegin();
|
|
J.attributeBegin("traceEvents");
|
|
J.arrayBegin();
|
|
|
|
// Emit all events for the main flame graph.
|
|
auto writeEvent = [&](const auto &E, uint64_t Tid) {
|
|
auto StartUs = E.getFlameGraphStartUs(StartTime);
|
|
auto DurUs = E.getFlameGraphDurUs();
|
|
|
|
J.object([&] {
|
|
J.attribute("pid", Pid);
|
|
J.attribute("tid", int64_t(Tid));
|
|
J.attribute("ph", "X");
|
|
J.attribute("ts", StartUs);
|
|
J.attribute("dur", DurUs);
|
|
J.attribute("name", E.Name);
|
|
if (!E.Detail.empty()) {
|
|
J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
|
|
}
|
|
});
|
|
};
|
|
for (const Entry &E : Entries)
|
|
writeEvent(E, this->Tid);
|
|
for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
|
|
for (const Entry &E : TTP->Entries)
|
|
writeEvent(E, TTP->Tid);
|
|
|
|
// Emit totals by section name as additional "thread" events, sorted from
|
|
// longest one.
|
|
// Find highest used thread id.
|
|
uint64_t MaxTid = this->Tid;
|
|
for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
|
|
MaxTid = std::max(MaxTid, TTP->Tid);
|
|
|
|
// Combine all CountAndTotalPerName from threads into one.
|
|
StringMap<CountAndDurationType> AllCountAndTotalPerName;
|
|
auto combineStat = [&](const auto &Stat) {
|
|
StringRef Key = Stat.getKey();
|
|
auto Value = Stat.getValue();
|
|
auto &CountAndTotal = AllCountAndTotalPerName[Key];
|
|
CountAndTotal.first += Value.first;
|
|
CountAndTotal.second += Value.second;
|
|
};
|
|
for (const auto &Stat : CountAndTotalPerName)
|
|
combineStat(Stat);
|
|
for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
|
|
for (const auto &Stat : TTP->CountAndTotalPerName)
|
|
combineStat(Stat);
|
|
|
|
std::vector<NameAndCountAndDurationType> SortedTotals;
|
|
SortedTotals.reserve(AllCountAndTotalPerName.size());
|
|
for (const auto &Total : AllCountAndTotalPerName)
|
|
SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
|
|
|
|
llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
|
|
const NameAndCountAndDurationType &B) {
|
|
return A.second.second > B.second.second;
|
|
});
|
|
|
|
// Report totals on separate threads of tracing file.
|
|
uint64_t TotalTid = MaxTid + 1;
|
|
for (const NameAndCountAndDurationType &Total : SortedTotals) {
|
|
auto DurUs = duration_cast<microseconds>(Total.second.second).count();
|
|
auto Count = AllCountAndTotalPerName[Total.first].first;
|
|
|
|
J.object([&] {
|
|
J.attribute("pid", Pid);
|
|
J.attribute("tid", int64_t(TotalTid));
|
|
J.attribute("ph", "X");
|
|
J.attribute("ts", 0);
|
|
J.attribute("dur", DurUs);
|
|
J.attribute("name", "Total " + Total.first);
|
|
J.attributeObject("args", [&] {
|
|
J.attribute("count", int64_t(Count));
|
|
J.attribute("avg ms", int64_t(DurUs / Count / 1000));
|
|
});
|
|
});
|
|
|
|
++TotalTid;
|
|
}
|
|
|
|
auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
|
|
StringRef arg) {
|
|
J.object([&] {
|
|
J.attribute("cat", "");
|
|
J.attribute("pid", Pid);
|
|
J.attribute("tid", int64_t(Tid));
|
|
J.attribute("ts", 0);
|
|
J.attribute("ph", "M");
|
|
J.attribute("name", Name);
|
|
J.attributeObject("args", [&] { J.attribute("name", arg); });
|
|
});
|
|
};
|
|
|
|
writeMetadataEvent("process_name", Tid, ProcName);
|
|
writeMetadataEvent("thread_name", Tid, ThreadName);
|
|
for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
|
|
writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
|
|
|
|
J.arrayEnd();
|
|
J.attributeEnd();
|
|
|
|
// Emit the absolute time when this TimeProfiler started.
|
|
// This can be used to combine the profiling data from
|
|
// multiple processes and preserve actual time intervals.
|
|
J.attribute("beginningOfTime",
|
|
time_point_cast<microseconds>(BeginningOfTime)
|
|
.time_since_epoch()
|
|
.count());
|
|
|
|
J.objectEnd();
|
|
}
|
|
|
|
SmallVector<Entry, 16> Stack;
|
|
SmallVector<Entry, 128> Entries;
|
|
StringMap<CountAndDurationType> CountAndTotalPerName;
|
|
const time_point<system_clock> BeginningOfTime;
|
|
const TimePointType StartTime;
|
|
const std::string ProcName;
|
|
const sys::Process::Pid Pid;
|
|
SmallString<0> ThreadName;
|
|
const uint64_t Tid;
|
|
|
|
// Minimum time granularity (in microseconds)
|
|
const unsigned TimeTraceGranularity;
|
|
};
|
|
|
|
void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
|
|
StringRef ProcName) {
|
|
assert(TimeTraceProfilerInstance == nullptr &&
|
|
"Profiler should not be initialized");
|
|
TimeTraceProfilerInstance = new TimeTraceProfiler(
|
|
TimeTraceGranularity, llvm::sys::path::filename(ProcName));
|
|
}
|
|
|
|
// Removes all TimeTraceProfilerInstances.
|
|
// Called from main thread.
|
|
void llvm::timeTraceProfilerCleanup() {
|
|
delete TimeTraceProfilerInstance;
|
|
TimeTraceProfilerInstance = nullptr;
|
|
std::lock_guard<std::mutex> Lock(Mu);
|
|
for (auto *TTP : *ThreadTimeTraceProfilerInstances)
|
|
delete TTP;
|
|
ThreadTimeTraceProfilerInstances->clear();
|
|
}
|
|
|
|
// Finish TimeTraceProfilerInstance on a worker thread.
|
|
// This doesn't remove the instance, just moves the pointer to global vector.
|
|
void llvm::timeTraceProfilerFinishThread() {
|
|
std::lock_guard<std::mutex> Lock(Mu);
|
|
ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance);
|
|
TimeTraceProfilerInstance = nullptr;
|
|
}
|
|
|
|
void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
|
|
assert(TimeTraceProfilerInstance != nullptr &&
|
|
"Profiler object can't be null");
|
|
TimeTraceProfilerInstance->write(OS);
|
|
}
|
|
|
|
Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
|
|
StringRef FallbackFileName) {
|
|
assert(TimeTraceProfilerInstance != nullptr &&
|
|
"Profiler object can't be null");
|
|
|
|
std::string Path = PreferredFileName.str();
|
|
if (Path.empty()) {
|
|
Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
|
|
Path += ".time-trace";
|
|
}
|
|
|
|
std::error_code EC;
|
|
raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
|
|
if (EC)
|
|
return createStringError(EC, "Could not open " + Path);
|
|
|
|
timeTraceProfilerWrite(OS);
|
|
return Error::success();
|
|
}
|
|
|
|
void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
|
|
if (TimeTraceProfilerInstance != nullptr)
|
|
TimeTraceProfilerInstance->begin(std::string(Name),
|
|
[&]() { return std::string(Detail); });
|
|
}
|
|
|
|
void llvm::timeTraceProfilerBegin(StringRef Name,
|
|
llvm::function_ref<std::string()> Detail) {
|
|
if (TimeTraceProfilerInstance != nullptr)
|
|
TimeTraceProfilerInstance->begin(std::string(Name), Detail);
|
|
}
|
|
|
|
void llvm::timeTraceProfilerEnd() {
|
|
if (TimeTraceProfilerInstance != nullptr)
|
|
TimeTraceProfilerInstance->end();
|
|
}
|