llvm-project/llvm/lib/Support/TimeProfiler.cpp
Russell Gallop 8ddcd1dc26 [Support] Extend TimeProfiler to support multiple threads
This makes TimeTraceProfilerInstance thread local. Added
timeTraceProfilerFinishThread() which moves the thread local instance to
a global vector of instances. timeTraceProfilerWrite() then writes
recorded data from all instances.

Threads are identified based on their thread ids. Totals are reported
with artificial thread ids higher than the real ones.

Replaced raw pointer for TimeTraceProfilerInstance with unique_ptr.

Differential Revision: https://reviews.llvm.org/D71059
2019-12-12 12:01:44 +00:00

290 lines
9.8 KiB
C++

//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a hierarchical time profiler.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/TimeProfiler.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <iterator>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
using namespace std::chrono;
// File-local state shared between threads.
namespace {
// Serialises every access to ThreadTimeTraceProfilerInstances.
std::mutex Mu;
// Profilers handed over by timeTraceProfilerFinishThread(). They are read by
// TimeTraceProfiler::Write() and dropped by timeTraceProfilerCleanup().
std::vector<std::unique_ptr<llvm::TimeTraceProfiler>>
ThreadTimeTraceProfilerInstances; // guarded by Mu
} // namespace
namespace llvm {
// Profiler owned by the current thread. It starts out null, is created by
// timeTraceProfilerInitialize(), and becomes null again once
// timeTraceProfilerCleanup() resets it or timeTraceProfilerFinishThread()
// moves it into the global list. The begin/end entry points below do nothing
// while it is null.
thread_local std::unique_ptr<TimeTraceProfiler> TimeTraceProfilerInstance =
nullptr;
typedef duration<steady_clock::rep, steady_clock::period> DurationType;
typedef time_point<steady_clock> TimePointType;
typedef std::pair<size_t, DurationType> CountAndDurationType;
typedef std::pair<std::string, CountAndDurationType>
NameAndCountAndDurationType;
/// One timed section: where it started, where it ended, its name and a
/// (possibly empty) detail string.
struct Entry {
  const TimePointType Start;
  TimePointType End;
  const std::string Name;
  const std::string Detail;

  /// Takes ownership of all four arguments by moving from them.
  Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
      : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
        Detail(std::move(Dt)) {}

  // Flame graph timings are computed by converting each time point to
  // microsecond precision first and subtracting afterwards, instead of
  // converting the resulting duration. This avoids truncation issues that
  // would let inner scopes overrun their outer scopes.

  /// Microseconds between \p StartTime and the start of this section.
  steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
    const auto OriginUs = time_point_cast<microseconds>(StartTime);
    const auto BeginUs = time_point_cast<microseconds>(Start);
    return (BeginUs - OriginUs).count();
  }

  /// Length of this section in microseconds.
  steady_clock::rep getFlameGraphDurUs() const {
    const auto BeginUs = time_point_cast<microseconds>(Start);
    const auto FinishUs = time_point_cast<microseconds>(End);
    return (FinishUs - BeginUs).count();
  }
};
/// Records the timed sections of one thread and writes them, together with the
/// sections of every profiler stored in ThreadTimeTraceProfilerInstances, as a
/// JSON object containing a "traceEvents" array.
struct TimeTraceProfiler {
  /// \param TimeTraceGranularity minimum length, in microseconds, a section
  ///        must have to be kept as an individual event.
  /// \param ProcName name emitted in the "process_name" metadata event.
  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
      : StartTime(steady_clock::now()), ProcName(ProcName),
        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}

  /// Opens a new section called \p Name on top of the section stack.
  /// \p Detail is invoked right away to produce the section's detail text.
  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
    Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
                       Detail());
  }

  /// Closes the most recently opened section. begin() must have been called
  /// first.
  void end() {
    assert(!Stack.empty() && "Must call begin() first");
    auto &E = Stack.back();
    E.End = steady_clock::now();

    // Check that end times monotonically increase.
    assert((Entries.empty() ||
            (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
             Entries.back().getFlameGraphStartUs(StartTime) +
                 Entries.back().getFlameGraphDurUs())) &&
           "TimeProfiler scope ended earlier than previous scope");

    // Calculate duration at full precision for overall counts.
    DurationType Duration = E.End - E.Start;

    // Only keep sections that lasted at least TimeTraceGranularity
    // microseconds as individual events.
    if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
      Entries.emplace_back(E);

    // Track total time taken by each "name", but only the topmost levels of
    // them; e.g. if there's a template instantiation that instantiates other
    // templates from within, we only want to add the topmost one. "topmost"
    // happens to be the ones that don't have any currently open entries above
    // itself. The search starts one past the top of the stack so that E is
    // not compared with itself.
    if (std::find_if(std::next(Stack.rbegin()), Stack.rend(),
                     [&](const Entry &Val) { return Val.Name == E.Name; }) ==
        Stack.rend()) {
      auto &CountAndTotal = CountAndTotalPerName[E.Name];
      CountAndTotal.first++;
      CountAndTotal.second += Duration;
    }

    Stack.pop_back();
  }

  /// Writes the events of this profiler and of every profiler in
  /// ThreadTimeTraceProfilerInstances to \p OS. All sections of all involved
  /// profilers must have been ended. Holds Mu for the whole call.
  void Write(raw_pwrite_stream &OS) {
    // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
    std::lock_guard<std::mutex> Lock(Mu);
    assert(Stack.empty() &&
           "All profiler sections should be ended when calling Write");
    assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(),
                       ThreadTimeTraceProfilerInstances.end(),
                       [](const auto &TTP) { return TTP->Stack.empty(); }) &&
           "All profiler sections should be ended when calling Write");

    json::OStream J(OS);
    J.objectBegin();
    J.attributeBegin("traceEvents");
    J.arrayBegin();

    // Emit all events for the main flame graph. Timestamps of every thread
    // are expressed relative to the StartTime of the profiler being written.
    auto writeEvent = [&](const auto &E, uint64_t EventTid) {
      auto StartUs = E.getFlameGraphStartUs(StartTime);
      auto DurUs = E.getFlameGraphDurUs();

      J.object([&] {
        J.attribute("pid", 1);
        J.attribute("tid", int64_t(EventTid));
        J.attribute("ph", "X");
        J.attribute("ts", StartUs);
        J.attribute("dur", DurUs);
        J.attribute("name", E.Name);
        if (!E.Detail.empty()) {
          J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
        }
      });
    };
    for (const auto &E : Entries)
      writeEvent(E, this->Tid);
    for (const auto &TTP : ThreadTimeTraceProfilerInstances)
      for (const auto &E : TTP->Entries)
        writeEvent(E, TTP->Tid);

    // Emit totals by section name as additional "thread" events, sorted from
    // longest one.
    // Find highest used thread id.
    uint64_t MaxTid = this->Tid;
    for (const auto &TTP : ThreadTimeTraceProfilerInstances)
      MaxTid = std::max(MaxTid, TTP->Tid);

    // Combine all CountAndTotalPerName from threads into one.
    StringMap<CountAndDurationType> AllCountAndTotalPerName;
    auto combineStat = [&](const auto &Stat) {
      // The key is only used for a lookup in a different map, so a
      // non-owning StringRef is sufficient; no std::string copy is needed.
      StringRef Key = Stat.getKey();
      const auto &Value = Stat.getValue();
      auto &CountAndTotal = AllCountAndTotalPerName[Key];
      CountAndTotal.first += Value.first;
      CountAndTotal.second += Value.second;
    };
    for (const auto &Stat : CountAndTotalPerName)
      combineStat(Stat);
    for (const auto &TTP : ThreadTimeTraceProfilerInstances)
      for (const auto &Stat : TTP->CountAndTotalPerName)
        combineStat(Stat);

    std::vector<NameAndCountAndDurationType> SortedTotals;
    SortedTotals.reserve(AllCountAndTotalPerName.size());
    for (const auto &Total : AllCountAndTotalPerName)
      SortedTotals.emplace_back(Total.getKey().str(), Total.getValue());

    llvm::sort(SortedTotals.begin(), SortedTotals.end(),
               [](const NameAndCountAndDurationType &A,
                  const NameAndCountAndDurationType &B) {
                 return A.second.second > B.second.second;
               });

    // Report totals on separate threads of tracing file, using artificial
    // thread ids above the highest real one.
    uint64_t TotalTid = MaxTid + 1;
    for (const auto &Total : SortedTotals) {
      auto DurUs = duration_cast<microseconds>(Total.second.second).count();
      // The count travels with the sorted entry; no second map lookup needed.
      auto Count = Total.second.first;

      J.object([&] {
        J.attribute("pid", 1);
        J.attribute("tid", int64_t(TotalTid));
        J.attribute("ph", "X");
        J.attribute("ts", 0);
        J.attribute("dur", DurUs);
        J.attribute("name", "Total " + Total.first);
        J.attributeObject("args", [&] {
          J.attribute("count", int64_t(Count));
          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
        });
      });

      ++TotalTid;
    }

    // Emit metadata event with process name.
    J.object([&] {
      J.attribute("cat", "");
      J.attribute("pid", 1);
      J.attribute("tid", 0);
      J.attribute("ts", 0);
      J.attribute("ph", "M");
      J.attribute("name", "process_name");
      J.attributeObject("args", [&] { J.attribute("name", ProcName); });
    });

    J.arrayEnd();
    J.attributeEnd();
    J.objectEnd();
  }

  // Sections that have been opened with begin() but not yet closed by end().
  SmallVector<Entry, 16> Stack;
  // Closed sections that met the granularity threshold.
  SmallVector<Entry, 128> Entries;
  // Per section name: how often it was closed as a topmost section and the
  // accumulated full-precision duration of those occurrences.
  StringMap<CountAndDurationType> CountAndTotalPerName;
  // Time at which this profiler was constructed.
  const TimePointType StartTime;
  const std::string ProcName;
  // Value of llvm::get_threadid() on the thread that constructed the profiler.
  const uint64_t Tid;

  // Minimum time granularity (in microseconds)
  const unsigned TimeTraceGranularity;
};
void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
StringRef ProcName) {
assert(TimeTraceProfilerInstance == nullptr &&
"Profiler should not be initialized");
TimeTraceProfilerInstance = std::make_unique<TimeTraceProfiler>(
TimeTraceGranularity, llvm::sys::path::filename(ProcName));
}
// Destroys the calling thread's profiler and, under Mu, every profiler that
// was handed over to the global list.
void timeTraceProfilerCleanup() {
  TimeTraceProfilerInstance = nullptr;

  std::lock_guard<std::mutex> Guard(Mu);
  ThreadTimeTraceProfilerInstances.clear();
}
// Finish TimeTraceProfilerInstance on a worker thread.
// This doesn't remove the instance, just moves the pointer to global vector.
// Does nothing when the calling thread has no profiler: storing a null pointer
// in the global vector would later be dereferenced by TimeTraceProfiler::Write.
void timeTraceProfilerFinishThread() {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  std::lock_guard<std::mutex> Lock(Mu);
  ThreadTimeTraceProfilerInstances.push_back(
      std::move(TimeTraceProfilerInstance));
}
/// Writes the recorded trace to \p OS through the calling thread's profiler,
/// which must exist.
void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
  assert(TimeTraceProfilerInstance != nullptr &&
         "Profiler object can't be null");

  TimeTraceProfiler &Profiler = *TimeTraceProfilerInstance;
  Profiler.Write(OS);
}
/// Opens a section named \p Name with the fixed detail text \p Detail.
/// Does nothing when the calling thread has no profiler.
void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  TimeTraceProfilerInstance->begin(Name.str(),
                                   [&]() { return Detail.str(); });
}
/// Opens a section named \p Name whose detail text is produced by \p Detail.
/// Does nothing, and does not invoke \p Detail, when the calling thread has no
/// profiler.
void timeTraceProfilerBegin(StringRef Name,
                            llvm::function_ref<std::string()> Detail) {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  TimeTraceProfilerInstance->begin(Name.str(), Detail);
}
/// Closes the most recently opened section of the calling thread's profiler.
/// Does nothing when the calling thread has no profiler.
void timeTraceProfilerEnd() {
  if (TimeTraceProfilerInstance == nullptr)
    return;

  TimeTraceProfilerInstance->end();
}
} // namespace llvm