
Summary: This change simplifies the XRay Allocator implementation to self-manage an mmap'ed memory segment instead of using the internal allocator implementation in sanitizer_common. Through the benchmarks and profiles in D48879, we found that using the internal allocator in sanitizer_common introduces a bottleneck: allocating memory goes through a central spinlock. This change allows thread-local allocators to eliminate contention on the centralized allocator. To get the most benefit from this approach, we also use a managed allocator for the chunk elements used by the segmented array implementation. This gives us the chance to amortize the cost of allocating memory when creating these internal segmented array data structures. We also took the opportunity to remove the preallocation argument from the allocator API, simplifying the usage of the allocator throughout the profiling implementation. In this change we also tweak some of the flag values to reduce the maximum amount of memory we use/need for each thread when requesting memory through mmap. Depends on D48956. Reviewers: kpw, eizan Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D49217 llvm-svn: 337342
287 lines
8.3 KiB
C++
287 lines
8.3 KiB
C++
//===-- function_call_trie_test.cc ----------------------------------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is a part of XRay, a function call tracing system.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "gtest/gtest.h"
|
|
|
|
#include "xray_function_call_trie.h"
|
|
|
|
namespace __xray {
|
|
|
|
namespace {
|
|
|
|
// Smoke test: constructing a FunctionCallTrie from a freshly-initialized set
// of thread-local allocators should succeed without crashing.
TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  profilingFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);
}
|
|
|
|
// A single enter/exit pair for one function should yield exactly one root
// with a call count of 1 and a cumulative local time equal to the TSC delta.
TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  Trie.enterFunction(1, 1);
  Trie.exitFunction(1, 2);

  // We need a way to pull the data out. At this point, until we get a data
  // collection service implemented, we're going to export the data as a list
  // of roots, and manually walk through the structure ourselves.
  const auto &Roots = Trie.getRoots();

  ASSERT_EQ(Roots.size(), 1u);
  ASSERT_EQ(Roots.front()->FId, 1);
  ASSERT_EQ(Roots.front()->CallCount, 1);
  ASSERT_EQ(Roots.front()->CumulativeLocalTime, 1u);
}
|
|
|
|
// An exit event with no matching enter event should be ignored, leaving the
// trie empty.
TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  Trie.exitFunction(1, 1);

  const auto &Roots = Trie.getRoots();
  ASSERT_TRUE(Roots.empty());
}
|
|
|
|
// An exit for a function id that was never entered must not lose the entries
// that were recorded before it; we still expect a single root.
TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  Trie.enterFunction(2, 1);
  Trie.enterFunction(3, 3);
  Trie.exitFunction(1, 5);

  const auto &Roots = Trie.getRoots();
  ASSERT_FALSE(Roots.empty());
  EXPECT_EQ(Roots.size(), size_t{1});
}
|
|
|
|
// A function that is entered but never exited should still appear in the trie
// as a root.
TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  Trie.enterFunction(1, 1);

  const auto &Roots = Trie.getRoots();
  ASSERT_FALSE(Roots.empty());
  EXPECT_EQ(Roots.size(), size_t{1});
}
|
|
|
|
// Entering and exiting two distinct top-level functions should produce two
// distinct roots, each with the correct per-function count and local time.
//
// Fix: the original test dereferenced R0/R1 (for the FId comparison) before
// the ASSERT_NE(..., nullptr) checks, making the null guards useless. The
// null checks now run before the first dereference.
TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1);
  Trie.exitFunction(1, 2);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3);
  Trie.exitFunction(2, 4);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  const auto R0 = R[0];
  const auto R1 = R[1];

  // Establish the roots are valid before dereferencing them.
  ASSERT_NE(R0, nullptr);
  ASSERT_NE(R1, nullptr);

  // Make sure the roots have different IDs.
  ASSERT_NE(R0->FId, R1->FId);

  // Inspect the roots that they have the right data.
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}
|
|
|
|
// While missing an intermediary entry may be rare in practice, we still enforce
|
|
// that we can handle the case where we've missed the entry event somehow, in
|
|
// between call entry/exits. To illustrate, imagine the following shadow call
|
|
// stack:
|
|
//
|
|
// f0@t0 -> f1@t1 -> f2@t2
|
|
//
|
|
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
|
|
// `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of
|
|
// accounting local time to `f2` from d = (t3 - t2), then local time to `f1`
|
|
// as d' = (t3 - t1) - d, and then local time to `f0` as d'' = (t3 - t0) - d'.
|
|
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
|
|
profilingFlags()->setDefaults();
|
|
auto A = FunctionCallTrie::InitAllocators();
|
|
FunctionCallTrie Trie(A);
|
|
|
|
Trie.enterFunction(1, 0);
|
|
Trie.enterFunction(2, 100);
|
|
Trie.enterFunction(3, 200);
|
|
Trie.exitFunction(3, 300);
|
|
Trie.exitFunction(1, 400);
|
|
|
|
// What we should see at this point is all the functions in the trie in a
|
|
// specific order (1 -> 2 -> 3) with the appropriate count(s) and local
|
|
// latencies.
|
|
const auto &R = Trie.getRoots();
|
|
ASSERT_FALSE(R.empty());
|
|
ASSERT_EQ(R.size(), 1u);
|
|
|
|
const auto &F1 = *R[0];
|
|
ASSERT_EQ(F1.FId, 1);
|
|
ASSERT_FALSE(F1.Callees.empty());
|
|
|
|
const auto &F2 = *F1.Callees[0].NodePtr;
|
|
ASSERT_EQ(F2.FId, 2);
|
|
ASSERT_FALSE(F2.Callees.empty());
|
|
|
|
const auto &F3 = *F2.Callees[0].NodePtr;
|
|
ASSERT_EQ(F3.FId, 3);
|
|
ASSERT_TRUE(F3.Callees.empty());
|
|
|
|
// Now that we've established the preconditions, we check for specific aspects
|
|
// of the nodes.
|
|
EXPECT_EQ(F3.CallCount, 1);
|
|
EXPECT_EQ(F2.CallCount, 1);
|
|
EXPECT_EQ(F1.CallCount, 1);
|
|
EXPECT_EQ(F3.CumulativeLocalTime, 100);
|
|
EXPECT_EQ(F2.CumulativeLocalTime, 300);
|
|
EXPECT_EQ(F1.CumulativeLocalTime, 100);
|
|
}
|
|
|
|
// Simulate a relatively deep call stack (32 levels) and ensure that we can
// properly pop all the way up the stack when only the root's exit is seen.
TEST(FunctionCallTrieTest, DeepCallStack) {
  profilingFlags()->setDefaults();
  auto Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);

  // Enter FIds 1..32 at increasing timestamps, then exit only the root.
  for (int Level = 0; Level < 32; ++Level)
    Trie.enterFunction(Level + 1, Level);
  Trie.exitFunction(1, 33);

  // Here, validate that we have a 32-level deep function call path from the
  // root (FId 1) down to the leaf (FId 32).
  const auto &Roots = Trie.getRoots();
  ASSERT_EQ(Roots.size(), 1u);
  auto Node = Roots[0];
  for (int Level = 0; Level < 32; ++Level) {
    EXPECT_EQ(Node->FId, Level + 1);
    EXPECT_EQ(Node->CallCount, 1);
    if (Node->Callees.empty() && Level != 31)
      FAIL() << "Empty callees for FId " << Node->FId;
    if (Level != 31)
      Node = Node->Callees[0].NodePtr;
  }
}
|
|
|
|
// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
|
|
// guaranteed to be synchronised.
|
|
TEST(FunctionCallTrieTest, DeepCopy) {
|
|
profilingFlags()->setDefaults();
|
|
auto A = FunctionCallTrie::InitAllocators();
|
|
FunctionCallTrie Trie(A);
|
|
|
|
Trie.enterFunction(1, 0);
|
|
Trie.enterFunction(2, 1);
|
|
Trie.exitFunction(2, 2);
|
|
Trie.enterFunction(3, 3);
|
|
Trie.exitFunction(3, 4);
|
|
Trie.exitFunction(1, 5);
|
|
|
|
// We want to make a deep copy and compare notes.
|
|
auto B = FunctionCallTrie::InitAllocators();
|
|
FunctionCallTrie Copy(B);
|
|
Trie.deepCopyInto(Copy);
|
|
|
|
ASSERT_NE(Trie.getRoots().size(), 0u);
|
|
ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
|
|
const auto &R0Orig = *Trie.getRoots()[0];
|
|
const auto &R0Copy = *Copy.getRoots()[0];
|
|
EXPECT_EQ(R0Orig.FId, 1);
|
|
EXPECT_EQ(R0Orig.FId, R0Copy.FId);
|
|
|
|
ASSERT_EQ(R0Orig.Callees.size(), 2u);
|
|
ASSERT_EQ(R0Copy.Callees.size(), 2u);
|
|
|
|
const auto &F1Orig =
|
|
*R0Orig.Callees
|
|
.find_element(
|
|
[](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
|
|
->NodePtr;
|
|
const auto &F1Copy =
|
|
*R0Copy.Callees
|
|
.find_element(
|
|
[](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
|
|
->NodePtr;
|
|
EXPECT_EQ(&R0Orig, F1Orig.Parent);
|
|
EXPECT_EQ(&R0Copy, F1Copy.Parent);
|
|
}
|
|
|
|
// Merges two tries with identical 1 -> 2 -> 3 call paths into a third trie
// and verifies that call counts and cumulative local times are summed at
// every level of the path.
TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // Record the same 1 -> 2 -> 3 sequence in both tries.
  auto Populate = [](FunctionCallTrie &Trie) {
    Trie.enterFunction(1, 0);
    Trie.enterFunction(2, 1);
    Trie.enterFunction(3, 2);
    Trie.exitFunction(3, 3);
    Trie.exitFunction(2, 4);
    Trie.exitFunction(1, 5);
  };
  Populate(T0);
  Populate(T1);

  // We use a different allocator here to make sure that we're able to
  // transfer data into a FunctionCallTrie which uses a different allocator.
  // This reflects the intended usage scenario for when we're collecting
  // profiles that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2);
  EXPECT_EQ(R0.CumulativeLocalTime, 10);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2);
  EXPECT_EQ(F1.CumulativeLocalTime, 6);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2);
  EXPECT_EQ(F2.CumulativeLocalTime, 2);
  EXPECT_EQ(F2.Callees.size(), 0u);
}
|
|
|
|
} // namespace
|
|
|
|
} // namespace __xray
|