[memprof] Move writeMemProf to a separate file (#137051)
This patch moves writeMemProf and its subroutines to a separate file. The intent is as follows: - Reduce the size of InstrProfWriter.cpp. - Move the subroutines to a separate file because they don't interact with anything else in InstrProfWriter.cpp. Remarks: - The new file is named IndexedMemProfData.cpp without "Writer" in the name so that we can move the reader code to this file in the future. - This patch just moves code without changing the function signatures for now. It might make sense to implement a class encompassing "serialize" and "deserialize" methods for IndexedMemProfData, but that's left to subsequent patches.
This commit is contained in:
parent
b6f32ad8b0
commit
9a8f90dba3
23
llvm/include/llvm/ProfileData/IndexedMemProfData.h
Normal file
23
llvm/include/llvm/ProfileData/IndexedMemProfData.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// MemProf data is serialized in writeMemProf provided in this header file.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
#define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H

#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"

namespace llvm {

// Write the MemProf data to OS.
//
// OS                      - stream that receives the serialized profile.
// MemProfData             - records, frames and call stacks to serialize. The
//                           containers are emptied as they are written, so the
//                           data cannot be reused after this call.
// MemProfVersionRequested - on-disk format to emit (Version2 or Version3).
// MemProfFullSchema       - emit the full schema instead of the hot/cold one.
//
// Returns Error::success() on success, or an InstrProfError carrying
// instrprof_error::unsupported_version when the requested version has no
// writer.
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
                   memprof::IndexedVersion MemProfVersionRequested,
                   bool MemProfFullSchema);

} // namespace llvm

#endif // LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
|
@ -1,5 +1,6 @@
|
|||||||
add_llvm_component_library(LLVMProfileData
|
add_llvm_component_library(LLVMProfileData
|
||||||
GCOV.cpp
|
GCOV.cpp
|
||||||
|
IndexedMemProfData.cpp
|
||||||
InstrProf.cpp
|
InstrProf.cpp
|
||||||
InstrProfCorrelator.cpp
|
InstrProfCorrelator.cpp
|
||||||
InstrProfReader.cpp
|
InstrProfReader.cpp
|
||||||
|
300
llvm/lib/ProfileData/IndexedMemProfData.cpp
Normal file
300
llvm/lib/ProfileData/IndexedMemProfData.cpp
Normal file
@ -0,0 +1,300 @@
|
|||||||
|
//===- IndexedMemProfData.cpp - MemProf format support --------------------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// MemProf data is serialized in writeMemProf provided in this file.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "llvm/ProfileData/InstrProf.h"
|
||||||
|
#include "llvm/ProfileData/MemProf.h"
|
||||||
|
#include "llvm/Support/FormatVariadic.h"
|
||||||
|
#include "llvm/Support/OnDiskHashTable.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
// Serialize Schema.
|
||||||
|
static void writeMemProfSchema(ProfOStream &OS,
|
||||||
|
const memprof::MemProfSchema &Schema) {
|
||||||
|
OS.write(static_cast<uint64_t>(Schema.size()));
|
||||||
|
for (const auto Id : Schema)
|
||||||
|
OS.write(static_cast<uint64_t>(Id));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize MemProfRecordData. Return RecordTableOffset.
|
||||||
|
static uint64_t writeMemProfRecords(
|
||||||
|
ProfOStream &OS,
|
||||||
|
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
|
||||||
|
&MemProfRecordData,
|
||||||
|
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
|
||||||
|
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
|
||||||
|
*MemProfCallStackIndexes = nullptr) {
|
||||||
|
memprof::RecordWriterTrait RecordWriter(Schema, Version,
|
||||||
|
MemProfCallStackIndexes);
|
||||||
|
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
|
||||||
|
RecordTableGenerator;
|
||||||
|
for (auto &[GUID, Record] : MemProfRecordData) {
|
||||||
|
// Insert the key (func hash) and value (memprof record).
|
||||||
|
RecordTableGenerator.insert(GUID, Record, RecordWriter);
|
||||||
|
}
|
||||||
|
// Release the memory of this MapVector as it is no longer needed.
|
||||||
|
MemProfRecordData.clear();
|
||||||
|
|
||||||
|
// The call to Emit invokes RecordWriterTrait::EmitData which destructs
|
||||||
|
// the memprof record copies owned by the RecordTableGenerator. This works
|
||||||
|
// because the RecordTableGenerator is not used after this point.
|
||||||
|
return RecordTableGenerator.Emit(OS.OS, RecordWriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize MemProfFrameData. Return FrameTableOffset.
|
||||||
|
static uint64_t writeMemProfFrames(
|
||||||
|
ProfOStream &OS,
|
||||||
|
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
|
||||||
|
OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
|
||||||
|
FrameTableGenerator;
|
||||||
|
for (auto &[FrameId, Frame] : MemProfFrameData) {
|
||||||
|
// Insert the key (frame id) and value (frame contents).
|
||||||
|
FrameTableGenerator.insert(FrameId, Frame);
|
||||||
|
}
|
||||||
|
// Release the memory of this MapVector as it is no longer needed.
|
||||||
|
MemProfFrameData.clear();
|
||||||
|
|
||||||
|
return FrameTableGenerator.Emit(OS.OS);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize MemProfFrameData. Return the mapping from FrameIds to their
|
||||||
|
// indexes within the frame array.
|
||||||
|
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
|
||||||
|
writeMemProfFrameArray(
|
||||||
|
ProfOStream &OS,
|
||||||
|
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
|
||||||
|
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
|
||||||
|
// Mappings from FrameIds to array indexes.
|
||||||
|
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
|
||||||
|
|
||||||
|
// Compute the order in which we serialize Frames. The order does not matter
|
||||||
|
// in terms of correctness, but we still compute it for deserialization
|
||||||
|
// performance. Specifically, if we serialize frequently used Frames one
|
||||||
|
// after another, we have better cache utilization. For two Frames that
|
||||||
|
// appear equally frequently, we break a tie by serializing the one that tends
|
||||||
|
// to appear earlier in call stacks. We implement the tie-breaking mechanism
|
||||||
|
// by computing the sum of indexes within call stacks for each Frame. If we
|
||||||
|
// still have a tie, then we just resort to compare two FrameIds, which is
|
||||||
|
// just for stability of output.
|
||||||
|
std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
|
||||||
|
FrameIdOrder.reserve(MemProfFrameData.size());
|
||||||
|
for (const auto &[Id, Frame] : MemProfFrameData)
|
||||||
|
FrameIdOrder.emplace_back(Id, &Frame);
|
||||||
|
assert(MemProfFrameData.size() == FrameIdOrder.size());
|
||||||
|
llvm::sort(FrameIdOrder,
|
||||||
|
[&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
|
||||||
|
const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
|
||||||
|
const auto &SL = FrameHistogram[L.first];
|
||||||
|
const auto &SR = FrameHistogram[R.first];
|
||||||
|
// Popular FrameIds should come first.
|
||||||
|
if (SL.Count != SR.Count)
|
||||||
|
return SL.Count > SR.Count;
|
||||||
|
// If they are equally popular, then the one that tends to appear
|
||||||
|
// earlier in call stacks should come first.
|
||||||
|
if (SL.PositionSum != SR.PositionSum)
|
||||||
|
return SL.PositionSum < SR.PositionSum;
|
||||||
|
// Compare their FrameIds for sort stability.
|
||||||
|
return L.first < R.first;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Serialize all frames while creating mappings from linear IDs to FrameIds.
|
||||||
|
uint64_t Index = 0;
|
||||||
|
MemProfFrameIndexes.reserve(FrameIdOrder.size());
|
||||||
|
for (const auto &[Id, F] : FrameIdOrder) {
|
||||||
|
F->serialize(OS.OS);
|
||||||
|
MemProfFrameIndexes.insert({Id, Index});
|
||||||
|
++Index;
|
||||||
|
}
|
||||||
|
assert(MemProfFrameData.size() == Index);
|
||||||
|
assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
|
||||||
|
|
||||||
|
// Release the memory of this MapVector as it is no longer needed.
|
||||||
|
MemProfFrameData.clear();
|
||||||
|
|
||||||
|
return MemProfFrameIndexes;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit MemProfCallStackData as an on-disk chained hash table keyed by
// CallStackId and return the table offset reported by the generator.
// MemProfCallStackData is emptied before the table is emitted.
static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  using CallStackTable =
      OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>;

  CallStackTable Table;
  for (auto &Entry : MemProfCallStackData) {
    // Key: call stack id. Value: the FrameIds making up that call stack.
    Table.insert(Entry.first, Entry.second);
  }

  // The MapVector's contents are no longer needed; release the memory.
  MemProfCallStackData.clear();

  return Table.Emit(OS.OS);
}
|
||||||
|
|
||||||
|
// Encode MemProfCallStackData with CallStackRadixTreeBuilder and write the
// resulting radix array to OS as 32-bit elements.
//
// MemProfFrameIndexes and FrameHistogram are handed to the builder.
// NumElements receives the number of elements in the radix array.
// MemProfCallStackData is moved into the builder and then cleared.
//
// Returns the CallStackId -> LinearCallStackId mapping taken from the builder.
static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
    unsigned &NumElements) {
  memprof::CallStackRadixTreeBuilder<memprof::FrameId> RadixBuilder;
  RadixBuilder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
                     FrameHistogram);

  for (auto Element : RadixBuilder.getRadixArray())
    OS.write32(Element);
  NumElements = RadixBuilder.getRadixArray().size();

  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      CallStackIndexes = RadixBuilder.takeCallStackPos();

  // The container was moved from above; clear it so it is left in a
  // well-defined empty state with its memory released.
  MemProfCallStackData.clear();

  return CallStackIndexes;
}
|
||||||
|
|
||||||
|
// Write out MemProf Version2 as follows:
|
||||||
|
// uint64_t Version
|
||||||
|
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
|
||||||
|
// uint64_t FramePayloadOffset = Offset for the frame payload
|
||||||
|
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
|
||||||
|
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
|
||||||
|
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
|
||||||
|
// uint64_t Num schema entries
|
||||||
|
// uint64_t Schema entry 0
|
||||||
|
// uint64_t Schema entry 1
|
||||||
|
// ....
|
||||||
|
// uint64_t Schema entry N - 1
|
||||||
|
// OnDiskChainedHashTable MemProfRecordData
|
||||||
|
// OnDiskChainedHashTable MemProfFrameData
|
||||||
|
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
|
||||||
|
static Error writeMemProfV2(ProfOStream &OS,
|
||||||
|
memprof::IndexedMemProfData &MemProfData,
|
||||||
|
bool MemProfFullSchema) {
|
||||||
|
OS.write(memprof::Version2);
|
||||||
|
uint64_t HeaderUpdatePos = OS.tell();
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof record table offset.
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof frame table offset.
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
|
||||||
|
|
||||||
|
auto Schema = memprof::getHotColdSchema();
|
||||||
|
if (MemProfFullSchema)
|
||||||
|
Schema = memprof::getFullSchema();
|
||||||
|
writeMemProfSchema(OS, Schema);
|
||||||
|
|
||||||
|
uint64_t RecordTableOffset =
|
||||||
|
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
|
||||||
|
|
||||||
|
uint64_t FramePayloadOffset = OS.tell();
|
||||||
|
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
|
||||||
|
|
||||||
|
uint64_t CallStackPayloadOffset = OS.tell();
|
||||||
|
uint64_t CallStackTableOffset =
|
||||||
|
writeMemProfCallStacks(OS, MemProfData.CallStacks);
|
||||||
|
|
||||||
|
uint64_t Header[] = {
|
||||||
|
RecordTableOffset, FramePayloadOffset, FrameTableOffset,
|
||||||
|
CallStackPayloadOffset, CallStackTableOffset,
|
||||||
|
};
|
||||||
|
OS.patch({{HeaderUpdatePos, Header}});
|
||||||
|
|
||||||
|
return Error::success();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write out MemProf Version3 as follows:
|
||||||
|
// uint64_t Version
|
||||||
|
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
|
||||||
|
// uint64_t RecordPayloadOffset = Offset for the record payload
|
||||||
|
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
|
||||||
|
// uint64_t Num schema entries
|
||||||
|
// uint64_t Schema entry 0
|
||||||
|
// uint64_t Schema entry 1
|
||||||
|
// ....
|
||||||
|
// uint64_t Schema entry N - 1
|
||||||
|
// Frames serialized one after another
|
||||||
|
// Call stacks encoded as a radix tree
|
||||||
|
// OnDiskChainedHashTable MemProfRecordData
|
||||||
|
static Error writeMemProfV3(ProfOStream &OS,
|
||||||
|
memprof::IndexedMemProfData &MemProfData,
|
||||||
|
bool MemProfFullSchema) {
|
||||||
|
OS.write(memprof::Version3);
|
||||||
|
uint64_t HeaderUpdatePos = OS.tell();
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof record payload offset.
|
||||||
|
OS.write(0ULL); // Reserve space for the memprof record table offset.
|
||||||
|
|
||||||
|
auto Schema = memprof::getHotColdSchema();
|
||||||
|
if (MemProfFullSchema)
|
||||||
|
Schema = memprof::getFullSchema();
|
||||||
|
writeMemProfSchema(OS, Schema);
|
||||||
|
|
||||||
|
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
|
||||||
|
memprof::computeFrameHistogram(MemProfData.CallStacks);
|
||||||
|
assert(MemProfData.Frames.size() == FrameHistogram.size());
|
||||||
|
|
||||||
|
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
|
||||||
|
writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
|
||||||
|
|
||||||
|
uint64_t CallStackPayloadOffset = OS.tell();
|
||||||
|
// The number of elements in the call stack array.
|
||||||
|
unsigned NumElements = 0;
|
||||||
|
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
|
||||||
|
MemProfCallStackIndexes =
|
||||||
|
writeMemProfCallStackArray(OS, MemProfData.CallStacks,
|
||||||
|
MemProfFrameIndexes, FrameHistogram,
|
||||||
|
NumElements);
|
||||||
|
|
||||||
|
uint64_t RecordPayloadOffset = OS.tell();
|
||||||
|
uint64_t RecordTableOffset =
|
||||||
|
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
|
||||||
|
&MemProfCallStackIndexes);
|
||||||
|
|
||||||
|
// IndexedMemProfReader::deserializeV3 computes the number of elements in the
|
||||||
|
// call stack array from the difference between CallStackPayloadOffset and
|
||||||
|
// RecordPayloadOffset. Verify that the computation works.
|
||||||
|
assert(CallStackPayloadOffset +
|
||||||
|
NumElements * sizeof(memprof::LinearFrameId) ==
|
||||||
|
RecordPayloadOffset);
|
||||||
|
|
||||||
|
uint64_t Header[] = {
|
||||||
|
CallStackPayloadOffset,
|
||||||
|
RecordPayloadOffset,
|
||||||
|
RecordTableOffset,
|
||||||
|
};
|
||||||
|
OS.patch({{HeaderUpdatePos, Header}});
|
||||||
|
|
||||||
|
return Error::success();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write out the MemProf data in a requested version.
|
||||||
|
Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
|
||||||
|
memprof::IndexedVersion MemProfVersionRequested,
|
||||||
|
bool MemProfFullSchema) {
|
||||||
|
switch (MemProfVersionRequested) {
|
||||||
|
case memprof::Version2:
|
||||||
|
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
|
||||||
|
case memprof::Version3:
|
||||||
|
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
|
||||||
|
}
|
||||||
|
|
||||||
|
return make_error<InstrProfError>(
|
||||||
|
instrprof_error::unsupported_version,
|
||||||
|
formatv("MemProf version {} not supported; "
|
||||||
|
"requires version between {} and {}, inclusive",
|
||||||
|
MemProfVersionRequested, memprof::MinimumSupportedVersion,
|
||||||
|
memprof::MaximumSupportedVersion));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace llvm
|
@ -16,6 +16,7 @@
|
|||||||
#include "llvm/ADT/SetVector.h"
|
#include "llvm/ADT/SetVector.h"
|
||||||
#include "llvm/ADT/StringRef.h"
|
#include "llvm/ADT/StringRef.h"
|
||||||
#include "llvm/IR/ProfileSummary.h"
|
#include "llvm/IR/ProfileSummary.h"
|
||||||
|
#include "llvm/ProfileData/IndexedMemProfData.h"
|
||||||
#include "llvm/ProfileData/InstrProf.h"
|
#include "llvm/ProfileData/InstrProf.h"
|
||||||
#include "llvm/ProfileData/MemProf.h"
|
#include "llvm/ProfileData/MemProf.h"
|
||||||
#include "llvm/ProfileData/ProfileCommon.h"
|
#include "llvm/ProfileData/ProfileCommon.h"
|
||||||
@ -23,7 +24,6 @@
|
|||||||
#include "llvm/Support/Endian.h"
|
#include "llvm/Support/Endian.h"
|
||||||
#include "llvm/Support/EndianStream.h"
|
#include "llvm/Support/EndianStream.h"
|
||||||
#include "llvm/Support/Error.h"
|
#include "llvm/Support/Error.h"
|
||||||
#include "llvm/Support/FormatVariadic.h"
|
|
||||||
#include "llvm/Support/MemoryBuffer.h"
|
#include "llvm/Support/MemoryBuffer.h"
|
||||||
#include "llvm/Support/OnDiskHashTable.h"
|
#include "llvm/Support/OnDiskHashTable.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
@ -449,287 +449,6 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary,
|
|||||||
TheSummary->setEntry(I, Res[I]);
|
TheSummary->setEntry(I, Res[I]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Serialize Schema.
|
|
||||||
static void writeMemProfSchema(ProfOStream &OS,
|
|
||||||
const memprof::MemProfSchema &Schema) {
|
|
||||||
OS.write(static_cast<uint64_t>(Schema.size()));
|
|
||||||
for (const auto Id : Schema)
|
|
||||||
OS.write(static_cast<uint64_t>(Id));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Serialize MemProfRecordData. Return RecordTableOffset.
|
|
||||||
static uint64_t writeMemProfRecords(
|
|
||||||
ProfOStream &OS,
|
|
||||||
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
|
|
||||||
&MemProfRecordData,
|
|
||||||
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
|
|
||||||
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
|
|
||||||
*MemProfCallStackIndexes = nullptr) {
|
|
||||||
memprof::RecordWriterTrait RecordWriter(Schema, Version,
|
|
||||||
MemProfCallStackIndexes);
|
|
||||||
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
|
|
||||||
RecordTableGenerator;
|
|
||||||
for (auto &[GUID, Record] : MemProfRecordData) {
|
|
||||||
// Insert the key (func hash) and value (memprof record).
|
|
||||||
RecordTableGenerator.insert(GUID, Record, RecordWriter);
|
|
||||||
}
|
|
||||||
// Release the memory of this MapVector as it is no longer needed.
|
|
||||||
MemProfRecordData.clear();
|
|
||||||
|
|
||||||
// The call to Emit invokes RecordWriterTrait::EmitData which destructs
|
|
||||||
// the memprof record copies owned by the RecordTableGenerator. This works
|
|
||||||
// because the RecordTableGenerator is not used after this point.
|
|
||||||
return RecordTableGenerator.Emit(OS.OS, RecordWriter);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Serialize MemProfFrameData. Return FrameTableOffset.
|
|
||||||
static uint64_t writeMemProfFrames(
|
|
||||||
ProfOStream &OS,
|
|
||||||
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
|
|
||||||
OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
|
|
||||||
FrameTableGenerator;
|
|
||||||
for (auto &[FrameId, Frame] : MemProfFrameData) {
|
|
||||||
// Insert the key (frame id) and value (frame contents).
|
|
||||||
FrameTableGenerator.insert(FrameId, Frame);
|
|
||||||
}
|
|
||||||
// Release the memory of this MapVector as it is no longer needed.
|
|
||||||
MemProfFrameData.clear();
|
|
||||||
|
|
||||||
return FrameTableGenerator.Emit(OS.OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Serialize MemProfFrameData. Return the mapping from FrameIds to their
|
|
||||||
// indexes within the frame array.
|
|
||||||
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
|
|
||||||
writeMemProfFrameArray(
|
|
||||||
ProfOStream &OS,
|
|
||||||
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
|
|
||||||
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
|
|
||||||
// Mappings from FrameIds to array indexes.
|
|
||||||
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
|
|
||||||
|
|
||||||
// Compute the order in which we serialize Frames. The order does not matter
|
|
||||||
// in terms of correctness, but we still compute it for deserialization
|
|
||||||
// performance. Specifically, if we serialize frequently used Frames one
|
|
||||||
// after another, we have better cache utilization. For two Frames that
|
|
||||||
// appear equally frequently, we break a tie by serializing the one that tends
|
|
||||||
// to appear earlier in call stacks. We implement the tie-breaking mechanism
|
|
||||||
// by computing the sum of indexes within call stacks for each Frame. If we
|
|
||||||
// still have a tie, then we just resort to compare two FrameIds, which is
|
|
||||||
// just for stability of output.
|
|
||||||
std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
|
|
||||||
FrameIdOrder.reserve(MemProfFrameData.size());
|
|
||||||
for (const auto &[Id, Frame] : MemProfFrameData)
|
|
||||||
FrameIdOrder.emplace_back(Id, &Frame);
|
|
||||||
assert(MemProfFrameData.size() == FrameIdOrder.size());
|
|
||||||
llvm::sort(FrameIdOrder,
|
|
||||||
[&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
|
|
||||||
const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
|
|
||||||
const auto &SL = FrameHistogram[L.first];
|
|
||||||
const auto &SR = FrameHistogram[R.first];
|
|
||||||
// Popular FrameIds should come first.
|
|
||||||
if (SL.Count != SR.Count)
|
|
||||||
return SL.Count > SR.Count;
|
|
||||||
// If they are equally popular, then the one that tends to appear
|
|
||||||
// earlier in call stacks should come first.
|
|
||||||
if (SL.PositionSum != SR.PositionSum)
|
|
||||||
return SL.PositionSum < SR.PositionSum;
|
|
||||||
// Compare their FrameIds for sort stability.
|
|
||||||
return L.first < R.first;
|
|
||||||
});
|
|
||||||
|
|
||||||
// Serialize all frames while creating mappings from linear IDs to FrameIds.
|
|
||||||
uint64_t Index = 0;
|
|
||||||
MemProfFrameIndexes.reserve(FrameIdOrder.size());
|
|
||||||
for (const auto &[Id, F] : FrameIdOrder) {
|
|
||||||
F->serialize(OS.OS);
|
|
||||||
MemProfFrameIndexes.insert({Id, Index});
|
|
||||||
++Index;
|
|
||||||
}
|
|
||||||
assert(MemProfFrameData.size() == Index);
|
|
||||||
assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
|
|
||||||
|
|
||||||
// Release the memory of this MapVector as it is no longer needed.
|
|
||||||
MemProfFrameData.clear();
|
|
||||||
|
|
||||||
return MemProfFrameIndexes;
|
|
||||||
}
|
|
||||||
|
|
||||||
static uint64_t writeMemProfCallStacks(
|
|
||||||
ProfOStream &OS,
|
|
||||||
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
|
|
||||||
&MemProfCallStackData) {
|
|
||||||
OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
|
|
||||||
CallStackTableGenerator;
|
|
||||||
for (auto &[CSId, CallStack] : MemProfCallStackData)
|
|
||||||
CallStackTableGenerator.insert(CSId, CallStack);
|
|
||||||
// Release the memory of this vector as it is no longer needed.
|
|
||||||
MemProfCallStackData.clear();
|
|
||||||
|
|
||||||
return CallStackTableGenerator.Emit(OS.OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
|
|
||||||
writeMemProfCallStackArray(
|
|
||||||
ProfOStream &OS,
|
|
||||||
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
|
|
||||||
&MemProfCallStackData,
|
|
||||||
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
|
|
||||||
&MemProfFrameIndexes,
|
|
||||||
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
|
|
||||||
unsigned &NumElements) {
|
|
||||||
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
|
|
||||||
MemProfCallStackIndexes;
|
|
||||||
|
|
||||||
memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
|
|
||||||
Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
|
|
||||||
FrameHistogram);
|
|
||||||
for (auto I : Builder.getRadixArray())
|
|
||||||
OS.write32(I);
|
|
||||||
NumElements = Builder.getRadixArray().size();
|
|
||||||
MemProfCallStackIndexes = Builder.takeCallStackPos();
|
|
||||||
|
|
||||||
// Release the memory of this vector as it is no longer needed.
|
|
||||||
MemProfCallStackData.clear();
|
|
||||||
|
|
||||||
return MemProfCallStackIndexes;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write out MemProf Version2 as follows:
|
|
||||||
// uint64_t Version
|
|
||||||
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
|
|
||||||
// uint64_t FramePayloadOffset = Offset for the frame payload
|
|
||||||
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
|
|
||||||
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
|
|
||||||
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
|
|
||||||
// uint64_t Num schema entries
|
|
||||||
// uint64_t Schema entry 0
|
|
||||||
// uint64_t Schema entry 1
|
|
||||||
// ....
|
|
||||||
// uint64_t Schema entry N - 1
|
|
||||||
// OnDiskChainedHashTable MemProfRecordData
|
|
||||||
// OnDiskChainedHashTable MemProfFrameData
|
|
||||||
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
|
|
||||||
static Error writeMemProfV2(ProfOStream &OS,
|
|
||||||
memprof::IndexedMemProfData &MemProfData,
|
|
||||||
bool MemProfFullSchema) {
|
|
||||||
OS.write(memprof::Version2);
|
|
||||||
uint64_t HeaderUpdatePos = OS.tell();
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof record table offset.
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof frame payload offset.
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof frame table offset.
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
|
|
||||||
|
|
||||||
auto Schema = memprof::getHotColdSchema();
|
|
||||||
if (MemProfFullSchema)
|
|
||||||
Schema = memprof::getFullSchema();
|
|
||||||
writeMemProfSchema(OS, Schema);
|
|
||||||
|
|
||||||
uint64_t RecordTableOffset =
|
|
||||||
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
|
|
||||||
|
|
||||||
uint64_t FramePayloadOffset = OS.tell();
|
|
||||||
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
|
|
||||||
|
|
||||||
uint64_t CallStackPayloadOffset = OS.tell();
|
|
||||||
uint64_t CallStackTableOffset =
|
|
||||||
writeMemProfCallStacks(OS, MemProfData.CallStacks);
|
|
||||||
|
|
||||||
uint64_t Header[] = {
|
|
||||||
RecordTableOffset, FramePayloadOffset, FrameTableOffset,
|
|
||||||
CallStackPayloadOffset, CallStackTableOffset,
|
|
||||||
};
|
|
||||||
OS.patch({{HeaderUpdatePos, Header}});
|
|
||||||
|
|
||||||
return Error::success();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write out MemProf Version3 as follows:
|
|
||||||
// uint64_t Version
|
|
||||||
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
|
|
||||||
// uint64_t RecordPayloadOffset = Offset for the record payload
|
|
||||||
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
|
|
||||||
// uint64_t Num schema entries
|
|
||||||
// uint64_t Schema entry 0
|
|
||||||
// uint64_t Schema entry 1
|
|
||||||
// ....
|
|
||||||
// uint64_t Schema entry N - 1
|
|
||||||
// Frames serialized one after another
|
|
||||||
// Call stacks encoded as a radix tree
|
|
||||||
// OnDiskChainedHashTable MemProfRecordData
|
|
||||||
static Error writeMemProfV3(ProfOStream &OS,
|
|
||||||
memprof::IndexedMemProfData &MemProfData,
|
|
||||||
bool MemProfFullSchema) {
|
|
||||||
OS.write(memprof::Version3);
|
|
||||||
uint64_t HeaderUpdatePos = OS.tell();
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof record payload offset.
|
|
||||||
OS.write(0ULL); // Reserve space for the memprof record table offset.
|
|
||||||
|
|
||||||
auto Schema = memprof::getHotColdSchema();
|
|
||||||
if (MemProfFullSchema)
|
|
||||||
Schema = memprof::getFullSchema();
|
|
||||||
writeMemProfSchema(OS, Schema);
|
|
||||||
|
|
||||||
llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
|
|
||||||
memprof::computeFrameHistogram(MemProfData.CallStacks);
|
|
||||||
assert(MemProfData.Frames.size() == FrameHistogram.size());
|
|
||||||
|
|
||||||
llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
|
|
||||||
writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
|
|
||||||
|
|
||||||
uint64_t CallStackPayloadOffset = OS.tell();
|
|
||||||
// The number of elements in the call stack array.
|
|
||||||
unsigned NumElements = 0;
|
|
||||||
llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
|
|
||||||
MemProfCallStackIndexes =
|
|
||||||
writeMemProfCallStackArray(OS, MemProfData.CallStacks,
|
|
||||||
MemProfFrameIndexes, FrameHistogram,
|
|
||||||
NumElements);
|
|
||||||
|
|
||||||
uint64_t RecordPayloadOffset = OS.tell();
|
|
||||||
uint64_t RecordTableOffset =
|
|
||||||
writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
|
|
||||||
&MemProfCallStackIndexes);
|
|
||||||
|
|
||||||
// IndexedMemProfReader::deserializeV3 computes the number of elements in the
|
|
||||||
// call stack array from the difference between CallStackPayloadOffset and
|
|
||||||
// RecordPayloadOffset. Verify that the computation works.
|
|
||||||
assert(CallStackPayloadOffset +
|
|
||||||
NumElements * sizeof(memprof::LinearFrameId) ==
|
|
||||||
RecordPayloadOffset);
|
|
||||||
|
|
||||||
uint64_t Header[] = {
|
|
||||||
CallStackPayloadOffset,
|
|
||||||
RecordPayloadOffset,
|
|
||||||
RecordTableOffset,
|
|
||||||
};
|
|
||||||
OS.patch({{HeaderUpdatePos, Header}});
|
|
||||||
|
|
||||||
return Error::success();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write out the MemProf data in a requested version.
|
|
||||||
static Error writeMemProf(ProfOStream &OS,
|
|
||||||
memprof::IndexedMemProfData &MemProfData,
|
|
||||||
memprof::IndexedVersion MemProfVersionRequested,
|
|
||||||
bool MemProfFullSchema) {
|
|
||||||
switch (MemProfVersionRequested) {
|
|
||||||
case memprof::Version2:
|
|
||||||
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
|
|
||||||
case memprof::Version3:
|
|
||||||
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
|
|
||||||
}
|
|
||||||
|
|
||||||
return make_error<InstrProfError>(
|
|
||||||
instrprof_error::unsupported_version,
|
|
||||||
formatv("MemProf version {} not supported; "
|
|
||||||
"requires version between {} and {}, inclusive",
|
|
||||||
MemProfVersionRequested, memprof::MinimumSupportedVersion,
|
|
||||||
memprof::MaximumSupportedVersion));
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
|
uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
|
||||||
const bool WritePrevVersion,
|
const bool WritePrevVersion,
|
||||||
ProfOStream &OS) {
|
ProfOStream &OS) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user