llvm-project/llvm/lib/ProfileData/DataAccessProf.cpp
Kazu Hirata 228f66807d
[llvm] Remove unused includes (NFC) (#142733)
These are identified by misc-include-cleaner.  I've filtered out those
that break builds.  Also, I'm staying away from llvm-config.h,
config.h, and Compiler.h, which likely cause platform- or
compiler-specific build failures.
2025-06-04 12:30:52 -07:00

265 lines
9.0 KiB
C++

#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace memprof {
// Interns `Str` through `Saver` and makes sure `Map` has an entry keyed by the
// interned copy. A newly created entry is assigned the map's size at the time
// of the call as its index; an already present entry is left untouched.
// Returns a copy of the resulting (string, index) entry, not an iterator.
static std::pair<StringRef, uint64_t>
saveStringToMap(DataAccessProfData::StringToIndexMap &Map,
                llvm::UniqueStringSaver &Saver, StringRef Str) {
  return *Map.try_emplace(Saver.save(Str), Map.size()).first;
}
// Forwards a non-empty `Name` to InstrProfSymtab::getCanonicalName and returns
// its result. An empty `Name` is rejected with an `invalid_argument`
// StringError instead.
static Expected<StringRef> getCanonicalName(StringRef Name) {
  if (!Name.empty())
    return InstrProfSymtab::getCanonicalName(Name);
  return make_error<StringError>("Empty symbol name",
                                 llvm::errc::invalid_argument);
}
// Looks up the profile record stored for `SymbolID`.
//
// A string handle is canonicalized before the lookup; an integer handle is
// used as the key unchanged. Returns std::nullopt when the name cannot be
// canonicalized or when no record is stored under the resulting key.
std::optional<DataAccessProfRecord>
DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
  auto Key = SymbolID;
  if (std::holds_alternative<StringRef>(SymbolID)) {
    auto NameOrErr = getCanonicalName(std::get<StringRef>(SymbolID));
    // If name canonicalization fails, suppress the error inside.
    if (!NameOrErr) {
      assert(
          std::get<StringRef>(SymbolID).empty() &&
          "Name canonicalization only fails when stringified string is empty.");
      // The error has to be taken out of the Expected<> explicitly. Testing
      // the Expected<> alone does not mark a failure as handled, and an
      // unhandled failure aborts on destruction in checked builds.
      consumeError(NameOrErr.takeError());
      return std::nullopt;
    }
    Key = *NameOrErr;
  }

  auto It = Records.find(Key);
  if (It == Records.end())
    return std::nullopt;
  return DataAccessProfRecord(Key, It->second.AccessCount,
                              It->second.Locations);
}
// Returns true if `SymID` is recorded as a known cold symbol.
//
// An integer handle is looked up in `KnownColdHashes`. A string handle is
// canonicalized first, because addKnownSymbolWithoutSamples stores canonical
// names and getProfileRecord canonicalizes its queries the same way. A name
// that cannot be canonicalized is reported as not known.
bool DataAccessProfData::isKnownColdSymbol(const SymbolHandleRef SymID) const {
  if (std::holds_alternative<uint64_t>(SymID))
    return KnownColdHashes.contains(std::get<uint64_t>(SymID));

  auto NameOrErr = getCanonicalName(std::get<StringRef>(SymID));
  if (!NameOrErr) {
    // Take and drop the error so the Expected<> is not destroyed while still
    // holding an unhandled failure.
    consumeError(NameOrErr.takeError());
    return false;
  }
  return KnownColdSymbols.contains(*NameOrErr);
}
// Adds a record with `AccessCount` for `Symbol`.
//
// An integer handle is used directly as both the record key and the stored
// ID, and the record is flagged as a string literal. A string handle is
// canonicalized and saved into `StrToIndexMap`; the saved string becomes the
// key and its map index the stored ID.
//
// Returns the canonicalization error if the name is rejected, or an
// `invalid_argument` StringError if a record with the same key already exists.
Error DataAccessProfData::setDataAccessProfile(SymbolHandleRef Symbol,
                                               uint64_t AccessCount) {
  SymbolHandleRef RecordKey;
  uint64_t EncodedID = -1;
  const uint64_t *const Hash = std::get_if<uint64_t>(&Symbol);
  if (Hash != nullptr) {
    EncodedID = *Hash;
    RecordKey = EncodedID;
  } else {
    Expected<StringRef> NameOrErr =
        getCanonicalName(std::get<StringRef>(Symbol));
    if (!NameOrErr)
      return NameOrErr.takeError();
    const std::pair<StringRef, uint64_t> Entry =
        saveStringToMap(StrToIndexMap, Saver, *NameOrErr);
    RecordKey = Entry.first;
    EncodedID = Entry.second;
  }

  const bool IsNewRecord =
      Records.try_emplace(RecordKey, EncodedID, AccessCount, Hash != nullptr)
          .second;
  if (IsNewRecord)
    return Error::success();
  return make_error<StringError>("Duplicate symbol or string literal added. "
                                 "User of DataAccessProfData should "
                                 "aggregate count for the same symbol. ",
                                 llvm::errc::invalid_argument);
}
// Adds a record for `SymbolID` via the two-argument overload, then appends
// `Locations` to that record. Each file name is saved into `StrToIndexMap`
// and the record refers to the saved copy. Any error from the two-argument
// overload is returned unchanged and no locations are added.
Error DataAccessProfData::setDataAccessProfile(
    SymbolHandleRef SymbolID, uint64_t AccessCount,
    ArrayRef<SourceLocation> Locations) {
  if (Error Err = setDataAccessProfile(SymbolID, AccessCount))
    return Err;

  // The record that was just added is the last element of `Records`.
  auto &NewRecord = Records.back().second;
  for (const auto &Loc : Locations) {
    const StringRef SavedFileName =
        saveStringToMap(StrToIndexMap, Saver, Loc.FileName).first;
    NewRecord.Locations.push_back({SavedFileName, Loc.Line});
  }
  return Error::success();
}
// Records `SymbolID` as a known cold symbol. An integer handle is inserted
// into `KnownColdHashes`. A string handle is canonicalized, saved into
// `StrToIndexMap`, and the saved copy is inserted into `KnownColdSymbols`.
// Returns the canonicalization error if the name is rejected.
Error DataAccessProfData::addKnownSymbolWithoutSamples(
    SymbolHandleRef SymbolID) {
  if (const uint64_t *Hash = std::get_if<uint64_t>(&SymbolID)) {
    KnownColdHashes.insert(*Hash);
    return Error::success();
  }

  Expected<StringRef> NameOrErr =
      getCanonicalName(std::get<StringRef>(SymbolID));
  if (!NameOrErr)
    return NameOrErr.takeError();

  const StringRef SavedName =
      saveStringToMap(StrToIndexMap, Saver, *NameOrErr).first;
  KnownColdSymbols.insert(SavedName);
  return Error::success();
}
// Reads a whole profile from `Ptr`, advancing it past the consumed bytes.
// The sections are read in this order: the two little-endian 64-bit string
// counts, the string payload, the count and values of the known cold hashes,
// and finally the records.
Error DataAccessProfData::deserialize(const unsigned char *&Ptr) {
  const auto ReadU64 = [&Ptr]() -> uint64_t {
    return support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  };

  const uint64_t SampledSymbolCount = ReadU64();
  const uint64_t ColdSymbolCount = ReadU64();
  if (Error Err = deserializeSymbolsAndFilenames(Ptr, SampledSymbolCount,
                                                 ColdSymbolCount))
    return Err;

  for (uint64_t Remaining = ReadU64(); Remaining != 0; --Remaining)
    KnownColdHashes.insert(ReadU64());

  return deserializeRecords(Ptr);
}
// Writes the string section to `OS`: the number of `StrToIndexMap` entries,
// the number of known cold symbols, the byte length of the encoded string
// payload, the payload itself, and zero padding up to a multiple of 8 bytes.
// The payload lists the `StrToIndexMap` keys first, then the known cold
// symbols. Returns any error reported while encoding the strings.
Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
  OS.write(StrToIndexMap.size());
  OS.write(KnownColdSymbols.size());

  std::vector<std::string> AllNames;
  AllNames.reserve(StrToIndexMap.size() + KnownColdSymbols.size());
  for (const auto &Entry : StrToIndexMap)
    AllNames.push_back(Entry.first.str());
  for (const auto &ColdName : KnownColdSymbols)
    AllNames.push_back(ColdName.str());

  // With no strings at all the payload stays empty and a length of 0 is
  // written.
  std::string Payload;
  if (!AllNames.empty()) {
    Error Err = collectGlobalObjectNameStrings(
        AllNames, compression::zlib::isAvailable(), Payload);
    if (Err)
      return Err;
  }

  // The unpadded length is what gets recorded, so a reader can tell payload
  // bytes apart from the padding that follows.
  const uint64_t PayloadLen = Payload.length();
  OS.write(PayloadLen);
  for (const char Byte : Payload)
    OS.writeByte(static_cast<uint8_t>(Byte));

  // Pad up to a multiple of 8.
  for (uint64_t Pad = alignTo(PayloadLen, 8) - PayloadLen; Pad != 0; --Pad)
    OS.writeByte(0);
  return Error::success();
}
uint64_t
DataAccessProfData::getEncodedIndex(const SymbolHandleRef SymbolID) const {
if (std::holds_alternative<uint64_t>(SymbolID))
return std::get<uint64_t>(SymbolID);
auto Iter = StrToIndexMap.find(std::get<StringRef>(SymbolID));
assert(Iter != StrToIndexMap.end() &&
"String literals not found in StrToIndexMap");
return Iter->second;
}
// Writes the whole profile to `OS`: the string section, then the known cold
// hashes preceded by their count, then the records preceded by their count.
// Each record is written as encoded symbol ID, string-literal flag (one
// byte), access count, location count, and one (encoded file name index,
// 32-bit line) pair per location.
Error DataAccessProfData::serialize(ProfOStream &OS) const {
  if (Error Err = serializeSymbolsAndFilenames(OS))
    return Err;

  OS.write(KnownColdHashes.size());
  for (const auto &ColdHash : KnownColdHashes)
    OS.write(ColdHash);

  OS.write(static_cast<uint64_t>(Records.size()));
  for (const auto &Entry : Records) {
    const auto &Record = Entry.second;
    OS.write(getEncodedIndex(Record.SymbolID));
    OS.writeByte(Record.IsStringLiteral);
    OS.write(Record.AccessCount);
    OS.write(Record.Locations.size());
    for (const auto &Location : Record.Locations) {
      OS.write(getEncodedIndex(Location.FileName));
      OS.write32(Location.Line);
    }
  }
  return Error::success();
}
// Reads the string section from `Ptr`: a little-endian 64-bit payload length
// followed by the encoded strings. The first `NumSampledSymbols` decoded
// strings are saved into `StrToIndexMap`; every later string is saved and
// inserted into `KnownColdSymbols`. On success `Ptr` is advanced past the
// payload rounded up to a multiple of 8 bytes. On a decoding error the error
// is returned and `Ptr` is left just after the length field.
Error DataAccessProfData::deserializeSymbolsAndFilenames(
    const unsigned char *&Ptr, const uint64_t NumSampledSymbols,
    const uint64_t NumColdKnownSymbols) {
  const uint64_t PayloadLen =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // Counts the strings decoded so far; it decides which container the next
  // string belongs to.
  uint64_t NumDecoded = 0;
  std::function<Error(StringRef)> RecordName = [&](StringRef Name) {
    const bool IsSampledSymbol = NumDecoded < NumSampledSymbols;
    ++NumDecoded;
    if (IsSampledSymbol)
      saveStringToMap(StrToIndexMap, Saver, Name);
    else
      KnownColdSymbols.insert(Saver.save(Name));
    return Error::success();
  };

  const StringRef Payload(reinterpret_cast<const char *>(Ptr), PayloadLen);
  if (Error Err = readAndDecodeStrings(Payload, RecordName))
    return Err;

  Ptr += alignTo(PayloadLen, 8);
  return Error::success();
}
// Reads the record section from `Ptr`, advancing it past the consumed bytes.
//
// The section is a little-endian 64-bit record count followed by that many
// records. Each record holds a 64-bit ID, a one-byte string-literal flag, a
// 64-bit access count, a 64-bit location count, and one (64-bit file name
// index, 32-bit line) pair per location. When the flag is clear, the ID is an
// index into the strings of `StrToIndexMap`, as are all file name indices.
//
// Returns an `invalid_argument` StringError if an index read from the buffer
// is out of range for the string table, and forwards any error from
// setDataAccessProfile. Records added before a failure stay in `Records`.
Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
  // Snapshot of the string table in index order, used to resolve indices.
  SmallVector<StringRef> Strings =
      llvm::to_vector(llvm::make_first_range(getStrToIndexMapRef()));

  uint64_t NumRecords =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  for (uint64_t I = 0; I < NumRecords; ++I) {
    uint64_t ID =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    bool IsStringLiteral =
        support::endian::readNext<uint8_t, llvm::endianness::little>(Ptr);

    uint64_t AccessCount =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SymbolHandleRef SymbolID;
    if (IsStringLiteral) {
      SymbolID = ID;
    } else {
      // The index comes from the serialized buffer; validate it rather than
      // indexing past the end of the string table.
      if (ID >= Strings.size())
        return make_error<StringError>(
            "Symbol index out of range in data access profile record",
            llvm::errc::invalid_argument);
      SymbolID = Strings[ID];
    }
    if (Error E = setDataAccessProfile(SymbolID, AccessCount))
      return E;

    // The record that was just added is the last element of `Records`.
    auto &Record = Records.back().second;

    uint64_t NumLocations =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    Record.Locations.reserve(NumLocations);
    for (uint64_t J = 0; J < NumLocations; ++J) {
      uint64_t FileNameIndex =
          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
      uint32_t Line =
          support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
      // Same validation as for symbol indices.
      if (FileNameIndex >= Strings.size())
        return make_error<StringError>(
            "File name index out of range in data access profile record",
            llvm::errc::invalid_argument);
      Record.Locations.push_back({Strings[FileNameIndex], Line});
    }
  }
  return Error::success();
}
} // namespace memprof
} // namespace llvm