Revert "[CGData] Lazy loading support for stable function map (#151660)"

This reverts commit 76dd742f7b32e4d3acf50fab1dbbd897f215837e.
Kyungwoo Lee 2025-08-14 16:56:54 -07:00
parent 3bc4d66082
commit 07d3a73d70
20 changed files with 88 additions and 329 deletions

View File

@@ -285,9 +285,6 @@ enum CGDataVersion {
// Version 3 adds the total size of the Names in the stable function map so
// we can skip reading them into the memory for non-assertion builds.
Version3 = 3,
// Version 4 adjusts the structure of stable function merging map for
// efficient lazy loading support.
Version4 = 4,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
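The Version 3 comment above is the reason the name block carries its total byte size: a reader that does not need the names can jump straight past them. A minimal standalone sketch of that skip (the helper and its parameters are illustrative, not LLVM's API; the 4-byte realignment mirrors the padding noted in the serialized layout):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Skip the serialized name block without materializing any strings.
// `NamesByteSize` stands for the total-size field written since Version 3.
static const unsigned char *skipNameBlock(const unsigned char *Ptr,
                                          uint64_t NamesByteSize) {
  uintptr_t End = reinterpret_cast<uintptr_t>(Ptr) + NamesByteSize;
  // The payload that follows is 4-byte aligned, so round up.
  return reinterpret_cast<const unsigned char *>(llvm::alignTo(End, 4));
}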

View File

@@ -49,4 +49,4 @@ CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
#endif
/* Indexed codegen data format version (start from 1). */
#define CG_DATA_INDEX_VERSION 4
#define CG_DATA_INDEX_VERSION 3

View File

@@ -20,8 +20,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <mutex>
namespace llvm {
@@ -74,37 +72,11 @@ struct StableFunctionMap {
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
};
using StableFunctionEntries =
SmallVector<std::unique_ptr<StableFunctionEntry>>;
/// In addition to the deserialized StableFunctionEntry, the struct stores
/// the offsets of corresponding serialized stable function entries, and a
/// once flag for safe lazy loading in a multithreaded environment.
struct EntryStorage {
/// The actual storage of deserialized stable function entries. If the map
/// is lazily loaded, this will be empty until the first access by the
/// corresponding function hash.
StableFunctionEntries Entries;
private:
/// This is used to deserialize the entry lazily. Each element is the
/// corresponding serialized stable function entry's offset in the memory
/// buffer (StableFunctionMap::Buffer).
/// The offsets are only populated when loading the map lazily, otherwise
/// it is empty.
SmallVector<uint64_t> Offsets;
std::once_flag LazyLoadFlag;
friend struct StableFunctionMap;
friend struct StableFunctionMapRecord;
};
// Note: DenseMap requires value type to be copyable even if only using
// in-place insertion. Use STL instead. This also affects the
// deletion-while-iteration in finalize().
using HashFuncsMapType = std::unordered_map<stable_hash, EntryStorage>;
using HashFuncsMapType =
DenseMap<stable_hash, SmallVector<std::unique_ptr<StableFunctionEntry>>>;
/// Get the HashToFuncs map for serialization.
const HashFuncsMapType &getFunctionMap() const;
const HashFuncsMapType &getFunctionMap() const { return HashToFuncs; }
/// Get the NameToId vector for serialization.
ArrayRef<std::string> getNames() const { return IdToName; }
@@ -127,19 +99,6 @@ struct StableFunctionMap {
/// \returns true if there is no stable function entry.
bool empty() const { return size() == 0; }
/// \returns true if there is an entry for the given function hash.
/// This does not trigger lazy loading.
bool contains(HashFuncsMapType::key_type FunctionHash) const {
return HashToFuncs.count(FunctionHash) > 0;
}
/// \returns the stable function entries for the given function hash. If the
/// map is lazily loaded, it will deserialize the entries if it is not already
/// done, other requests to the same hash at the same time will be blocked
/// until the entries are deserialized.
const StableFunctionEntries &
at(HashFuncsMapType::key_type FunctionHash) const;
enum SizeType {
UniqueHashCount, // The number of unique hashes in HashToFuncs.
TotalFunctionCount, // The number of total functions in HashToFuncs.
@@ -160,31 +119,17 @@ private:
/// `StableFunctionEntry` is ready for insertion.
void insert(std::unique_ptr<StableFunctionEntry> FuncEntry) {
assert(!Finalized && "Cannot insert after finalization");
HashToFuncs[FuncEntry->Hash].Entries.emplace_back(std::move(FuncEntry));
HashToFuncs[FuncEntry->Hash].emplace_back(std::move(FuncEntry));
}
void deserializeLazyLoadingEntry(HashFuncsMapType::iterator It) const;
/// Eagerly deserialize all the unloaded entries in the lazy loading map.
void deserializeLazyLoadingEntries() const;
bool isLazilyLoaded() const { return (bool)Buffer; }
/// A map from a stable_hash to a vector of functions with that hash.
mutable HashFuncsMapType HashToFuncs;
HashFuncsMapType HashToFuncs;
/// A vector of strings to hold names.
SmallVector<std::string> IdToName;
/// A map from StringRef (name) to an ID.
StringMap<unsigned> NameToId;
/// True if the function map is finalized with minimal content.
bool Finalized = false;
/// The memory buffer that contains the serialized stable function map for
/// lazy loading.
/// Non-empty only if this StableFunctionMap is created from a MemoryBuffer
/// (i.e. by IndexedCodeGenDataReader::read()) and lazily deserialized.
std::shared_ptr<MemoryBuffer> Buffer;
/// Whether to read stable function names from the buffer.
bool ReadStableFunctionMapNames = true;
friend struct StableFunctionMapRecord;
};
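The DenseMap-vs-STL note in the removed code above comes down to value-type requirements: the lazy-loading EntryStorage holds unique_ptrs and a std::once_flag, so it is neither copyable nor movable, which runs into DenseMap's requirement that the value type be copyable, while a node-based std::unordered_map can construct it in place. A standalone illustration (hypothetical types, not the LLVM ones):

#include <cstdint>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for EntryStorage: move-only entries plus a
// non-copyable, non-movable std::once_flag.
struct Storage {
  std::vector<std::unique_ptr<int>> Entries;
  std::once_flag LoadedOnce;
};

int main() {
  std::unordered_map<uint64_t, Storage> Map;
  // try_emplace default-constructs the value in place; no copy or move of
  // Storage is ever required, and rehashing only relinks nodes.
  auto &S = Map.try_emplace(/*Hash=*/42).first->second;
  S.Entries.push_back(std::make_unique<int>(7));
  return 0;
}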

View File

@@ -24,26 +24,6 @@
namespace llvm {
/// The structure of the serialized stable function map is as follows:
/// - Number of unique function/module names
/// - Total size of unique function/module names for opt-in skipping
/// - Unique function/module names
/// - Padding to align to 4 bytes
/// - Number of StableFunctionEntries
/// - Hashes of each StableFunctionEntry
/// - Fixed-size fields for each StableFunctionEntry (the order is consistent
/// with the hashes above):
/// - FunctionNameId
/// - ModuleNameId
/// - InstCount
/// - Relative offset to the beginning of IndexOperandHashes for this entry
/// - Total size of variable-sized IndexOperandHashes for lazy-loading support
/// - Variable-sized IndexOperandHashes for each StableFunctionEntry:
/// - Number of IndexOperandHashes
/// - Contents of each IndexOperandHashes
/// - InstIndex
/// - OpndIndex
/// - OpndHash
struct StableFunctionMapRecord {
std::unique_ptr<StableFunctionMap> FunctionMap;
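What makes per-hash lazy loading possible in the layout described above is that every entry's non-variable fields occupy a fixed stride, so the I-th record can be addressed directly and its variable-sized IndexOperandHashes reached through the stored relative offset. A small sketch of the arithmetic (mirroring the FixedSizeFieldsSizePerEntry constant further down in this diff; the helper name is illustrative):

#include <cstdint>

// Byte stride of one fixed-size entry record in the (removed) Version 4
// layout: three uint32_t fields plus a uint64_t relative offset to the
// entry's IndexOperandHashes.
constexpr uint32_t FixedSizeFieldsSizePerEntry =
    sizeof(uint32_t) + // FunctionNameId
    sizeof(uint32_t) + // ModuleNameId
    sizeof(uint32_t) + // InstCount
    sizeof(uint64_t);  // relative offset to IndexOperandHashes

// Offset of entry I's record, given where the fixed-size region starts
// (right after the array of NumFuncs hashes).
constexpr uint64_t entryRecordOffset(uint64_t FixedSizeFieldsStart, uint32_t I) {
  return FixedSizeFieldsStart + uint64_t(I) * FixedSizeFieldsSizePerEntry;
}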
@@ -60,25 +40,13 @@ struct StableFunctionMapRecord {
const StableFunctionMap *FunctionMap,
std::vector<CGDataPatchItem> &PatchItems);
/// A static helper function to deserialize the stable function map entry.
/// Ptr should be pointing to the start of the fixed-sized fields of the
/// entry when passed in.
LLVM_ABI static void deserializeEntry(const unsigned char *Ptr,
stable_hash Hash,
StableFunctionMap *FunctionMap);
/// Serialize the stable function map to a raw_ostream.
LLVM_ABI void serialize(raw_ostream &OS,
std::vector<CGDataPatchItem> &PatchItems) const;
/// Deserialize the stable function map from a raw_ostream.
LLVM_ABI void deserialize(const unsigned char *&Ptr);
/// Lazily deserialize the stable function map from `Buffer` starting at
/// `Offset`. The individual stable function entry would be read lazily from
/// `Buffer` when the function map is accessed.
LLVM_ABI void lazyDeserialize(std::shared_ptr<MemoryBuffer> Buffer,
uint64_t Offset);
LLVM_ABI void deserialize(const unsigned char *&Ptr,
bool ReadStableFunctionMapNames = true);
/// Serialize the stable function map to a YAML stream.
LLVM_ABI void serializeYAML(yaml::Output &YOS) const;
@@ -102,18 +70,6 @@ struct StableFunctionMapRecord {
yaml::Output YOS(OS);
serializeYAML(YOS);
}
/// Set whether to read stable function names from the buffer.
/// Has no effect if the function map is read from a YAML stream.
void setReadStableFunctionMapNames(bool Read) {
assert(
FunctionMap->empty() &&
"Cannot change ReadStableFunctionMapNames after the map is populated");
FunctionMap->ReadStableFunctionMapNames = Read;
}
private:
void deserialize(const unsigned char *&Ptr, bool Lazy);
};
} // namespace llvm

View File

@@ -186,7 +186,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version4,
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3,
"Please update the offset computation below if a new field has "
"been added to the header.");
H.OutlinedHashTreeOffset =

View File

@@ -26,12 +26,6 @@ static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
"disabled to save memory and time for final consumption of the "
"indexed CodeGenData in production."));
cl::opt<bool> IndexedCodeGenDataLazyLoading(
"indexed-codegen-data-lazy-loading", cl::init(false), cl::Hidden,
cl::desc(
"Lazily load indexed CodeGenData. Enable to save memory and time "
"for final consumption of the indexed CodeGenData in production."));
namespace llvm {
static Expected<std::unique_ptr<MemoryBuffer>>
@@ -115,20 +109,11 @@ Error IndexedCodeGenDataReader::read() {
return error(cgdata_error::eof);
HashTreeRecord.deserialize(Ptr);
}
// TODO: lazy loading support for outlined hash tree.
std::shared_ptr<MemoryBuffer> SharedDataBuffer = std::move(DataBuffer);
if (hasStableFunctionMap()) {
const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
if (Ptr >= End)
return error(cgdata_error::eof);
FunctionMapRecord.setReadStableFunctionMapNames(
IndexedCodeGenDataReadFunctionMapNames);
if (IndexedCodeGenDataLazyLoading)
FunctionMapRecord.lazyDeserialize(SharedDataBuffer,
Header.StableFunctionMapOffset);
else
FunctionMapRecord.deserialize(Ptr);
FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
}
return success();
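In the removed lazy path above, the record captures the MemoryBuffer as a shared_ptr because the offsets it stashes point into that buffer, so the bytes must outlive the reader. A standalone sketch of that ownership idea (illustrative types only, not the CodeGenData API):

#include <cstdint>
#include <memory>
#include <vector>

// The record shares ownership of the backing bytes so the raw offsets it
// recorded stay valid for as long as anyone may still decode from them.
struct LazyRecord {
  std::shared_ptr<std::vector<unsigned char>> Buffer;
  std::vector<uint64_t> Offsets; // positions inside *Buffer, decoded on demand
};

LazyRecord makeLazyRecord(std::shared_ptr<std::vector<unsigned char>> Buf,
                          std::vector<uint64_t> Offsets) {
  return LazyRecord{std::move(Buf), std::move(Offsets)};
}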

View File

@@ -15,10 +15,8 @@
#include "llvm/CGData/StableFunctionMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <mutex>
#define DEBUG_TYPE "stable-function-map"
@@ -95,10 +93,9 @@ void StableFunctionMap::insert(const StableFunction &Func) {
void StableFunctionMap::merge(const StableFunctionMap &OtherMap) {
assert(!Finalized && "Cannot merge after finalization");
deserializeLazyLoadingEntries();
for (auto &[Hash, Funcs] : OtherMap.HashToFuncs) {
auto &ThisFuncs = HashToFuncs[Hash].Entries;
for (auto &Func : Funcs.Entries) {
auto &ThisFuncs = HashToFuncs[Hash];
for (auto &Func : Funcs) {
auto FuncNameId =
getIdOrCreateForName(*OtherMap.getNameForId(Func->FunctionNameId));
auto ModuleNameId =
@@ -117,63 +114,25 @@ size_t StableFunctionMap::size(SizeType Type) const {
case UniqueHashCount:
return HashToFuncs.size();
case TotalFunctionCount: {
deserializeLazyLoadingEntries();
size_t Count = 0;
for (auto &Funcs : HashToFuncs)
Count += Funcs.second.Entries.size();
Count += Funcs.second.size();
return Count;
}
case MergeableFunctionCount: {
deserializeLazyLoadingEntries();
size_t Count = 0;
for (auto &[Hash, Funcs] : HashToFuncs)
if (Funcs.Entries.size() >= 2)
Count += Funcs.Entries.size();
if (Funcs.size() >= 2)
Count += Funcs.size();
return Count;
}
}
llvm_unreachable("Unhandled size type");
}
const StableFunctionMap::StableFunctionEntries &
StableFunctionMap::at(HashFuncsMapType::key_type FunctionHash) const {
auto It = HashToFuncs.find(FunctionHash);
if (isLazilyLoaded())
deserializeLazyLoadingEntry(It);
return It->second.Entries;
}
void StableFunctionMap::deserializeLazyLoadingEntry(
HashFuncsMapType::iterator It) const {
assert(isLazilyLoaded() && "Cannot deserialize non-lazily-loaded map");
auto &[Hash, Storage] = *It;
std::call_once(Storage.LazyLoadFlag,
[this, HashArg = Hash, &StorageArg = Storage]() {
for (auto Offset : StorageArg.Offsets)
StableFunctionMapRecord::deserializeEntry(
reinterpret_cast<const unsigned char *>(Offset),
HashArg, const_cast<StableFunctionMap *>(this));
});
}
void StableFunctionMap::deserializeLazyLoadingEntries() const {
if (!isLazilyLoaded())
return;
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It)
deserializeLazyLoadingEntry(It);
}
const StableFunctionMap::HashFuncsMapType &
StableFunctionMap::getFunctionMap() const {
// Ensure all entries are deserialized before returning the raw map.
if (isLazilyLoaded())
deserializeLazyLoadingEntries();
return HashToFuncs;
}
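The removed deserializeLazyLoadingEntry above leans on std::call_once for thread safety: the first thread to ask for a hash decodes its entries, concurrent callers for the same hash block until that finishes, and later callers find the work already done. A standalone sketch of the pattern (illustrative types, not the LLVM ones):

#include <mutex>
#include <vector>

// One lazily-populated slot per hash: Entries stays empty until the first
// access, and LoadedOnce guarantees the decode runs exactly once.
struct LazySlot {
  std::vector<int> Entries; // stands in for StableFunctionEntries
  std::once_flag LoadedOnce;
};

void ensureLoaded(LazySlot &Slot) {
  std::call_once(Slot.LoadedOnce, [&Slot] {
    // Decode from the serialized buffer; runs at most once per slot even
    // when called concurrently from multiple threads.
    Slot.Entries.push_back(1);
  });
}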
using ParamLocs = SmallVector<IndexPair>;
static void
removeIdenticalIndexPair(StableFunctionMap::StableFunctionEntries &SFS) {
static void removeIdenticalIndexPair(
SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> &SFS) {
auto &RSF = SFS[0];
unsigned StableFunctionCount = SFS.size();
@@ -200,7 +159,9 @@ removeIdenticalIndexPair(StableFunctionMap::StableFunctionEntries &SFS) {
SF->IndexOperandHashMap->erase(Pair);
}
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS) {
static bool isProfitable(
const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
&SFS) {
unsigned StableFunctionCount = SFS.size();
if (StableFunctionCount < GlobalMergingMinMerges)
return false;
@@ -241,11 +202,8 @@ static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS) {
}
void StableFunctionMap::finalize(bool SkipTrim) {
deserializeLazyLoadingEntries();
SmallVector<HashFuncsMapType::iterator> ToDelete;
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) {
auto &[StableHash, Storage] = *It;
auto &SFS = Storage.Entries;
auto &[StableHash, SFS] = *It;
// Group stable functions by ModuleIdentifier.
llvm::stable_sort(SFS, [&](const std::unique_ptr<StableFunctionEntry> &L,
@@ -278,7 +236,7 @@ void StableFunctionMap::finalize(bool SkipTrim) {
}
}
if (Invalid) {
ToDelete.push_back(It);
HashToFuncs.erase(It);
continue;
}
@@ -290,10 +248,8 @@ void StableFunctionMap::finalize(bool SkipTrim) {
removeIdenticalIndexPair(SFS);
if (!isProfitable(SFS))
ToDelete.push_back(It);
HashToFuncs.erase(It);
}
for (auto It : ToDelete)
HashToFuncs.erase(It);
Finalized = true;
}
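The ToDelete vector in the removed finalize above is the deferred-erase idiom that the comment on HashFuncsMapType alludes to: with std::unordered_map, erasing one element invalidates only iterators to that element, so iterators collected during the walk can be erased safely afterwards. A standalone sketch (illustrative types):

#include <string>
#include <unordered_map>
#include <vector>

int main() {
  std::unordered_map<int, std::string> Map{{1, "keep"}, {2, ""}, {3, "keep"}};
  // Collect iterators to drop while walking, erase them after the loop;
  // std::unordered_map::erase only invalidates iterators to the erased node.
  std::vector<std::unordered_map<int, std::string>::iterator> ToDelete;
  for (auto It = Map.begin(); It != Map.end(); ++It)
    if (It->second.empty())
      ToDelete.push_back(It);
  for (auto It : ToDelete)
    Map.erase(It);
  return 0;
}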

View File

@@ -53,7 +53,7 @@ static SmallVector<const StableFunctionMap::StableFunctionEntry *>
getStableFunctionEntries(const StableFunctionMap &SFM) {
SmallVector<const StableFunctionMap::StableFunctionEntry *> FuncEntries;
for (const auto &P : SFM.getFunctionMap())
for (auto &Func : P.second.Entries)
for (auto &Func : P.second)
FuncEntries.emplace_back(Func.get());
llvm::stable_sort(
@@ -107,25 +107,14 @@ void StableFunctionMapRecord::serialize(
// Write StableFunctionEntries whose pointers are sorted.
auto FuncEntries = getStableFunctionEntries(*FunctionMap);
Writer.write<uint32_t>(FuncEntries.size());
for (const auto *FuncRef : FuncEntries)
Writer.write<stable_hash>(FuncRef->Hash);
std::vector<uint64_t> IndexOperandHashesOffsets;
IndexOperandHashesOffsets.reserve(FuncEntries.size());
for (const auto *FuncRef : FuncEntries) {
Writer.write<stable_hash>(FuncRef->Hash);
Writer.write<uint32_t>(FuncRef->FunctionNameId);
Writer.write<uint32_t>(FuncRef->ModuleNameId);
Writer.write<uint32_t>(FuncRef->InstCount);
const uint64_t Offset = Writer.OS.tell();
IndexOperandHashesOffsets.push_back(Offset);
Writer.write<uint64_t>(0);
}
const uint64_t IndexOperandHashesByteSizeOffset = Writer.OS.tell();
Writer.write<uint64_t>(0);
for (size_t I = 0; I < FuncEntries.size(); ++I) {
const uint64_t Offset = Writer.OS.tell() - IndexOperandHashesOffsets[I];
PatchItems.emplace_back(IndexOperandHashesOffsets[I], &Offset, 1);
// Emit IndexOperandHashes sorted from IndexOperandHashMap.
const auto *FuncRef = FuncEntries[I];
IndexOperandHashVecType IndexOperandHashes =
getStableIndexOperandHashes(FuncRef);
Writer.write<uint32_t>(IndexOperandHashes.size());
@@ -135,62 +124,10 @@ void StableFunctionMapRecord::serialize(
Writer.write<stable_hash>(IndexOperandHash.second);
}
}
// Write the total size of IndexOperandHashes.
const uint64_t IndexOperandHashesByteSize =
Writer.OS.tell() - IndexOperandHashesByteSizeOffset - sizeof(uint64_t);
PatchItems.emplace_back(IndexOperandHashesByteSizeOffset,
&IndexOperandHashesByteSize, 1);
}
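The zero placeholders written above are part of a write-then-patch idiom: the serializer cannot know an offset or byte size until it has emitted the variable-sized payload, so it reserves the slot, remembers its stream position, and records a CGDataPatchItem to fix it up later. A generic standalone sketch of the idiom (plain byte buffer; not the actual patching machinery):

#include <cstdint>
#include <cstring>
#include <vector>

struct Patch {
  uint64_t Pos;   // where the placeholder lives in the output
  uint64_t Value; // what it should finally contain
};

int main() {
  std::vector<unsigned char> Out;
  std::vector<Patch> Patches;

  // Reserve a 64-bit slot whose value is not known yet.
  uint64_t SizeFieldPos = Out.size();
  Out.resize(Out.size() + sizeof(uint64_t), 0);

  // Emit the variable-sized payload.
  Out.insert(Out.end(), {1, 2, 3, 4});

  // Now the size is known: record the patch and apply it at the end.
  Patches.push_back({SizeFieldPos, Out.size() - SizeFieldPos - sizeof(uint64_t)});
  for (const Patch &P : Patches)
    std::memcpy(Out.data() + P.Pos, &P.Value, sizeof(P.Value));
  return 0;
}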
void StableFunctionMapRecord::deserializeEntry(const unsigned char *Ptr,
stable_hash Hash,
StableFunctionMap *FunctionMap) {
auto FunctionNameId =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
if (FunctionMap->ReadStableFunctionMapNames)
assert(FunctionMap->getNameForId(FunctionNameId) &&
"FunctionNameId out of range");
auto ModuleNameId =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
if (FunctionMap->ReadStableFunctionMapNames)
assert(FunctionMap->getNameForId(ModuleNameId) &&
"ModuleNameId out of range");
auto InstCount =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
// Read IndexOperandHashes to build IndexOperandHashMap
auto CurrentPosition = reinterpret_cast<uintptr_t>(Ptr);
auto IndexOperandHashesOffset =
endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
auto *IndexOperandHashesPtr = reinterpret_cast<const unsigned char *>(
CurrentPosition + IndexOperandHashesOffset);
auto NumIndexOperandHashes =
endian::readNext<uint32_t, endianness::little, unaligned>(
IndexOperandHashesPtr);
auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
auto InstIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
IndexOperandHashesPtr);
auto OpndIndex = endian::readNext<uint32_t, endianness::little, unaligned>(
IndexOperandHashesPtr);
auto OpndHash =
endian::readNext<stable_hash, endianness::little, unaligned>(
IndexOperandHashesPtr);
assert(InstIndex < InstCount && "InstIndex out of range");
IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
}
// Insert a new StableFunctionEntry into the map.
auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
Hash, FunctionNameId, ModuleNameId, InstCount,
std::move(IndexOperandHashMap));
FunctionMap->insert(std::move(FuncEntry));
}
void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
bool Lazy) {
bool ReadStableFunctionMapNames) {
// Assert that Ptr is 4-byte aligned
assert(((uintptr_t)Ptr % 4) == 0);
// Read Names.
@@ -202,7 +139,7 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
const auto NamesByteSize =
endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
const auto NamesOffset = reinterpret_cast<uintptr_t>(Ptr);
if (FunctionMap->ReadStableFunctionMapNames) {
if (ReadStableFunctionMapNames) {
for (unsigned I = 0; I < NumNames; ++I) {
StringRef Name(reinterpret_cast<const char *>(Ptr));
Ptr += Name.size() + 1;
@@ -220,51 +157,47 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
// Read StableFunctionEntries.
auto NumFuncs =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
auto FixedSizeFieldsOffset =
reinterpret_cast<uintptr_t>(Ptr) + NumFuncs * sizeof(stable_hash);
constexpr uint32_t FixedSizeFieldsSizePerEntry =
// FunctionNameId
sizeof(uint32_t) +
// ModuleNameId
sizeof(uint32_t) +
// InstCount
sizeof(uint32_t) +
// Relative offset to IndexOperandHashes
sizeof(uint64_t);
for (unsigned I = 0; I < NumFuncs; ++I) {
auto Hash =
endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
if (Lazy) {
auto It = FunctionMap->HashToFuncs.try_emplace(Hash).first;
StableFunctionMap::EntryStorage &Storage = It->second;
Storage.Offsets.push_back(FixedSizeFieldsOffset);
} else {
deserializeEntry(
reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset), Hash,
FunctionMap.get());
[[maybe_unused]] auto FunctionNameId =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
[[maybe_unused]] auto ModuleNameId =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
// Only validate IDs if we've read the names
if (ReadStableFunctionMapNames) {
assert(FunctionMap->getNameForId(FunctionNameId) &&
"FunctionNameId out of range");
assert(FunctionMap->getNameForId(ModuleNameId) &&
"ModuleNameId out of range");
}
FixedSizeFieldsOffset += FixedSizeFieldsSizePerEntry;
auto InstCount =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
// Read IndexOperandHashes to build IndexOperandHashMap
auto NumIndexOperandHashes =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
auto IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
for (unsigned J = 0; J < NumIndexOperandHashes; ++J) {
auto InstIndex =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
auto OpndIndex =
endian::readNext<uint32_t, endianness::little, unaligned>(Ptr);
auto OpndHash =
endian::readNext<stable_hash, endianness::little, unaligned>(Ptr);
assert(InstIndex < InstCount && "InstIndex out of range");
IndexOperandHashMap->try_emplace({InstIndex, OpndIndex}, OpndHash);
}
// Insert a new StableFunctionEntry into the map.
auto FuncEntry = std::make_unique<StableFunctionMap::StableFunctionEntry>(
Hash, FunctionNameId, ModuleNameId, InstCount,
std::move(IndexOperandHashMap));
FunctionMap->insert(std::move(FuncEntry));
}
// Update Ptr to the end of the serialized map to meet the expectation of
// CodeGenDataReader.
Ptr = reinterpret_cast<const unsigned char *>(FixedSizeFieldsOffset);
auto IndexOperandHashesByteSize =
endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
Ptr = reinterpret_cast<const unsigned char *>(
reinterpret_cast<uintptr_t>(Ptr) + IndexOperandHashesByteSize);
}
void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
deserialize(Ptr, /*Lazy=*/false);
}
void StableFunctionMapRecord::lazyDeserialize(
std::shared_ptr<MemoryBuffer> Buffer, uint64_t Offset) {
const auto *Ptr = reinterpret_cast<const unsigned char *>(
reinterpret_cast<uintptr_t>(Buffer->getBufferStart()) + Offset);
deserialize(Ptr, /*Lazy=*/true);
FunctionMap->Buffer = std::move(Buffer);
}
void StableFunctionMapRecord::serializeYAML(yaml::Output &YOS) const {

View File

@@ -350,8 +350,9 @@ checkConstLocationCompatible(const StableFunctionMap::StableFunctionEntry &SF,
return true;
}
static ParamLocsVecTy
computeParamInfo(const StableFunctionMap::StableFunctionEntries &SFS) {
static ParamLocsVecTy computeParamInfo(
const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>>
&SFS) {
std::map<std::vector<stable_hash>, ParamLocs> HashSeqToLocs;
auto &RSF = *SFS[0];
unsigned StableFunctionCount = SFS.size();
@@ -395,18 +396,19 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
// Collect stable functions related to the current module.
DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
HashToFuncs;
auto &Maps = FunctionMap->getFunctionMap();
for (auto &F : M) {
if (!isEligibleFunction(&F))
continue;
auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
if (FunctionMap->contains(FI.FunctionHash))
if (Maps.contains(FI.FunctionHash))
HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
}
for (auto &[Hash, Funcs] : HashToFuncs) {
std::optional<ParamLocsVecTy> ParamLocsVec;
SmallVector<FuncMergeInfo> FuncMergeInfos;
auto &SFS = FunctionMap->at(Hash);
auto &SFS = Maps.at(Hash);
assert(!SFS.empty());
auto &RFS = SFS[0];

View File

@@ -36,11 +36,9 @@
; Merge the cgdata using llvm-cgdata.
; We now validate the content of the merged cgdata.
; Two functions have the same hash with only one different constant at the same location.
; Two functions have the same hash with only one different constnat at a same location.
; RUN: llvm-cgdata --merge -o %tout.cgdata %tout-nowrite.1 %tout-nowrite.2
; RUN: llvm-cgdata --convert %tout.cgdata -o - | FileCheck %s
; RUN: llvm-cgdata --merge -o %tout-lazy.cgdata %tout-nowrite.1 %tout-nowrite.2 -indexed-codegen-data-lazy-loading
; RUN: llvm-cgdata --convert %tout-lazy.cgdata -indexed-codegen-data-lazy-loading -o - | FileCheck %s
; CHECK: - Hash: [[#%d,HASH:]]
; CHECK-NEXT: FunctionName: f1

View File

@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0
# The version number appears when asked, as it's in the header
RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
VERSION: Version: 4
VERSION: Version: 3
# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -30,7 +30,7 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
# uint64_t StableFunctionMapOffset;
# }
RUN: printf '\xffcgdata\x81' > %t_header.cgdata
RUN: printf '\x04\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x03\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata

View File

@@ -22,9 +22,9 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
# The current version 4 while the header says 5.
# The current version 3 while the header says 4.
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
RUN: printf '\x05\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x04\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata

View File

@@ -23,8 +23,6 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-both-hashtree-funcma
# Merge an object file having cgdata (__llvm_outline and __llvm_merge)
RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap.cgdata
RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap.cgdata | FileCheck %s
RUN: llvm-cgdata -m --skip-trim %t/merge-both-hashtree-funcmap.o -o %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading
RUN: llvm-cgdata -s %t/merge-both-hashtree-funcmap-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Outlined hash tree:
CHECK-NEXT: Total Node Count: 3
@@ -65,4 +63,4 @@ CHECK-NEXT: Mergeable function Count: 0
;--- merge-both-template.ll
@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

View File

@@ -23,8 +23,8 @@ RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
# Merge the archive into the codegen data file.
RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive.cgdata
RUN: llvm-cgdata --show %t/merge-archive.cgdata | FileCheck %s
RUN: llvm-cgdata --merge --skip-trim %t/merge-archive.a -o %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading
RUN: llvm-cgdata --show %t/merge-archive-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
RUN: llvm-cgdata --show %t/merge-archive.cgdata| FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
CHECK-NEXT: Total function Count: 2
@@ -65,7 +65,7 @@ MAP-NEXT: ...
...
;--- merge-1-template.ll
@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
;--- raw-2.cgtext
:stable_function_map
@@ -80,4 +80,4 @@ MAP-NEXT: ...
...
;--- merge-2-template.ll
@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

View File

@@ -17,8 +17,6 @@ RUN: sed "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat-template-
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat.cgdata
RUN: llvm-cgdata --show %t/merge-concat.cgdata | FileCheck %s
RUN: llvm-cgdata --merge --skip-trim %t/merge-concat.o -o %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading
RUN: llvm-cgdata --show %t/merge-concat-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
@@ -76,5 +74,5 @@ MAP-NEXT: ...
; In an linked executable (as opposed to an object file), cgdata in __llvm_merge might be concatenated.
; Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated.
; In other words, the following two trees are encoded back-to-back in a binary format.
@.data1 = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
@.data2 = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
@.data1 = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

View File

@@ -19,9 +19,8 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
# Merge two object files into the codegen data file.
RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge.cgdata
RUN: llvm-cgdata --show %t/merge.cgdata | FileCheck %s
RUN: llvm-cgdata --merge --skip-trim %t/merge-1.o %t/merge-2.o -o %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading
RUN: llvm-cgdata --show %t/merge-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
CHECK-NEXT: Total function Count: 2
@@ -62,7 +61,7 @@ MAP-NEXT: ...
...
;--- merge-1-template.ll
@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
;--- raw-2.cgtext
:stable_function_map
@@ -77,4 +76,4 @@ MAP-NEXT: ...
...
;--- merge-2-template.ll
@.data = private unnamed_addr constant [84 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"

View File

@@ -15,8 +15,6 @@ RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-single.ll -o %t/merg
# Merge an object file having cgdata (__llvm_merge)
RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single.cgdata
RUN: llvm-cgdata -s %t/merge-single.cgdata | FileCheck %s
RUN: llvm-cgdata -m --skip-trim %t/merge-single.o -o %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading
RUN: llvm-cgdata -s %t/merge-single-lazy.cgdata -indexed-codegen-data-lazy-loading | FileCheck %s
CHECK: Stable function map:
CHECK-NEXT: Unique hash Count: 1
CHECK-NEXT: Total function Count: 1
@@ -35,4 +33,4 @@ CHECK-NEXT: Mergeable function Count: 0
...
;--- merge-single-template.ll
@.data = private unnamed_addr constant [84 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"

View File

@@ -31,4 +31,3 @@ def : JoinedOrSeparate<["-"], "o">, Alias<output>, MetaVarName<"<file>">, HelpTe
def format : Option<["--"], "format", KIND_SEPARATE>,
HelpText<"Specify the output format (text or binary)">, MetaVarName<"<value>">;
def : JoinedOrSeparate<["-"], "f">, Alias<format>, HelpText<"Alias for --format">;
def indexed_codegen_data_lazy_loading : F<"indexed-codegen-data-lazy-loading", "Lazily load indexed CodeGenData for testing purpose.">, Flags<[HelpHidden]>;

View File

@@ -83,8 +83,6 @@ static CGDataAction Action;
static std::optional<CGDataFormat> OutputFormat;
static std::vector<std::string> InputFilenames;
extern cl::opt<bool> IndexedCodeGenDataLazyLoading;
static void exitWithError(Twine Message, StringRef Whence = "",
StringRef Hint = "") {
WithColor::error();
@@ -363,9 +361,6 @@ static void parseArgs(int argc, char **argv) {
default:
llvm_unreachable("unrecognized action");
}
IndexedCodeGenDataLazyLoading =
Args.hasArg(OPT_indexed_codegen_data_lazy_loading);
}
int llvm_cgdata_main(int argc, char **argvNonConst, const llvm::ToolContext &) {

View File

@@ -117,7 +117,7 @@ TEST(StableFunctionMap, Finalize3) {
Map.finalize();
auto &M = Map.getFunctionMap();
EXPECT_THAT(M, SizeIs(1));
auto &FuncEntries = M.begin()->second.Entries;
auto &FuncEntries = M.begin()->second;
for (auto &FuncEntry : FuncEntries) {
EXPECT_THAT(*FuncEntry->IndexOperandHashMap, SizeIs(1));
ASSERT_THAT(*FuncEntry->IndexOperandHashMap,