
I tried to run llvm-pdbdump on a very large (~1.5GB) PDB to try and identify show-stopping performance problems. This patch addresses the first such problem. When loading the DBI stream, before anyone has even tried to access a single record, we build an in-memory map of every source file for every module. In the particular PDB I was using, this was over 85 million files. Specifically, the complexity is O(m*n) where m is the number of modules and n is the average number of source files (including headers) per module. The whole reason for doing this was so that we could have constant time access to any module and any of its source file lists. However, we can still get O(1) access to the source file list for a given module with a simple O(m) precomputation, and access to the list of modules is already O(1) anyway. So this patch reduces the O(m*n) up-front precomputation to an O(m) one, where n is ~6,500 and n*m is about 85 million in my pathological test case. Differential Revision: https://reviews.llvm.org/D32870 llvm-svn: 302205
274 lines
8.1 KiB
C++
274 lines
8.1 KiB
C++
//===- DbiModuleList.cpp - PDB module information list ----------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h"
|
|
|
|
#include "llvm/DebugInfo/PDB/Native/RawError.h"
|
|
#include "llvm/Support/Error.h"
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::pdb;
|
|
|
|
/// Construct an iterator over the source files of module \p Modi in
/// \p Modules, positioned at file index \p Filei within that module.
DbiModuleSourceFilesIterator::DbiModuleSourceFilesIterator(
    const DbiModuleList &Modules, uint32_t Modi, uint16_t Filei)
    : Modules(&Modules), Modi(Modi), Filei(Filei) {
  // Load the cached value for the starting position (or the empty string if
  // the starting position is already an end position).
  this->setValue();
}
|
|
|
|
bool DbiModuleSourceFilesIterator::
|
|
operator==(const DbiModuleSourceFilesIterator &R) const {
|
|
// incompatible iterators are never equal
|
|
if (!isCompatible(R))
|
|
return false;
|
|
|
|
// If they're compatible, and they're both ends, then they're equal.
|
|
if (isEnd() && R.isEnd())
|
|
return true;
|
|
|
|
// If one is an end and the other is not, they're not equal.
|
|
if (isEnd() != R.isEnd())
|
|
return false;
|
|
|
|
// Now we know:
|
|
// - They're compatible
|
|
// - They're not *both* end iterators
|
|
// - Their endness is the same.
|
|
// Thus, they're compatible iterators pointing to a valid file on the same
|
|
// module. All we need to check are the file indices.
|
|
assert(Modules == R.Modules);
|
|
assert(Modi == R.Modi);
|
|
assert(!isEnd());
|
|
assert(!R.isEnd());
|
|
|
|
return (Filei == R.Filei);
|
|
}
|
|
|
|
bool DbiModuleSourceFilesIterator::
|
|
operator<(const DbiModuleSourceFilesIterator &R) const {
|
|
assert(isCompatible(R));
|
|
|
|
// It's not sufficient to compare the file indices, because default
|
|
// constructed iterators could be equal to iterators with valid indices. To
|
|
// account for this, early-out if they're equal.
|
|
if (*this == R)
|
|
return false;
|
|
|
|
return Filei < R.Filei;
|
|
}
|
|
|
|
std::ptrdiff_t DbiModuleSourceFilesIterator::
|
|
operator-(const DbiModuleSourceFilesIterator &R) const {
|
|
assert(isCompatible(R));
|
|
assert(!(*this < R));
|
|
|
|
// If they're both end iterators, the distance is 0.
|
|
if (isEnd() && R.isEnd())
|
|
return 0;
|
|
|
|
assert(!R.isEnd());
|
|
|
|
// At this point, R cannot be end, but *this can, which means that *this
|
|
// might be a universal end iterator with none of its fields set. So in that
|
|
// case have to rely on R as the authority to figure out how many files there
|
|
// are to compute the distance.
|
|
uint32_t Thisi = Filei;
|
|
if (isEnd()) {
|
|
uint32_t RealModi = R.Modi;
|
|
Thisi = R.Modules->getSourceFileCount(RealModi);
|
|
}
|
|
|
|
assert(Thisi >= R.Filei);
|
|
return Thisi - R.Filei;
|
|
}
|
|
|
|
/// Advance the iterator by \p N files and refresh the cached value.
///
/// Must not be called on an end iterator, and the resulting index must not
/// exceed the number of source files of the module.
DbiModuleSourceFilesIterator &DbiModuleSourceFilesIterator::
operator+=(std::ptrdiff_t N) {
  assert(!isEnd());

  Filei += N;

  // Landing exactly on the file count is allowed: that is the end position.
  assert(Filei <= Modules->getSourceFileCount(Modi));

  // Re-read the name for the new position.
  this->setValue();
  return *this;
}
|
|
|
|
/// Move the iterator back by \p N files and refresh the cached value.
///
/// Subtracting from a (non-universal) end iterator is allowed; subtracting
/// from a universal end iterator is not, because it carries no module list to
/// index into. \p N must not exceed the current file index.
DbiModuleSourceFilesIterator &DbiModuleSourceFilesIterator::
operator-=(std::ptrdiff_t N) {
  // Note that we can subtract from an end iterator, but not a universal end
  // iterator.
  assert(!isUniversalEnd());

  // Compare in the signed domain so the check does not mix signed and
  // unsigned operands.
  assert(N <= static_cast<std::ptrdiff_t>(Filei));

  Filei -= N;

  // The cached value still describes the old position; reload it so that
  // dereferencing after a decrement yields the file at the new index.
  setValue();
  return *this;
}
|
|
|
|
void DbiModuleSourceFilesIterator::setValue() {
|
|
if (isEnd()) {
|
|
ThisValue = "";
|
|
return;
|
|
}
|
|
|
|
uint32_t Off = Modules->ModuleInitialFileIndex[Modi] + Filei;
|
|
auto ExpectedValue = Modules->getFileName(Off);
|
|
if (!ExpectedValue) {
|
|
consumeError(ExpectedValue.takeError());
|
|
Filei = Modules->getSourceFileCount(Modi);
|
|
} else
|
|
ThisValue = *ExpectedValue;
|
|
}
|
|
|
|
bool DbiModuleSourceFilesIterator::isEnd() const {
|
|
if (isUniversalEnd())
|
|
return true;
|
|
|
|
assert(Modules);
|
|
assert(Modi <= Modules->getModuleCount());
|
|
assert(Filei <= Modules->getSourceFileCount(Modi));
|
|
|
|
if (Modi == Modules->getModuleCount())
|
|
return true;
|
|
if (Filei == Modules->getSourceFileCount(Modi))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/// A universal end iterator is one that has no module list attached.
bool DbiModuleSourceFilesIterator::isUniversalEnd() const {
  return Modules == nullptr;
}
|
|
|
|
bool DbiModuleSourceFilesIterator::isCompatible(
|
|
const DbiModuleSourceFilesIterator &R) const {
|
|
// Universal iterators are compatible with any other iterator.
|
|
if (isUniversalEnd() || R.isUniversalEnd())
|
|
return true;
|
|
|
|
// At this point, neither iterator is a universal end iterator, although one
|
|
// or both might be non-universal end iterators. Regardless, the module index
|
|
// is valid, so they are compatible if and only if they refer to the same
|
|
// module.
|
|
return Modi == R.Modi;
|
|
}
|
|
|
|
/// Initialize the list from the module info and file info substreams.
///
/// The module info substream is processed first; the first failure is
/// returned to the caller and stops initialization.
Error DbiModuleList::initialize(BinaryStreamRef ModInfo,
                                BinaryStreamRef FileInfo) {
  if (Error Err = initializeModInfo(ModInfo))
    return Err;

  if (Error Err = initializeFileInfo(FileInfo))
    return Err;

  return Error::success();
}
|
|
|
|
/// Remember the module info substream and read the module descriptor array
/// out of it. An empty substream is accepted and leaves the array untouched.
Error DbiModuleList::initializeModInfo(BinaryStreamRef ModInfo) {
  ModInfoSubstream = ModInfo;

  const auto SubstreamLength = ModInfo.getLength();
  if (SubstreamLength == 0)
    return Error::success();

  // The descriptor array spans the entire substream.
  BinaryStreamReader Reader(ModInfo);
  if (Error Err = Reader.readArray(Descriptors, SubstreamLength))
    return Err;

  return Error::success();
}
|
|
|
|
/// Parse the file info substream and precompute the per-module lookup tables.
///
/// Reads the substream header, the per-module file counts, the file name
/// offset table and the names buffer, then records for every module the index
/// of its first file and the offset of its descriptor in the descriptor array.
/// An empty substream is accepted and leaves everything else untouched.
///
/// \returns success, any error produced while reading the substream, or a
/// `corrupt_file` RawError if the header announces more modules than there
/// are module descriptors.
Error DbiModuleList::initializeFileInfo(BinaryStreamRef FileInfo) {
  FileInfoSubstream = FileInfo;

  if (FileInfo.getLength() == 0)
    return Error::success();

  BinaryStreamReader FISR(FileInfo);
  if (auto EC = FISR.readObject(FileInfoHeader))
    return EC;

  // First is an array of `NumModules` module indices. This does not seem to be
  // used for anything meaningful, so we ignore it.
  FixedStreamArray<support::ulittle16_t> ModuleIndices;
  if (auto EC = FISR.readArray(ModuleIndices, FileInfoHeader->NumModules))
    return EC;
  if (auto EC = FISR.readArray(ModFileCountArray, FileInfoHeader->NumModules))
    return EC;

  // Compute the real number of source files. We can't trust the value in
  // `FileInfoHeader->NumSourceFiles` because it is a uint16, and the sum of all
  // source file counts might be larger than a uint16. So we compute the real
  // count by summing up the individual counts.
  uint32_t NumSourceFiles = 0;
  for (auto Count : ModFileCountArray)
    NumSourceFiles += Count;

  // In the reference implementation, this array is where the pointer documented
  // at the definition of ModuleInfoHeader::FileNameOffs points to. Note that
  // although the field in ModuleInfoHeader is ignored this array is not, as it
  // is the authority on where each filename begins in the names buffer.
  if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles))
    return EC;

  if (auto EC = FISR.readStreamRef(NamesBuffer))
    return EC;

  // Walk the module descriptors once, recording for each module where its
  // file list starts and where its descriptor lives. This is the only
  // up-front work; individual file names are resolved on demand.
  auto DescriptorIter = Descriptors.begin();
  uint32_t NextFileIndex = 0;
  const size_t NumModules = FileInfoHeader->NumModules;
  ModuleInitialFileIndex.resize(NumModules);
  ModuleDescriptorOffsets.resize(NumModules);
  for (size_t I = 0; I < NumModules; ++I) {
    // The module count comes from the file being parsed, so it cannot be
    // trusted to match the descriptor array. Fail cleanly instead of
    // recording offsets taken from an end iterator.
    if (DescriptorIter == Descriptors.end())
      return make_error<RawError>(
          raw_error_code::corrupt_file,
          "File info substream has more modules than module descriptors");

    ModuleInitialFileIndex[I] = NextFileIndex;
    ModuleDescriptorOffsets[I] = DescriptorIter.offset();

    NextFileIndex += ModFileCountArray[I];
    ++DescriptorIter;
  }

  assert(DescriptorIter == Descriptors.end());
  assert(NextFileIndex == NumSourceFiles);

  return Error::success();
}
|
|
|
|
uint32_t DbiModuleList::getModuleCount() const {
|
|
return FileInfoHeader->NumModules;
|
|
}
|
|
|
|
uint32_t DbiModuleList::getSourceFileCount() const {
|
|
return FileNameOffsets.size();
|
|
}
|
|
|
|
uint16_t DbiModuleList::getSourceFileCount(uint32_t Modi) const {
|
|
return ModFileCountArray[Modi];
|
|
}
|
|
|
|
/// Fetch the descriptor of module \p Modi using the descriptor offset that
/// was recorded for it. \p Modi must be a valid module index.
DbiModuleDescriptor DbiModuleList::getModuleDescriptor(uint32_t Modi) const {
  assert(Modi < getModuleCount());

  // Jump straight to the recorded offset rather than walking the array.
  auto DescriptorPos = Descriptors.at(ModuleDescriptorOffsets[Modi]);
  assert(DescriptorPos != Descriptors.end());

  return *DescriptorPos;
}
|
|
|
|
/// Range over the source files of module \p Modi. The range starts at the
/// module's first file and is terminated by a default constructed (universal
/// end) iterator.
iterator_range<DbiModuleSourceFilesIterator>
DbiModuleList::source_files(uint32_t Modi) const {
  DbiModuleSourceFilesIterator First(*this, Modi, 0);
  DbiModuleSourceFilesIterator Last;
  return make_range<DbiModuleSourceFilesIterator>(First, Last);
}
|
|
|
|
/// Look up the name of the file with list-wide index \p Index.
///
/// \returns the name read from the names buffer, an `index_out_of_bounds`
/// RawError if \p Index is not below the total file count, or the error
/// produced while reading the string.
Expected<StringRef> DbiModuleList::getFileName(uint32_t Index) const {
  if (Index >= getSourceFileCount())
    return make_error<RawError>(raw_error_code::index_out_of_bounds);

  // The offset table says where in the names buffer this file's
  // null-terminated name begins.
  BinaryStreamReader NameReader(NamesBuffer);
  const uint32_t NameOffset = FileNameOffsets[Index];
  NameReader.setOffset(NameOffset);

  StringRef FileName;
  if (auto Err = NameReader.readCString(FileName))
    return std::move(Err);

  return FileName;
}
|