[BOLT][NFC] Speedup merge-fdata (#119942)

Eliminate splitting the buffer into lines, and use `std::getline`
directly. Simplify no_lbr and boltedcollection handling as well.

Test Plan: For a large fdata file (200MB), fstream version is ~10%
faster.
This commit is contained in:
Amir Ayupov 2024-12-14 22:26:20 -08:00 committed by GitHub
parent fe9f2ac8e7
commit 97f43364cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 24 additions and 39 deletions

View File

@ -5,7 +5,7 @@
# RUN: split-file %s %t
# RUN: not merge-fdata %t/a.fdata %t/b.fdata 2>&1 | FileCheck %s
# CHECK: cannot mix profile collected in BOLT and non-BOLT deployments
# CHECK: cannot mix profile with and without boltedcollection
#--- a.fdata
boltedcollection

View File

@ -6,7 +6,7 @@
# RUN: split-file %s %t
# RUN: not merge-fdata %t/a.fdata %t/b.fdata 2>&1 | FileCheck %s
# CHECK: cannot mix 'no_lbr' and 'lbr' profiles.
# CHECK: cannot mix profile with and without no_lbr
#--- a.fdata
no_lbr

View File

@ -22,6 +22,7 @@
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadPool.h"
#include <algorithm>
#include <fstream>
#include <mutex>
#include <unordered_map>
@ -274,52 +275,36 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
if (isYAML(Filename))
report_error(Filename, "cannot mix YAML and legacy formats");
ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = MB.getError())
report_error(Filename, EC);
StringRef Buf = MB.get()->getBuffer();
std::ifstream FdataFile(Filename, std::ios::in);
std::string FdataLine;
std::getline(FdataFile, FdataLine);
auto checkMode = [&](const std::string &Key, std::optional<bool> &Flag) {
const bool KeyIsSet = FdataLine.rfind(Key, 0) == 0;
if (!Flag.has_value())
Flag = KeyIsSet;
else if (*Flag != KeyIsSet)
report_error(Filename, "cannot mix profile with and without " + Key);
if (KeyIsSet)
// Advance line
std::getline(FdataFile, FdataLine);
};
ProfileTy *Profile;
{
std::lock_guard<std::mutex> Lock(BoltedCollectionMutex);
// Check if the string "boltedcollection" is in the first line
if (Buf.starts_with("boltedcollection\n")) {
if (!BoltedCollection.value_or(true))
report_error(
Filename,
"cannot mix profile collected in BOLT and non-BOLT deployments");
BoltedCollection = true;
Buf = Buf.drop_front(17);
} else {
if (BoltedCollection.value_or(false))
report_error(
Filename,
"cannot mix profile collected in BOLT and non-BOLT deployments");
BoltedCollection = false;
}
checkMode("boltedcollection", BoltedCollection);
// Check if the string "no_lbr" is in the first line
// (or second line if BoltedCollection is true)
size_t CheckNoLBRPos = Buf.find('\n');
if (CheckNoLBRPos != StringRef::npos) {
StringRef FirstLine = Buf.substr(0, CheckNoLBRPos);
if (FirstLine.contains("no_lbr")) {
if (!NoLBRCollection.value_or(true))
report_error(Filename, "cannot mix 'no_lbr' and 'lbr' profiles");
NoLBRCollection = true;
Buf = Buf.drop_front(CheckNoLBRPos + 1);
} else {
if (NoLBRCollection.value_or(false))
report_error(Filename, "cannot mix 'no_lbr' and 'lbr' profiles");
NoLBRCollection = false;
}
}
checkMode("no_lbr", NoLBRCollection);
Profile = &Profiles[tid];
}
SmallVector<StringRef> Lines;
SplitString(Buf, Lines, "\n");
for (StringRef Line : Lines) {
do {
StringRef Line(FdataLine);
size_t Pos = Line.rfind(" ");
if (Pos == StringRef::npos)
report_error(Filename, "Malformed / corrupted profile");
@ -329,7 +314,7 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
report_error(Filename, "Malformed / corrupted profile counter");
Count += Profile->lookup(Signature);
Profile->insert_or_assign(Signature, Count);
}
} while (std::getline(FdataFile, FdataLine));
};
// The final reduction has non-trivial cost, make sure each thread has at