[clang-doc] Introduce TransientArena for short lived allocations (#190045)

With strings interned, we can move the StringRefs in various Info
structs into a new short-lived arena. This change migrates the remaining
SmallVectors in CommentInfo to use an ArrayRef backed by the new
transient arena.

This results in further minor reductions in overall memory usage, but no
significant effect on runtime performance.

| Metric | Baseline | Prev | This | Culm% | Seq% |
| :--- | :--- | :--- | :--- | :--- | :--- |
| Time | 920.5s | 1011.0s | 1005.7s | +9.2% | -0.5% |
| Memory | 86.0G | 44.9G | 42.1G | -51.0% | -6.2% |

| Benchmark | Baseline | Prev | This | Culm% | Seq% |
| :--- | :--- | :--- | :--- | :--- | :--- |
| BM_BitcodeReader_Scale/10 | 67.9us | 70.0us | 68.6us | +1.0% | -2.0% |
| BM_BitcodeReader_Scale/10000 | 70.5ms | 21.3ms | 21.3ms | -69.8% | -0.0% |
| BM_BitcodeReader_Scale/4096 | 23.2ms | 4.5ms | 4.6ms | -80.2% | +2.8% |
| BM_BitcodeReader_Scale/512 | 509.4us | 538.8us | 546.3us | +7.3% | +1.4% |
| BM_BitcodeReader_Scale/64 | 114.8us | 118.0us | 117.9us | +2.7% | -0.1% |
| BM_EmitInfoFunction | 1.6us | 1.6us | 1.5us | -5.5% | -6.2% |
| BM_Index_Insertion/10 | 2.3us | 4.0us | 3.9us | +70.3% | -0.7% |
| BM_Index_Insertion/10000 | 3.1ms | 5.0ms | 5.3ms | +68.6% | +5.0% |
| BM_Index_Insertion/4096 | 1.3ms | 2.0ms | 2.1ms | +64.2% | +4.5% |
| BM_Index_Insertion/512 | 153.6us | 245.0us | 253.0us | +64.8% | +3.2% |
| BM_Index_Insertion/64 | 18.1us | 28.9us | 30.1us | +67.0% | +4.4% |
| BM_JSONGenerator_Scale/10 | 36.8us | 36.4us | 37.0us | +0.4% | +1.7% |
| BM_JSONGenerator_Scale/10000 | 89.6ms | 90.4ms | 91.7ms | +2.3% | +1.5% |
| BM_JSONGenerator_Scale/4096 | 33.7ms | 34.0ms | 35.1ms | +4.0% | +3.0% |
| BM_JSONGenerator_Scale/64 | 222.4us | 220.5us | 223.3us | +0.4% | +1.3% |
| BM_Mapper_Scale/10000 | 104.3ms | 105.4ms | 105.6ms | +1.3% | +0.3% |
| BM_Mapper_Scale/4096 | 44.3ms | 44.7ms | 44.8ms | +1.0% | +0.1% |
| BM_Mapper_Scale/512 | 7.6ms | 7.7ms | 7.6ms | +0.7% | -1.2% |
| BM_MergeInfos_Scale/10000 | 12.2ms | 1.4ms | 1.4ms | -88.2% | +0.1% |
| BM_MergeInfos_Scale/2 | 1.9us | 1.7us | 1.7us | -8.5% | +2.1% |
| BM_MergeInfos_Scale/4096 | 2.8ms | 495.6us | 487.3us | -82.5% | -1.7% |
| BM_MergeInfos_Scale/512 | 68.9us | 34.6us | 38.7us | -43.9% | +11.6% |
| BM_MergeInfos_Scale/64 | 10.3us | 6.0us | 6.4us | -37.4% | +7.2% |
| BM_MergeInfos_Scale/8 | 2.8us | 2.1us | 2.2us | -20.6% | +5.1% |
| BM_SerializeFunctionInfo | 25.5us | 26.8us | 25.9us | +1.4% | -3.3% |
This commit is contained in:
Paul Kirth 2026-04-02 22:46:25 -07:00 committed by GitHub
parent 1972cf64fd
commit 4b2623d03c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 153 additions and 41 deletions

View File

@ -129,13 +129,6 @@ static llvm::Error decodeRecord(const Record &R, FieldId &Field,
"invalid value for FieldId");
}
/// Decodes one entry of a string-list field: the blob is interned and the
/// resulting reference is appended to \p Field. The numeric payload \p R is
/// not consulted. Always reports success.
static llvm::Error decodeRecord(const Record &R,
                                llvm::SmallVectorImpl<llvm::StringRef> &Field,
                                llvm::StringRef Blob) {
  llvm::StringRef Interned = internString(Blob);
  Field.push_back(Interned);
  return llvm::Error::success();
}
static llvm::Error decodeRecord(const Record &R,
llvm::SmallVectorImpl<Location> &Field,
llvm::StringRef Blob) {
@ -346,7 +339,10 @@ static llvm::Error parseRecord(const Record &R, unsigned ID,
}
static llvm::Error parseRecord(const Record &R, unsigned ID,
llvm::StringRef Blob, CommentInfo *I) {
llvm::StringRef Blob, CommentInfo *I,
llvm::SmallVectorImpl<StringRef> &AttrKeys,
llvm::SmallVectorImpl<StringRef> &AttrValues,
llvm::SmallVectorImpl<StringRef> &Args) {
llvm::SmallString<16> KindStr;
switch (ID) {
case COMMENT_KIND:
@ -365,11 +361,14 @@ static llvm::Error parseRecord(const Record &R, unsigned ID,
case COMMENT_CLOSENAME:
return decodeRecord(R, I->CloseName, Blob);
case COMMENT_ATTRKEY:
return decodeRecord(R, I->AttrKeys, Blob);
AttrKeys.push_back(internString(Blob));
return llvm::Error::success();
case COMMENT_ATTRVAL:
return decodeRecord(R, I->AttrValues, Blob);
AttrValues.push_back(internString(Blob));
return llvm::Error::success();
case COMMENT_ARG:
return decodeRecord(R, I->Args, Blob);
Args.push_back(internString(Blob));
return llvm::Error::success();
case COMMENT_SELFCLOSING:
return decodeRecord(R, I->SelfClosing, Blob);
case COMMENT_EXPLICIT:
@ -380,6 +379,70 @@ static llvm::Error parseRecord(const Record &R, unsigned ID,
}
}
/// Reads one comment block from the bitstream into \p I.
///
/// Records are handed to parseRecord, which writes scalar fields directly into
/// \p I and appends attribute keys, attribute values and command arguments to
/// local staging vectors. When the end of the block is reached, each non-empty
/// staging vector is copied into TransientArena and exposed through the
/// matching ArrayRef member of \p I.
///
/// \param ID  Block id passed to Stream.EnterSubBlock.
/// \param I   Comment to populate.
/// \returns success once the block end is reached; otherwise the first error
///          produced while entering the block, advancing the cursor, reading a
///          record, parsing a record, or reading a nested block. On an error
///          return the staged lists are not published to \p I.
template <>
llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
  // The scope object has to be named: an unnamed temporary is destroyed at the
  // end of its own statement and would therefore time nothing.
  llvm::TimeTraceScope TS("Reducing infos", "readBlock");
  if (llvm::Error Err = Stream.EnterSubBlock(ID))
    return Err;

  // Growable staging storage; the final sizes are only known at block end.
  llvm::SmallVector<StringRef> AttrKeys;
  llvm::SmallVector<StringRef> AttrValues;
  llvm::SmallVector<StringRef> Args;

  // Copies a staged list into TransientArena and points Dst at the copy.
  // An empty list allocates nothing and leaves Dst as it was.
  auto Publish = [](const llvm::SmallVectorImpl<StringRef> &Staged,
                    llvm::ArrayRef<StringRef> &Dst) {
    if (Staged.empty())
      return;
    StringRef *Mem = TransientArena.Allocate<StringRef>(Staged.size());
    std::uninitialized_copy(Staged.begin(), Staged.end(), Mem);
    Dst = llvm::ArrayRef<StringRef>(Mem, Staged.size());
  };

  while (true) {
    unsigned BlockOrCode = 0;
    llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
    if (!C)
      return C.takeError();

    switch (*C) {
    case Cursor::BadBlock:
      return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                     "bad block found");
    case Cursor::BlockEnd:
      // Same order as before: keys, then values, then arguments.
      Publish(AttrKeys, I->AttrKeys);
      Publish(AttrValues, I->AttrValues);
      Publish(Args, I->Args);
      return llvm::Error::success();
    case Cursor::BlockBegin:
      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
        // Try to step past the failed nested block and report both failures
        // if that does not work either.
        if (llvm::Error Skipped = Stream.SkipBlock())
          return joinErrors(std::move(Err), std::move(Skipped));
        return Err;
      }
      continue;
    case Cursor::Record:
      break;
    }

    Record R;
    llvm::StringRef Blob;
    llvm::Expected<unsigned> MaybeRecID =
        Stream.readRecord(BlockOrCode, R, &Blob);
    if (!MaybeRecID)
      return MaybeRecID.takeError();
    if (llvm::Error Err = parseRecord(R, MaybeRecID.get(), Blob, I, AttrKeys,
                                      AttrValues, Args))
      return Err;
  }
}
static llvm::Error parseRecord(const Record &R, unsigned ID,
llvm::StringRef Blob, Reference *I, FieldId &F) {
switch (ID) {

View File

@ -27,6 +27,10 @@
namespace clang {
namespace doc {
// Thread-local bump allocators: because both are declared thread_local, every
// thread that uses one of these names works on its own independent instance.
// NOTE(review): memory handed out by a thread's arena presumably does not
// outlive that thread, so consumers on other threads must finish first —
// confirm against the thread-pool lifetime in the tool's main().
thread_local llvm::BumpPtrAllocator TransientArena;
thread_local llvm::BumpPtrAllocator PersistentArena;
ConcurrentStringPool &getGlobalStringPool() {
static ConcurrentStringPool GlobalPool;
return GlobalPool;

View File

@ -49,6 +49,8 @@ private:
ConcurrentStringPool &getGlobalStringPool();
extern thread_local llvm::BumpPtrAllocator TransientArena;
inline StringRef internString(const Twine &T) {
if (T.isTriviallyEmpty())
return StringRef();
@ -67,6 +69,16 @@ inline StringRef internString(const Twine &T) {
return getGlobalStringPool().intern(S);
}
/// Copies the elements of \p V into storage obtained from \p Alloc and returns
/// a view over that copy.
///
/// \param V      Elements to copy; may be empty.
/// \param Alloc  Allocator that provides the backing storage.
/// \returns an empty ArrayRef when \p V is empty (the allocator is not
///          touched), otherwise an ArrayRef of V.size() elements located in
///          \p Alloc's storage.
///
/// NOTE(review): nothing here ever runs destructors for the copied elements,
/// so this presumably expects a trivially destructible \p T such as StringRef
/// — confirm before using it with owning types.
template <typename T>
inline llvm::ArrayRef<T> allocateArray(llvm::ArrayRef<T> V,
                                       llvm::BumpPtrAllocator &Alloc) {
  if (V.empty())
    return llvm::ArrayRef<T>();
  // Allocate<T>(N) already yields a T *, so no cast is required.
  T *Allocated = Alloc.Allocate<T>(V.size());
  // ArrayRef only exposes const elements, so they can only be copied; spell
  // that out instead of dressing the copy up as a move.
  std::uninitialized_copy(V.begin(), V.end(), Allocated);
  return llvm::ArrayRef<T>(Allocated, V.size());
}
// An abstraction for owned pointers. Initially mapped to std::unique_ptr,
// to be eventually transitioned to bare pointers in an arena.
template <typename T> using OwnedPtr = std::unique_ptr<T>;
@ -167,11 +179,10 @@ struct CommentInfo {
StringRef ParamName; // Parameter name (for (T)ParamCommand).
StringRef CloseName; // Closing tag name (for VerbatimBlock).
StringRef Text; // Text of the comment.
llvm::SmallVector<StringRef, 4>
AttrKeys; // List of attribute keys (for HTML).
llvm::SmallVector<StringRef, 4>
llvm::ArrayRef<StringRef> AttrKeys; // List of attribute keys (for HTML).
llvm::ArrayRef<StringRef>
AttrValues; // List of attribute values for each key (for HTML).
llvm::SmallVector<StringRef, 4>
llvm::ArrayRef<StringRef>
Args; // List of arguments to commands (for InlineCommand).
CommentKind Kind = CommentKind::
CK_Unknown; // Kind of comment (FullComment, ParagraphComment,

View File

@ -244,18 +244,38 @@ void ClangDocCommentVisitor::visitTextComment(const TextComment *C) {
// Handles an inline command comment: stores the interned command name in
// CurrentCI.Name and exposes the command's trimmed, interned argument texts
// through CurrentCI.Args.
// NOTE(review): this listing shows the commit's removed and added statements
// together without +/- markers; the direct `CurrentCI.Args.push_back` line
// looks like the pre-change statement that the local `Args` vector replaces —
// confirm against the original diff.
void ClangDocCommentVisitor::visitInlineCommandComment(
const InlineCommandComment *C) {
CurrentCI.Name = internString(getCommandName(C->getCommandID()));
// Local staging vector for the arguments.
llvm::SmallVector<StringRef> Args;
for (unsigned I = 0, E = C->getNumArgs(); I != E; ++I)
CurrentCI.Args.push_back(internString(C->getArgText(I).trim()));
Args.push_back(internString(C->getArgText(I).trim()));
// Copy the staged arguments into TransientArena and point CurrentCI.Args at
// the copy; with no arguments nothing is allocated and CurrentCI.Args is not
// assigned.
if (!Args.empty()) {
StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
CurrentCI.Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
}
}
// Handles an HTML start tag: stores the interned tag name and the
// self-closing flag in CurrentCI, and exposes the tag's attribute names and
// values through CurrentCI.AttrKeys and CurrentCI.AttrValues.
// NOTE(review): this listing shows the commit's removed and added statements
// together without +/- markers; the direct `CurrentCI.AttrKeys.push_back` /
// `CurrentCI.AttrValues.push_back` lines look like the pre-change statements
// that the local vectors replace — confirm against the original diff.
void ClangDocCommentVisitor::visitHTMLStartTagComment(
const HTMLStartTagComment *C) {
CurrentCI.Name = internString(C->getTagName());
CurrentCI.SelfClosing = C->isSelfClosing();
// Local staging vectors; one key and one value are appended per attribute.
llvm::SmallVector<StringRef> AttrKeys;
llvm::SmallVector<StringRef> AttrValues;
for (unsigned I = 0, E = C->getNumAttrs(); I < E; ++I) {
const HTMLStartTagComment::Attribute &Attr = C->getAttr(I);
CurrentCI.AttrKeys.push_back(internString(Attr.Name));
CurrentCI.AttrValues.push_back(internString(Attr.Value));
AttrKeys.push_back(internString(Attr.Name));
AttrValues.push_back(internString(Attr.Value));
}
// Copy the staged keys into TransientArena and point CurrentCI.AttrKeys at
// the copy; skipped entirely when there are no attributes.
if (!AttrKeys.empty()) {
StringRef *KeysMem = TransientArena.Allocate<StringRef>(AttrKeys.size());
std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
CurrentCI.AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
}
// Same for the attribute values.
if (!AttrValues.empty()) {
StringRef *ValuesMem =
TransientArena.Allocate<StringRef>(AttrValues.size());
std::uninitialized_copy(AttrValues.begin(), AttrValues.end(), ValuesMem);
CurrentCI.AttrValues =
llvm::ArrayRef<StringRef>(ValuesMem, AttrValues.size());
}
}
@ -268,8 +288,14 @@ void ClangDocCommentVisitor::visitHTMLEndTagComment(
// Handles a block command comment: stores the interned command name in
// CurrentCI.Name and exposes the command's trimmed, interned argument texts
// through CurrentCI.Args.
// NOTE(review): this listing shows the commit's removed and added statements
// together without +/- markers; the direct `CurrentCI.Args.push_back` line
// looks like the pre-change statement that the local `Args` vector replaces —
// confirm against the original diff.
void ClangDocCommentVisitor::visitBlockCommandComment(
const BlockCommandComment *C) {
CurrentCI.Name = internString(getCommandName(C->getCommandID()));
// Local staging vector for the arguments.
llvm::SmallVector<StringRef> Args;
for (unsigned I = 0, E = C->getNumArgs(); I < E; ++I)
CurrentCI.Args.push_back(internString(C->getArgText(I).trim()));
Args.push_back(internString(C->getArgText(I).trim()));
// Copy the staged arguments into TransientArena and point CurrentCI.Args at
// the copy; with no arguments nothing is allocated and CurrentCI.Args is not
// assigned.
if (!Args.empty()) {
StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
CurrentCI.Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
}
}
void ClangDocCommentVisitor::visitParamCommandComment(

View File

@ -240,11 +240,6 @@ static void commentInfoMapping(IO &IO, CommentInfo &I) {
QArgs.push_back(QuotedString(S));
}
IO.mapOptional("Args", QArgs, std::vector<QuotedString>());
if (!IO.outputting()) {
I.Args.clear();
for (auto &Q : QArgs)
I.Args.push_back(Q.Ref);
}
std::vector<QuotedString> QAttrKeys;
if (IO.outputting()) {
@ -252,11 +247,6 @@ static void commentInfoMapping(IO &IO, CommentInfo &I) {
QAttrKeys.push_back(QuotedString(S));
}
IO.mapOptional("AttrKeys", QAttrKeys, std::vector<QuotedString>());
if (!IO.outputting()) {
I.AttrKeys.clear();
for (auto &Q : QAttrKeys)
I.AttrKeys.push_back(Q.Ref);
}
std::vector<QuotedString> QAttrValues;
if (IO.outputting()) {
@ -264,11 +254,6 @@ static void commentInfoMapping(IO &IO, CommentInfo &I) {
QAttrValues.push_back(QuotedString(S));
}
IO.mapOptional("AttrValues", QAttrValues, std::vector<QuotedString>());
if (!IO.outputting()) {
I.AttrValues.clear();
for (auto &Q : QAttrValues)
I.AttrValues.push_back(Q.Ref);
}
IO.mapOptional("Children", I.Children);
}

View File

@ -351,8 +351,10 @@ Example usage for a project using a compile commands database:
DiagnosticsEngine::Error, "error reading bitcode: %0");
unsigned DiagIDBitcodeMerging = Diags.getCustomDiagID(
DiagnosticsEngine::Error, "error merging bitcode: %0");
// ExecutorConcurrency is a flag exposed by AllTUsExecution.h
// Note: we use per-thread arenas, so Pool must outlive the last use of this
// memory in the generators.
llvm::DefaultThreadPool Pool(
// ExecutorConcurrency is a flag exposed by AllTUsExecution.h
llvm::hardware_concurrency(ExecutorConcurrency));
{
llvm::TimeTraceScope TS("Reduce");

View File

@ -265,8 +265,15 @@ TEST_F(BitcodeTest, emitInfoWithCommentBitcode) {
HTML->Children.emplace_back(allocatePtr<CommentInfo>());
HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
HTML->Children.back()->Name = "ul";
HTML->Children.back()->AttrKeys.emplace_back("class");
HTML->Children.back()->AttrValues.emplace_back("test");
{
llvm::SmallVector<StringRef, 1> Keys = {"class"};
HTML->Children.back()->AttrKeys =
allocateArray<StringRef>(Keys, TransientArena);
llvm::SmallVector<StringRef, 1> Values = {"test"};
HTML->Children.back()->AttrValues =
allocateArray<StringRef>(Values, TransientArena);
}
HTML->Children.emplace_back(allocatePtr<CommentInfo>());
HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
HTML->Children.back()->Name = "li";

View File

@ -247,8 +247,15 @@ TEST_F(MDGeneratorTest, emitCommentMD) {
HTML->Children.emplace_back(allocatePtr<CommentInfo>());
HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
HTML->Children.back()->Name = "ul";
HTML->Children.back()->AttrKeys.emplace_back("class");
HTML->Children.back()->AttrValues.emplace_back("test");
{
llvm::SmallVector<StringRef, 1> Keys = {"class"};
HTML->Children.back()->AttrKeys =
allocateArray<StringRef>(Keys, TransientArena);
llvm::SmallVector<StringRef, 1> Values = {"test"};
HTML->Children.back()->AttrValues =
allocateArray<StringRef>(Values, TransientArena);
}
HTML->Children.emplace_back(allocatePtr<CommentInfo>());
HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
HTML->Children.back()->Name = "li";

View File

@ -411,8 +411,15 @@ TEST_F(YAMLGeneratorTest, emitCommentYAML) {
HTML->Children.emplace_back(allocatePtr<CommentInfo>());
HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
HTML->Children.back()->Name = "ul";
HTML->Children.back()->AttrKeys.emplace_back("class");
HTML->Children.back()->AttrValues.emplace_back("test");
{
llvm::SmallVector<StringRef, 1> Keys = {"class"};
HTML->Children.back()->AttrKeys =
allocateArray<StringRef>(Keys, TransientArena);
llvm::SmallVector<StringRef, 1> Values = {"test"};
HTML->Children.back()->AttrValues =
allocateArray<StringRef>(Values, TransientArena);
}
HTML->Children.emplace_back(allocatePtr<CommentInfo>());
HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
HTML->Children.back()->Name = "li";