From 42cc454777274a06933abcd098ec3281158717f9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 30 Mar 2026 20:51:30 -0700 Subject: [PATCH] [ELF] Optimize binary search in getSectionPiece (#187916) Two optimizations to make getSectionPiece O(1) for common cases: 1. For non-string fixed-size merge sections, use direct computation (offset / entsize) instead of binary search. 2. Pre-resolve piece indices for non-section Defined symbols during splitSections. The piece index and intra-piece offset are packed into Defined::value as ((pieceIdx+1) << 32) | intraPieceOffset, replacing repeated binary searches (MarkLive, includeInSymtab, getRelocTargetVA) with a single upfront resolution. On x86-64, references to mergeable strings use local labels: leaq .LC0(%rip), %rax # R_X86_64_PC32 .LC0-4 The relocations use non-section symbols and benefit from optimization 2. On many other targets (e.g. AArch64), the addend is 0 and the assembler adjusts such relocations to reference section symbols, which still use binary search. On a clang link (clang-relassert reproduce tarball, x86-64): - --gc-sections: 1.05x as fast --- lld/ELF/InputSection.cpp | 12 ++++++++++++ lld/ELF/InputSection.h | 7 ++++++- lld/ELF/SyntheticSections.cpp | 20 ++++++++++++++++++++ lld/test/ELF/merge-piece-oob.s | 2 +- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 855d520b6194..fc82433cdcc9 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -1548,13 +1548,25 @@ void MergeInputSection::splitIntoPieces() { } SectionPiece &MergeInputSection::getSectionPiece(uint64_t offset) { + // Pre-resolved by splitSections: pieceIdx + 1 in upper bits, + // intra-piece offset in lower bits. + if (uint32_t idx = offset >> mergeValueShift) + return pieces[idx - 1]; assert(offset < content().size()); + // For non-string fixed-size records, piece index = offset / entsize. 
+ if (!(flags & SHF_STRINGS)) + return pieces[offset / entsize]; return partition_point( pieces, [=](SectionPiece p) { return p.inputOff <= offset; })[-1]; } // Return the offset in an output section for a given input offset. uint64_t MergeInputSection::getParentOffset(uint64_t offset) const { + // Pre-resolved by splitSections: pieceIdx + 1 in upper bits, + // intra-piece offset in lower bits. + if (uint32_t idx = offset >> mergeValueShift) + return pieces[idx - 1].outputOff + + (offset & llvm::maskTrailingOnes(mergeValueShift)); const SectionPiece &piece = getSectionPiece(offset); return piece.outputOff + (offset - piece.inputOff); } diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 30df85d7aa10..061af258ce8a 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -326,6 +326,10 @@ struct SectionPiece { static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); +// Used by splitSections to pre-resolve section piece indexes. 32 bits of offset +// support section pieces of up to 4GB. +constexpr unsigned mergeValueShift = 32; + // This corresponds to a SHF_MERGE section of an input file. class MergeInputSection : public InputSectionBase { public: @@ -339,7 +343,8 @@ public: void splitIntoPieces(); // Translate an offset in the input section to an offset in the parent - // MergeSyntheticSection. + // MergeSyntheticSection. If the offset was pre-resolved by + // splitSections (upper bits non-zero), this is O(1). 
uint64_t getParentOffset(uint64_t offset) const; // Splittable sections are handled as a sequence of data diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 6b51fa41f0bf..2da3f1afeb82 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3809,6 +3809,26 @@ template void elf::splitSections(Ctx &ctx) { else if (auto *eh = dyn_cast(sec)) eh->split(); } + + // For non-section Defined symbols in merge sections, pre-resolve the piece + // index to avoid potentially repeated binary search (MarkLive, RelocScan, + // includeInSymtab). Encode each non-section Defined symbol's value as + // ((pieceIdx + 1) << mergeValueShift) | intraPieceOffset. + auto resolve = [](Defined *d) { + auto *ms = dyn_cast_or_null(d->section); + if (!ms || d->isSection()) + return; + SectionPiece &piece = ms->getSectionPiece(d->value); + uint32_t idx = &piece - ms->pieces.data(); + uint64_t off = d->value - piece.inputOff; + d->value = ((uint64_t)(idx + 1) << mergeValueShift) | off; + }; + for (Symbol *sym : file->getLocalSymbols()) + if (auto *d = dyn_cast(sym)) + resolve(d); + for (Symbol *sym : file->getGlobalSymbols()) + if (auto *d = dyn_cast(sym); d && d->file == file) + resolve(d); }); } diff --git a/lld/test/ELF/merge-piece-oob.s b/lld/test/ELF/merge-piece-oob.s index 1ff34768a4d1..d2bf9fab443a 100644 --- a/lld/test/ELF/merge-piece-oob.s +++ b/lld/test/ELF/merge-piece-oob.s @@ -12,7 +12,7 @@ # CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xffffffffffffffff is outside the section ## .rodata.str1.1 is "abc\0" (4 bytes). offset<=size is accepted. # CHECK-NEXT: [[PREFIX]]: {{.*}}:(.rodata.str1.1): offset 0x5 is outside the section -## .data.retain references .foo-1 as well. +## .data.retain references .foo-2 as well. # CHECK-NEXT: [[PREFIX]]: {{.*}}:(.foo): offset 0xfffffffffffffffe is outside the section ## Test that --gc-sections with an out-of-bounds offset doesn't crash.