[ELF] Separate relative and non-relative dynamic relocations (#187959)
Previously, the flow was: 1. Parallel scan adds relative relocs to per-thread `relocsVec` 2. `mergeRels()` copies all into `relocs` 3. `partitionRels()` uses `stable_partition` to separate Now, relative relocs are routed at `addReloc` time by checking `reloc.type == relativeRel`. In `mergeRels`, sharded entries are classified through the same `addReloc` path rather than blindly appended. `relocsVec` may contain non-relative entries like `R_AARCH64_AUTH_RELATIVE`. This eliminates the `stable_partition` on the full relocation vector (543K entries for clang) and avoids copying relative relocations into `relocs` only to move them out again. Linking an x86_64 release+assertions build of clang is 1.04x as fast. `numRelativeRelocs` caches `relativeRelocs.size()` at `finalizeContents` time for `DT_RELACOUNT`. Using a live `relativeRelocs.size()` would cause `DynamicSection::writeTo` to emit an extra entry when thunks add relocs after `.dynamic` is sized, overflowing into adjacent sections. Tested by ppc64-long-branch-rel14.s.
This commit is contained in:
parent
5567572c44
commit
076226f378
@ -1483,7 +1483,8 @@ RelocationBaseSection::RelocationBaseSection(Ctx &ctx, StringRef name,
|
||||
unsigned concurrency)
|
||||
: SyntheticSection(ctx, name, type, SHF_ALLOC, ctx.arg.wordsize),
|
||||
dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag),
|
||||
relocsVec(concurrency), combreloc(combreloc) {}
|
||||
relocsVec(concurrency), relativeRel(ctx.target->relativeRel),
|
||||
combreloc(combreloc) {}
|
||||
|
||||
void RelocationBaseSection::addSymbolReloc(
|
||||
RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
|
||||
@ -1503,29 +1504,24 @@ void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
|
||||
}
|
||||
|
||||
void RelocationBaseSection::mergeRels() {
|
||||
size_t newSize = relocs.size();
|
||||
size_t newSize = relativeRelocs.size();
|
||||
for (const auto &v : relocsVec)
|
||||
newSize += v.size();
|
||||
relocs.reserve(newSize);
|
||||
relativeRelocs.reserve(newSize);
|
||||
// Classify relocsVec entries into relativeRelocs or relocs. Note that
|
||||
// relocsVec may contain non-relative entries (e.g. R_AARCH64_AUTH_RELATIVE)
|
||||
// so we must check the type.
|
||||
for (const auto &v : relocsVec)
|
||||
llvm::append_range(relocs, v);
|
||||
for (const DynamicReloc &r : v)
|
||||
addReloc(r);
|
||||
relocsVec.clear();
|
||||
}
|
||||
|
||||
void RelocationBaseSection::partitionRels() {
|
||||
if (!combreloc)
|
||||
return;
|
||||
const RelType relativeRel = ctx.target->relativeRel;
|
||||
numRelativeRelocs =
|
||||
std::stable_partition(relocs.begin(), relocs.end(),
|
||||
[=](auto &r) { return r.type == relativeRel; }) -
|
||||
relocs.begin();
|
||||
}
|
||||
|
||||
void RelocationBaseSection::finalizeContents() {
|
||||
mergeRels();
|
||||
// Compute DT_RELACOUNT to be used by part.dynamic.
|
||||
partitionRels();
|
||||
// Cache the count for DT_RELACOUNT. This must not change after
|
||||
// DynamicSection::finalizeContents sizes the .dynamic section.
|
||||
numRelativeRelocs = relativeRelocs.size();
|
||||
SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
|
||||
|
||||
// When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
|
||||
@ -1551,26 +1547,29 @@ void DynamicReloc::finalize(Ctx &ctx, SymbolTableBaseSection *symt) {
|
||||
|
||||
void RelocationBaseSection::computeRels() {
|
||||
SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
|
||||
parallelForEach(relativeRelocs, [&ctx = ctx, symTab](DynamicReloc &rel) {
|
||||
rel.finalize(ctx, symTab);
|
||||
});
|
||||
parallelForEach(relocs, [&ctx = ctx, symTab](DynamicReloc &rel) {
|
||||
rel.finalize(ctx, symTab);
|
||||
});
|
||||
|
||||
// Place IRELATIVE relocations last so that other dynamic relocations are
|
||||
// applied before IFUNC resolvers run.
|
||||
auto irelative = std::stable_partition(
|
||||
relocs.begin() + numRelativeRelocs, relocs.end(),
|
||||
relocs.begin(), relocs.end(),
|
||||
[t = ctx.target->iRelativeRel](auto &r) { return r.type != t; });
|
||||
|
||||
// Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to
|
||||
// place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset
|
||||
// is to make results easier to read.
|
||||
if (combreloc) {
|
||||
auto nonRelative = relocs.begin() + numRelativeRelocs;
|
||||
parallelSort(relocs.begin(), nonRelative,
|
||||
[&](auto &a, auto &b) { return a.r_offset < b.r_offset; });
|
||||
// Non-relative relocations are few, so don't bother with parallelSort.
|
||||
llvm::sort(nonRelative, irelative, [&](auto &a, auto &b) {
|
||||
parallelSort(relativeRelocs.begin(), relativeRelocs.end(),
|
||||
[](auto &a, auto &b) { return a.r_offset < b.r_offset; });
|
||||
// Non-relative relocations are few, so don't bother with parallelSort.
|
||||
if (combreloc)
|
||||
llvm::sort(relocs.begin(), irelative, [](auto &a, auto &b) {
|
||||
return std::tie(a.r_sym, a.r_offset) < std::tie(b.r_sym, b.r_offset);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <class ELFT>
|
||||
@ -1585,7 +1584,9 @@ RelocationSection<ELFT>::RelocationSection(Ctx &ctx, StringRef name,
|
||||
|
||||
template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
|
||||
computeRels();
|
||||
for (const DynamicReloc &rel : relocs) {
|
||||
// Write relative relocations first for DT_REL[A]COUNT.
|
||||
for (const DynamicReloc &rel :
|
||||
llvm::concat<const DynamicReloc>(relativeRelocs, relocs)) {
|
||||
auto *p = reinterpret_cast<Elf_Rela *>(buf);
|
||||
p->r_offset = rel.r_offset;
|
||||
p->setSymbolAndType(rel.r_sym, rel.type, ctx.arg.isMips64EL);
|
||||
|
||||
@ -495,7 +495,10 @@ public:
|
||||
/// This overload can be used if the addends are written directly instead of
|
||||
/// using relocations on the input section (e.g. MipsGotSection::writeTo()).
|
||||
template <bool shard = false> void addReloc(const DynamicReloc &reloc) {
|
||||
relocs.push_back(reloc);
|
||||
if (combreloc && reloc.type == relativeRel)
|
||||
relativeRelocs.push_back(reloc);
|
||||
else
|
||||
relocs.push_back(reloc);
|
||||
}
|
||||
/// Add a dynamic relocation against \p sym with an optional addend.
|
||||
void addSymbolReloc(RelType dynType, InputSectionBase &isec,
|
||||
@ -532,11 +535,11 @@ public:
|
||||
{dynType, &sec, offsetInSec, isAgainstSymbol, sym, addend, expr});
|
||||
}
|
||||
bool isNeeded() const override {
|
||||
return !relocs.empty() ||
|
||||
return !relocs.empty() || !relativeRelocs.empty() ||
|
||||
llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
|
||||
}
|
||||
size_t getSize() const override {
|
||||
size_t count = relocs.size();
|
||||
size_t count = relocs.size() + relativeRelocs.size();
|
||||
for (const auto &v : relocsVec)
|
||||
count += v.size();
|
||||
return count * this->entsize;
|
||||
@ -545,16 +548,16 @@ public:
|
||||
void finalizeContents() override;
|
||||
|
||||
int32_t dynamicTag, sizeDynamicTag;
|
||||
SmallVector<DynamicReloc, 0> relocs;
|
||||
SmallVector<DynamicReloc, 0> relocs, relativeRelocs;
|
||||
|
||||
protected:
|
||||
void mergeRels();
|
||||
void partitionRels();
|
||||
void computeRels();
|
||||
// Used when parallel relocation scanning adds relocations. The elements
|
||||
// will be moved into relocs by mergeRel().
|
||||
// will be classified into relativeRelocs or relocs by mergeRels().
|
||||
SmallVector<SmallVector<DynamicReloc, 0>, 0> relocsVec;
|
||||
size_t numRelativeRelocs = 0; // used by -z combreloc
|
||||
RelType relativeRel;
|
||||
bool combreloc;
|
||||
};
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user