[lld-macho] Parallelize linker optimization hint processing
This commit moves the parsing of linker optimization hints into
`ARM64::applyOptimizationHints`. This lets us avoid allocating memory for
holding the parsed information, and moves work out of `ObjFile::parse`, which
is not parallelized at the moment.

This change reduces the overhead of processing LOHs to 25-30 ms when linking
Chromium Framework on my M1 machine; previously it took close to 100 ms. There
is no statistically significant change in runtime for a --threads=1 link.

Performance figures with all 8 cores utilized:

    N           Min           Max        Median           Avg        Stddev
x  20     3.8027232     3.8760762     3.8505335     3.8454145   0.026352574
+  20     3.7019017     3.8660538     3.7546209     3.7620371   0.032680043
Difference at 95.0% confidence
        -0.0833775 +/- 0.019
        -2.16823% +/- 0.494094%
        (Student's t, pooled s = 0.0296854)

Differential Revision: https://reviews.llvm.org/D133439
parent 396ed327bb
commit a8843ec952
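The LOH payload that this change decodes on the fly is a flat byte stream: each
entry is a ULEB128-encoded transformation kind, an argument count, and that many
ULEB128-encoded addresses, with a kind of 0 terminating the list (this is the
format described by the diff's own comments and implemented by the new
`forEachHint` helper below). The following is a minimal, self-contained sketch of
walking such a stream; it is not lld code, and the sample bytes and the kind
value 7 are purely illustrative.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Decode one unsigned LEB128 value and advance `p`; this mirrors what
// llvm::decodeULEB128 does for well-formed input.
static uint64_t readULEB128(const uint8_t *&p, const uint8_t *end) {
  uint64_t value = 0;
  unsigned shift = 0;
  while (p < end) {
    uint8_t byte = *p++;
    value |= uint64_t(byte & 0x7f) << shift;
    if ((byte & 0x80) == 0)
      break;
    shift += 7;
  }
  return value;
}

int main() {
  // Hypothetical stream: one hint of kind 7 with two addresses, then the
  // 0 terminator. Real kind values come from llvm::MachO::LOH_ARM64_*.
  std::vector<uint8_t> data = {7, 2, 0x10, 0x14, 0};
  for (const uint8_t *p = data.data(), *end = p + data.size(); p < end;) {
    uint64_t kind = readULEB128(p, end);
    if (kind == 0)
      break;
    uint64_t argCount = readULEB128(p, end);
    std::printf("hint kind %llu:", (unsigned long long)kind);
    for (uint64_t i = 0; i < argCount; ++i)
      std::printf(" 0x%llx", (unsigned long long)readULEB128(p, end));
    std::printf("\n");
  }
}
```

Because each hint takes only a handful of ULEB128 reads, re-decoding the stream
per file inside `ARM64::applyOptimizationHints` is cheap, which is what lets the
commit drop the pre-parsed `OptimizationHint` vectors and the work in
`ObjFile::parse`.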
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"

 using namespace llvm;
@@ -40,8 +41,7 @@ struct ARM64 : ARM64Common {
                             uint64_t selectorIndex, uint64_t gotAddr,
                             uint64_t msgSendIndex) const override;
   void populateThunk(InputSection *thunk, Symbol *funcSym) override;
-  void applyOptimizationHints(uint8_t *,
-                              const ConcatInputSection *) const override;
+  void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
 };

 } // namespace
@@ -196,23 +196,6 @@ struct Ldr {
   ExtendType extendType;
   int64_t offset;
 };
-
-class OptimizationHintContext {
-public:
-  OptimizationHintContext(uint8_t *buf, const ConcatInputSection *isec)
-      : buf(buf), isec(isec) {}
-
-  void applyAdrpAdd(uint64_t, uint64_t);
-  void applyAdrpAdrp(uint64_t, uint64_t);
-  void applyAdrpLdr(uint64_t, uint64_t);
-  void applyAdrpLdrGot(uint64_t, uint64_t);
-  void applyAdrpAddLdr(uint64_t, uint64_t, uint64_t);
-  void applyAdrpLdrGotLdr(uint64_t, uint64_t, uint64_t);
-
-private:
-  uint8_t *buf;
-  const ConcatInputSection *isec;
-};
 } // namespace

 static bool parseAdrp(uint32_t insn, Adrp &adrp) {
@@ -347,7 +330,8 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
 // ->
 // adr xM, _foo
 // nop
-void OptimizationHintContext::applyAdrpAdd(uint64_t offset1, uint64_t offset2) {
+static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
+                         uint64_t offset1, uint64_t offset2) {
   uint32_t ins1 = read32le(buf + offset1);
   uint32_t ins2 = read32le(buf + offset2);
   Adrp adrp;
@@ -375,8 +359,8 @@ void OptimizationHintContext::applyAdrpAdd(uint64_t offset1, uint64_t offset2) {
 // ->
 // adrp xN, _foo@PAGE
 // nop
-void OptimizationHintContext::applyAdrpAdrp(uint64_t offset1,
-                                            uint64_t offset2) {
+static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec,
+                          uint64_t offset1, uint64_t offset2) {
   uint32_t ins1 = read32le(buf + offset1);
   uint32_t ins2 = read32le(buf + offset2);
   Adrp adrp1, adrp2;
@@ -402,7 +386,8 @@ void OptimizationHintContext::applyAdrpAdrp(uint64_t offset1,
 // ->
 // nop
 // ldr xM, _foo
-void OptimizationHintContext::applyAdrpLdr(uint64_t offset1, uint64_t offset2) {
+static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec,
+                         uint64_t offset1, uint64_t offset2) {
   uint32_t ins1 = read32le(buf + offset1);
   uint32_t ins2 = read32le(buf + offset2);
   Adrp adrp;
@@ -426,15 +411,15 @@ void OptimizationHintContext::applyAdrpLdr(uint64_t offset1, uint64_t offset2) {
 // GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
 // but they may be changed to adrp+add by relaxGotLoad(). This hint performs
 // the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
-void OptimizationHintContext::applyAdrpLdrGot(uint64_t offset1,
-                                              uint64_t offset2) {
+static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec,
+                            uint64_t offset1, uint64_t offset2) {
   uint32_t ins2 = read32le(buf + offset2);
   Add add;
   Ldr ldr;
   if (parseAdd(ins2, add))
-    applyAdrpAdd(offset1, offset2);
+    applyAdrpAdd(buf, isec, offset1, offset2);
   else if (parseLdr(ins2, ldr))
-    applyAdrpLdr(offset1, offset2);
+    applyAdrpLdr(buf, isec, offset1, offset2);
 }

 // Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
@@ -444,9 +429,9 @@ void OptimizationHintContext::applyAdrpLdrGot(uint64_t offset1,
 // adrp x0, _foo@PAGE
 // add x1, x0, _foo@PAGEOFF
 // ldr x2, [x1, #off]
-void OptimizationHintContext::applyAdrpAddLdr(uint64_t offset1,
-                                              uint64_t offset2,
-                                              uint64_t offset3) {
+static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
+                            uint64_t offset1, uint64_t offset2,
+                            uint64_t offset3) {
   uint32_t ins1 = read32le(buf + offset1);
   Adrp adrp;
   if (!parseAdrp(ins1, adrp))
@@ -512,15 +497,15 @@ void OptimizationHintContext::applyAdrpAddLdr(uint64_t offset1,
 // the GOT entry can be loaded with a single literal ldr instruction.
 // If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
 // we perform the AdrpAddLdr transformation.
-void OptimizationHintContext::applyAdrpLdrGotLdr(uint64_t offset1,
-                                                 uint64_t offset2,
-                                                 uint64_t offset3) {
+static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
+                               uint64_t offset1, uint64_t offset2,
+                               uint64_t offset3) {
   uint32_t ins2 = read32le(buf + offset2);
   Add add;
   Ldr ldr2;

   if (parseAdd(ins2, add)) {
-    applyAdrpAddLdr(offset1, offset2, offset3);
+    applyAdrpAddLdr(buf, isec, offset1, offset2, offset3);
   } else if (parseLdr(ins2, ldr2)) {
     // adrp x1, _foo@GOTPAGE
     // ldr x2, [x1, _foo@GOTPAGEOFF]
@@ -559,47 +544,167 @@ void OptimizationHintContext::applyAdrpLdrGotLdr(uint64_t offset1,
   }
 }

-void ARM64::applyOptimizationHints(uint8_t *buf,
-                                   const ConcatInputSection *isec) const {
-  assert(isec);
+static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
+  unsigned int n = 0;
+  uint64_t value = decodeULEB128(ptr, &n, end);
+  ptr += n;
+  return value;
+}

 // Note: Some of these optimizations might not be valid when shared regions
 // are in use. Will need to revisit this if splitSegInfo is added.
+template <typename Callback>
+static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
+  std::array<uint64_t, 3> args;

-  OptimizationHintContext ctx(buf, isec);
-  for (const OptimizationHint &hint : isec->optimizationHints) {
-    switch (hint.type) {
-    case LOH_ARM64_ADRP_ADRP:
-      // This is done in another pass because the other optimization hints
-      // might cause its targets to be turned into NOPs.
+  for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
+    uint64_t type = readValue(p, end);
+    if (type == 0)
      break;

+    uint64_t argCount = readValue(p, end);
+    // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
+    if (argCount > 3) {
+      for (unsigned i = 0; i < argCount; ++i)
+        readValue(p, end);
+      continue;
+    }
+
+    for (unsigned i = 0; i < argCount; ++i)
+      args[i] = readValue(p, end);
+    callback(type, ArrayRef<uint64_t>(args.data(), argCount));
+  }
+}
+
+// On RISC architectures like arm64, materializing a memory address generally
+// takes multiple instructions. If the referenced symbol is located close enough
+// in memory, fewer instructions are needed.
+//
+// Linker optimization hints record where addresses are computed. After
+// addresses have been assigned, if possible, we change them to a shorter
+// sequence of instructions. The size of the binary is not modified; the
+// eliminated instructions are replaced with NOPs. This still leads to faster
+// code as the CPU can skip over NOPs quickly.
+//
+// LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which
+// points to a sequence of ULEB128-encoded numbers. Each entry specifies a
+// transformation kind, and 2 or 3 addresses where the instructions are located.
+void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const {
+  ArrayRef<uint8_t> data = obj.getOptimizationHints();
+  if (data.empty())
+    return;
+
+  const ConcatInputSection *section = nullptr;
+  uint64_t sectionAddr = 0;
+  uint8_t *buf = nullptr;
+
+  auto findSection = [&](uint64_t addr) {
+    if (section && addr >= sectionAddr &&
+        addr < sectionAddr + section->getSize())
+      return true;
+
+    auto secIt = std::prev(llvm::upper_bound(
+        obj.sections, addr,
+        [](uint64_t off, const Section *sec) { return off < sec->addr; }));
+    const Section *sec = *secIt;
+
+    auto subsecIt = std::prev(llvm::upper_bound(
+        sec->subsections, addr - sec->addr,
+        [](uint64_t off, Subsection subsec) { return off < subsec.offset; }));
+    const Subsection &subsec = *subsecIt;
+    const ConcatInputSection *isec =
+        dyn_cast_or_null<ConcatInputSection>(subsec.isec);
+    if (!isec || isec->shouldOmitFromOutput())
+      return false;
+
+    section = isec;
+    sectionAddr = subsec.offset + sec->addr;
+    buf = outBuf + section->outSecOff + section->parent->fileOff;
+    return true;
+  };
+
+  auto isValidOffset = [&](uint64_t offset) {
+    if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) {
+      error("linker optimization hint spans multiple sections");
+      return false;
+    }
+    return true;
+  };
+
+  bool hasAdrpAdrp = false;
+  forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
+    if (kind == LOH_ARM64_ADRP_ADRP) {
+      hasAdrpAdrp = true;
+      return;
+    }
+
+    if (!findSection(args[0]))
+      return;
+    switch (kind) {
+    case LOH_ARM64_ADRP_ADD:
+      if (isValidOffset(args[1]))
+        applyAdrpAdd(buf, section, args[0] - sectionAddr,
+                     args[1] - sectionAddr);
       break;
     case LOH_ARM64_ADRP_LDR:
-      ctx.applyAdrpLdr(hint.offset0, hint.offset0 + hint.delta[0]);
+      if (isValidOffset(args[1]))
+        applyAdrpLdr(buf, section, args[0] - sectionAddr,
+                     args[1] - sectionAddr);
       break;
+    case LOH_ARM64_ADRP_LDR_GOT:
+      if (isValidOffset(args[1]))
+        applyAdrpLdrGot(buf, section, args[0] - sectionAddr,
+                        args[1] - sectionAddr);
+      break;
     case LOH_ARM64_ADRP_ADD_LDR:
-      ctx.applyAdrpAddLdr(hint.offset0, hint.offset0 + hint.delta[0],
-                          hint.offset0 + hint.delta[1]);
+      if (isValidOffset(args[1]) && isValidOffset(args[2]))
+        applyAdrpAddLdr(buf, section, args[0] - sectionAddr,
+                        args[1] - sectionAddr, args[2] - sectionAddr);
       break;
     case LOH_ARM64_ADRP_LDR_GOT_LDR:
-      ctx.applyAdrpLdrGotLdr(hint.offset0, hint.offset0 + hint.delta[0],
-                             hint.offset0 + hint.delta[1]);
+      if (isValidOffset(args[1]) && isValidOffset(args[2]))
+        applyAdrpLdrGotLdr(buf, section, args[0] - sectionAddr,
+                           args[1] - sectionAddr, args[2] - sectionAddr);
       break;
     case LOH_ARM64_ADRP_ADD_STR:
     case LOH_ARM64_ADRP_LDR_GOT_STR:
       // TODO: Implement these
       break;
-    case LOH_ARM64_ADRP_ADD:
-      ctx.applyAdrpAdd(hint.offset0, hint.offset0 + hint.delta[0]);
-      break;
-    case LOH_ARM64_ADRP_LDR_GOT:
-      ctx.applyAdrpLdrGot(hint.offset0, hint.offset0 + hint.delta[0]);
-      break;
     }
-  }
+  });

-  for (const OptimizationHint &hint : isec->optimizationHints)
-    if (hint.type == LOH_ARM64_ADRP_ADRP)
-      ctx.applyAdrpAdrp(hint.offset0, hint.offset0 + hint.delta[0]);
+  if (!hasAdrpAdrp)
+    return;
+
+  // AdrpAdrp optimization hints are performed in a second pass because they
+  // might interfere with other transformations. For instance, consider the
+  // following input:
+  //
+  //   adrp x0, _foo@PAGE
+  //   add x1, x0, _foo@PAGEOFF
+  //   adrp x0, _bar@PAGE
+  //   add x2, x0, _bar@PAGEOFF
+  //
+  // If we perform the AdrpAdrp relaxation first, we get:
+  //
+  //   adrp x0, _foo@PAGE
+  //   add x1, x0, _foo@PAGEOFF
+  //   nop
+  //   add x2, x0, _bar@PAGEOFF
+  //
+  // If we then apply AdrpAdd to the first two instructions, the add will have a
+  // garbage value in x0:
+  //
+  //   adr x1, _foo
+  //   nop
+  //   nop
+  //   add x2, x0, _bar@PAGEOFF
+  forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
+    if (kind != LOH_ARM64_ADRP_ADRP)
+      return;
+    if (!findSection(args[0]))
+      return;
+    if (isValidOffset(args[1]))
+      applyAdrpAdrp(buf, section, args[0] - sectionAddr, args[1] - sectionAddr);
+  });
 }

 TargetInfo *macho::createARM64TargetInfo() {
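
The `findSection` lambda above maps a hint's address to its containing section
and subsection with `std::prev(llvm::upper_bound(...))`. Here is a small
standalone sketch of that lookup pattern (plain STL rather than lld code, with
made-up start offsets): `upper_bound` returns the first start offset greater
than the address, so its predecessor is the entry whose range contains the
address, assuming the address is not below the first start.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Given subsection start offsets sorted in ascending order, return the index
// of the subsection containing `addr`.
static size_t containingIndex(const std::vector<uint64_t> &starts,
                              uint64_t addr) {
  auto it = std::prev(std::upper_bound(starts.begin(), starts.end(), addr));
  return static_cast<size_t>(it - starts.begin());
}

int main() {
  std::vector<uint64_t> starts = {0, 0x40, 0x100}; // hypothetical offsets
  assert(containingIndex(starts, 0x3f) == 0);
  assert(containingIndex(starts, 0x40) == 1);
  assert(containingIndex(starts, 0xff) == 1);
  assert(containingIndex(starts, 0x100) == 2);
  return 0;
}
```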
@@ -463,155 +463,6 @@ static Defined *findSymbolAtOffset(const ConcatInputSection *isec,
   return *it;
 }

-// Linker optimization hints mark a sequence of instructions used for
-// synthesizing an address which that be transformed into a faster sequence. The
-// transformations depend on conditions that are determined at link time, like
-// the distance to the referenced symbol or its alignment.
-//
-// Each hint has a type and refers to 2 or 3 instructions. Each of those
-// instructions must have a corresponding relocation. After addresses have been
-// finalized and relocations have been performed, we check if the requirements
-// hold, and perform the optimizations if they do.
-//
-// Similar linker relaxations exist for ELF as well, with the difference being
-// that the explicit marking allows for the relaxation of non-consecutive
-// relocations too.
-//
-// The specific types of hints are documented in Arch/ARM64.cpp
-void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) {
-  auto expectedArgCount = [](uint8_t type) {
-    switch (type) {
-    case LOH_ARM64_ADRP_ADRP:
-    case LOH_ARM64_ADRP_LDR:
-    case LOH_ARM64_ADRP_ADD:
-    case LOH_ARM64_ADRP_LDR_GOT:
-      return 2;
-    case LOH_ARM64_ADRP_ADD_LDR:
-    case LOH_ARM64_ADRP_ADD_STR:
-    case LOH_ARM64_ADRP_LDR_GOT_LDR:
-    case LOH_ARM64_ADRP_LDR_GOT_STR:
-      return 3;
-    }
-    return -1;
-  };
-
-  // Each hint contains at least 4 ULEB128-encoded fields, so in the worst case,
-  // there are data.size() / 4 LOHs. It's a huge overestimation though, as
-  // offsets are unlikely to fall in the 0-127 byte range, so we pre-allocate
-  // half as much.
-  optimizationHints.reserve(data.size() / 8);
-
-  for (const uint8_t *p = data.begin(); p < data.end();) {
-    const ptrdiff_t inputOffset = p - data.begin();
-    unsigned int n = 0;
-    uint8_t type = decodeULEB128(p, &n, data.end());
-    p += n;
-
-    // An entry of type 0 terminates the list.
-    if (type == 0)
-      break;
-
-    int expectedCount = expectedArgCount(type);
-    if (LLVM_UNLIKELY(expectedCount == -1)) {
-      error("Linker optimization hint at offset " + Twine(inputOffset) +
-            " has unknown type " + Twine(type));
-      return;
-    }
-
-    uint8_t argCount = decodeULEB128(p, &n, data.end());
-    p += n;
-
-    if (LLVM_UNLIKELY(argCount != expectedCount)) {
-      error("Linker optimization hint at offset " + Twine(inputOffset) +
-            " has " + Twine(argCount) + " arguments instead of the expected " +
-            Twine(expectedCount));
-      return;
-    }
-
-    uint64_t offset0 = decodeULEB128(p, &n, data.end());
-    p += n;
-
-    int16_t delta[2];
-    for (int i = 0; i < argCount - 1; ++i) {
-      uint64_t address = decodeULEB128(p, &n, data.end());
-      p += n;
-      int64_t d = address - offset0;
-      if (LLVM_UNLIKELY(d > std::numeric_limits<int16_t>::max() ||
-                        d < std::numeric_limits<int16_t>::min())) {
-        error("Linker optimization hint at offset " + Twine(inputOffset) +
-              " has addresses too far apart");
-        return;
-      }
-      delta[i] = d;
-    }
-
-    optimizationHints.push_back({offset0, {delta[0], delta[1]}, type});
-  }
-
-  // We sort the per-object vector of optimization hints so each section only
-  // needs to hold an ArrayRef to a contiguous range of hints.
-  llvm::sort(optimizationHints,
-             [](const OptimizationHint &a, const OptimizationHint &b) {
-               return a.offset0 < b.offset0;
-             });
-
-  auto section = sections.begin();
-  auto subsection = (*section)->subsections.begin();
-  uint64_t subsectionBase = 0;
-  uint64_t subsectionEnd = 0;
-
-  auto updateAddr = [&]() {
-    subsectionBase = (*section)->addr + subsection->offset;
-    subsectionEnd = subsectionBase + subsection->isec->getSize();
-  };
-
-  auto advanceSubsection = [&]() {
-    if (section == sections.end())
-      return;
-    ++subsection;
-    while (subsection == (*section)->subsections.end()) {
-      ++section;
-      if (section == sections.end())
-        return;
-      subsection = (*section)->subsections.begin();
-    }
-  };
-
-  updateAddr();
-  auto hintStart = optimizationHints.begin();
-  for (auto hintEnd = hintStart, end = optimizationHints.end(); hintEnd != end;
-       ++hintEnd) {
-    if (hintEnd->offset0 >= subsectionEnd) {
-      subsection->isec->optimizationHints =
-          ArrayRef<OptimizationHint>(&*hintStart, hintEnd - hintStart);
-
-      hintStart = hintEnd;
-      while (hintStart->offset0 >= subsectionEnd) {
-        advanceSubsection();
-        if (section == sections.end())
-          break;
-        updateAddr();
-        assert(hintStart->offset0 >= subsectionBase);
-      }
-    }
-
-    hintEnd->offset0 -= subsectionBase;
-    for (int i = 0, count = expectedArgCount(hintEnd->type); i < count - 1;
-         ++i) {
-      if (LLVM_UNLIKELY(
-              hintEnd->delta[i] < -static_cast<int64_t>(hintEnd->offset0) ||
-              hintEnd->delta[i] >=
-                  static_cast<int64_t>(subsectionEnd - hintEnd->offset0))) {
-        error("Linker optimization hint spans multiple sections");
-        return;
-      }
-    }
-  }
-  if (section != sections.end())
-    subsection->isec->optimizationHints = ArrayRef<OptimizationHint>(
-        &*hintStart, optimizationHints.end() - hintStart);
-}
-
 template <class SectionHeader>
 static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
                                    relocation_info rel) {
@@ -1129,11 +980,6 @@ template <class LP> void ObjFile::parse() {
     if (!sections[i]->subsections.empty())
       parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);

-  if (!config->ignoreOptimizationHints)
-    if (auto *cmd = findCommand<linkedit_data_command>(
-            hdr, LC_LINKER_OPTIMIZATION_HINT))
-      parseOptimizationHints({buf + cmd->dataoff, cmd->datasize});
-
   parseDebugInfo();

   Section *ehFrameSection = nullptr;
@@ -1213,6 +1059,14 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
           c->datasize / sizeof(data_in_code_entry)};
 }

+ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
+  const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
+  if (auto *cmd =
+          findCommand<linkedit_data_command>(buf, LC_LINKER_OPTIMIZATION_HINT))
+    return {buf + cmd->dataoff, cmd->datasize};
+  return {};
+}
+
 // Create pointers from symbols to their associated compact unwind entries.
 void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
   for (const Subsection &subsection : compactUnwindSection.subsections) {
@@ -159,6 +159,7 @@
   ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
           bool lazy = false, bool forceHidden = false);
   ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
+  ArrayRef<uint8_t> getOptimizationHints() const;
   template <class LP> void parse();

   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
@@ -176,7 +177,6 @@ public:
   std::vector<ConcatInputSection *> debugSections;
   std::vector<CallGraphEntry> callGraph;
   llvm::DenseMap<ConcatInputSection *, FDE> fdes;
-  std::vector<OptimizationHint> optimizationHints;
   std::vector<AliasSymbol *> aliases;

 private:
@@ -193,7 +193,6 @@ private:
   void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
                         const SectionHeader &, Section &);
   void parseDebugInfo();
-  void parseOptimizationHints(ArrayRef<uint8_t> data);
   void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
   void registerCompactUnwind(Section &compactUnwindSection);
   void registerEhFrames(Section &ehFrameSection);
@@ -29,8 +29,8 @@ using namespace lld::macho;
 // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
 // so account for that.
-static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) ==
-                                         sizeof(std::vector<Reloc>) + 104,
+static_assert(sizeof(void *) != 8 ||
+                  sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,
               "Try to minimize ConcatInputSection's size, we create many "
               "instances of it");

@@ -219,8 +219,6 @@ void ConcatInputSection::writeTo(uint8_t *buf) {
     }
     target->relocateOne(loc, r, referentVA, getVA() + r.offset);
   }
-
-  target->applyOptimizationHints(buf, this);
 }

 ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,
@@ -83,7 +83,6 @@ public:
   OutputSection *parent = nullptr;
   ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
-  ArrayRef<OptimizationHint> optimizationHints;
   // The symbols that belong to this InputSection, sorted by value. With
   // .subsections_via_symbols, there is typically only one element here.
   llvm::TinyPtrVector<Defined *> symbols;
@@ -69,14 +69,6 @@ struct Reloc {
         addend(addend), referent(referent) {}
 };

-struct OptimizationHint {
-  // Offset of the first address within the containing InputSection.
-  uint64_t offset0;
-  // Offset of the other addresses relative to the first one.
-  int16_t delta[2];
-  uint8_t type;
-};
-
 bool validateSymbolRelocation(const Symbol *, const InputSection *,
                               const Reloc &);

@@ -27,7 +27,7 @@ class Symbol;
 class Defined;
 class DylibSymbol;
 class InputSection;
-class ConcatInputSection;
+class ObjFile;

 class TargetInfo {
 public:
@@ -97,8 +97,7 @@ public:
     llvm_unreachable("Unsupported architecture for dtrace symbols");
   }

-  virtual void applyOptimizationHints(uint8_t *buf,
-                                      const ConcatInputSection *) const {};
+  virtual void applyOptimizationHints(uint8_t *, const ObjFile &) const {};

   uint32_t magic;
   llvm::MachO::CPUType cpuType;
@@ -60,6 +60,7 @@ public:

   void openFile();
   void writeSections();
+  void applyOptimizationHints();
   void writeUuid();
   void writeCodeSignature();
   void writeOutputFile();
@@ -1072,6 +1073,18 @@ void Writer::writeSections() {
   });
 }

+void Writer::applyOptimizationHints() {
+  if (config->arch() != AK_arm64 || config->ignoreOptimizationHints)
+    return;
+
+  uint8_t *buf = buffer->getBufferStart();
+  TimeTraceScope timeScope("Apply linker optimization hints");
+  parallelForEach(inputFiles, [buf](const InputFile *file) {
+    if (const auto *objFile = dyn_cast<ObjFile>(file))
+      target->applyOptimizationHints(buf, *objFile);
+  });
+}
+
 // In order to utilize multiple cores, we first split the buffer into chunks,
 // compute a hash for each chunk, and then compute a hash value of the hash
 // values.
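
The hunk above is where the parallelism comes from: every object file becomes one
task, and each task only writes into the output-buffer ranges covered by that
file's own sections, so the writes are disjoint and no locking is needed. A rough
sketch of this dispatch pattern with `llvm::parallelForEach` follows; `FileJob`
and `applyHintsForFile` are placeholders for illustration, not lld APIs.

```cpp
#include "llvm/Support/Parallel.h"
#include <cstdint>
#include <vector>

struct FileJob {
  uint8_t *buf; // this file's slice of the output buffer
  // ... per-file hint data would live here ...
};

// Decode and apply one file's hints into its slice of the buffer.
static void applyHintsForFile(const FileJob &job) { (void)job; }

void applyAllHints(std::vector<FileJob> &files) {
  // Each iteration touches only its own FileJob, so running the iterations in
  // parallel is safe without synchronization.
  llvm::parallelForEach(files, applyHintsForFile);
}
```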
@@ -1114,6 +1127,7 @@ void Writer::writeOutputFile() {
   if (errorCount())
     return;
   writeSections();
+  applyOptimizationHints();
   writeUuid();
   writeCodeSignature();

@@ -1,15 +1,10 @@
 # REQUIRES: aarch64

-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/section.s -o %t/section.o
-# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/far.s -o %t/far.o
-# RUN: not %lld -arch arm64 %t/section.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=SECTION
-# RUN: not %lld -arch arm64 %t/far.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=FAR
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
+# RUN: not %lld -arch arm64 %t.o -o /dev/null 2>&1 | FileCheck %s

-# SECTION: error: Linker optimization hint spans multiple sections
-# FAR: error: Linker optimization hint at offset 0 has addresses too far apart
+# CHECK: error: linker optimization hint spans multiple sections

-#--- section.s
 .globl _main
 _main:
 L1:
@@ -23,17 +18,3 @@ _target:

 .loh AdrpAdd L1, L2
 .subsections_via_symbols
-
-#--- far.s
-.globl _main
-_main:
-L1:
-adrp x0, _target@PAGE
-.zero 0x8000
-L2:
-add x0, x0, _target@PAGEOFF
-
-_target:
-
-.loh AdrpAdd L1, L2
-.subsections_via_symbols
@@ -17,6 +17,11 @@
 ## Not an adrp instruction (invalid)
 # CHECK-NEXT: nop
 # CHECK-NEXT: adrp x4
+## Other relaxations take precedence over AdrpAdrp
+# CHECK-NEXT: adr x6
+# CHECK-NEXT: nop
+# CHECK-NEXT: adr x6
+# CHECK-NEXT: nop

 .text
 .align 2
@@ -39,6 +44,14 @@ L7:
 nop
 L8:
 adrp x4, _baz@PAGE
+L9:
+adrp x5, _foo@PAGE
+L10:
+add x6, x5, _foo@PAGEOFF
+L11:
+adrp x5, _bar@PAGE
+L12:
+add x6, x5, _bar@PAGEOFF

 .data
 .align 12
@@ -54,3 +67,6 @@ _baz:
 .loh AdrpAdrp L3, L4
 .loh AdrpAdrp L5, L6
 .loh AdrpAdrp L7, L8
+.loh AdrpAdrp L9, L11
+.loh AdrpAdd L9, L10
+.loh AdrpAdd L11, L12