[lld-macho] Parallelize linker optimization hint processing

This commit moves the parsing of linker optimization hints into
`ARM64::applyOptimizationHints`. This lets us avoid allocating memory
for holding the parsed information, and moves work out of
`ObjFile::parse`, which is not parallelized at the moment.

This change reduces the overhead of processing LOHs to 25-30 ms when
linking Chromium Framework on my M1 machine; previously it took close to
100 ms.

There's no statistically significant change in runtime for a --threads=1
link.

Performance figures with all 8 cores utilized:

      N           Min           Max        Median           Avg        Stddev
  x  20     3.8027232     3.8760762     3.8505335     3.8454145   0.026352574
  +  20     3.7019017     3.8660538     3.7546209     3.7620371   0.032680043
  Difference at 95.0% confidence
  	-0.0833775 +/- 0.019
  	-2.16823% +/- 0.494094%
  	(Student's t, pooled s = 0.0296854)

Differential Revision: https://reviews.llvm.org/D133439
This commit is contained in:
Daniel Bertalan 2022-09-05 19:03:15 +02:00
parent 396ed327bb
commit a8843ec952
No known key found for this signature in database
10 changed files with 211 additions and 254 deletions

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
@ -40,8 +41,7 @@ struct ARM64 : ARM64Common {
uint64_t selectorIndex, uint64_t gotAddr,
uint64_t msgSendIndex) const override;
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
void applyOptimizationHints(uint8_t *,
const ConcatInputSection *) const override;
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
};
} // namespace
@ -196,23 +196,6 @@ struct Ldr {
ExtendType extendType;
int64_t offset;
};
class OptimizationHintContext {
public:
OptimizationHintContext(uint8_t *buf, const ConcatInputSection *isec)
: buf(buf), isec(isec) {}
void applyAdrpAdd(uint64_t, uint64_t);
void applyAdrpAdrp(uint64_t, uint64_t);
void applyAdrpLdr(uint64_t, uint64_t);
void applyAdrpLdrGot(uint64_t, uint64_t);
void applyAdrpAddLdr(uint64_t, uint64_t, uint64_t);
void applyAdrpLdrGotLdr(uint64_t, uint64_t, uint64_t);
private:
uint8_t *buf;
const ConcatInputSection *isec;
};
} // namespace
static bool parseAdrp(uint32_t insn, Adrp &adrp) {
@ -347,7 +330,8 @@ static void writeImmediateLdr(void *loc, const Ldr &ldr) {
// ->
// adr xM, _foo
// nop
void OptimizationHintContext::applyAdrpAdd(uint64_t offset1, uint64_t offset2) {
static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
@ -375,8 +359,8 @@ void OptimizationHintContext::applyAdrpAdd(uint64_t offset1, uint64_t offset2) {
// ->
// adrp xN, _foo@PAGE
// nop
void OptimizationHintContext::applyAdrpAdrp(uint64_t offset1,
uint64_t offset2) {
static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp1, adrp2;
@ -402,7 +386,8 @@ void OptimizationHintContext::applyAdrpAdrp(uint64_t offset1,
// ->
// nop
// ldr xM, _foo
void OptimizationHintContext::applyAdrpLdr(uint64_t offset1, uint64_t offset2) {
static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins1 = read32le(buf + offset1);
uint32_t ins2 = read32le(buf + offset2);
Adrp adrp;
@ -426,15 +411,15 @@ void OptimizationHintContext::applyAdrpLdr(uint64_t offset1, uint64_t offset2) {
// GOT loads are emitted by the compiler as a pair of adrp and ldr instructions,
// but they may be changed to adrp+add by relaxGotLoad(). This hint performs
// the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed.
void OptimizationHintContext::applyAdrpLdrGot(uint64_t offset1,
uint64_t offset2) {
static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2) {
uint32_t ins2 = read32le(buf + offset2);
Add add;
Ldr ldr;
if (parseAdd(ins2, add))
applyAdrpAdd(offset1, offset2);
applyAdrpAdd(buf, isec, offset1, offset2);
else if (parseLdr(ins2, ldr))
applyAdrpLdr(offset1, offset2);
applyAdrpLdr(buf, isec, offset1, offset2);
}
// Optimizes an adrp+add+ldr sequence used for loading from a local symbol's
@ -444,9 +429,9 @@ void OptimizationHintContext::applyAdrpLdrGot(uint64_t offset1,
// adrp x0, _foo@PAGE
// add x1, x0, _foo@PAGEOFF
// ldr x2, [x1, #off]
void OptimizationHintContext::applyAdrpAddLdr(uint64_t offset1,
uint64_t offset2,
uint64_t offset3) {
static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2,
uint64_t offset3) {
uint32_t ins1 = read32le(buf + offset1);
Adrp adrp;
if (!parseAdrp(ins1, adrp))
@ -512,15 +497,15 @@ void OptimizationHintContext::applyAdrpAddLdr(uint64_t offset1,
// the GOT entry can be loaded with a single literal ldr instruction.
// If the referenced symbol is local and thus has been relaxed to adrp+add+ldr,
// we perform the AdrpAddLdr transformation.
void OptimizationHintContext::applyAdrpLdrGotLdr(uint64_t offset1,
uint64_t offset2,
uint64_t offset3) {
static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec,
uint64_t offset1, uint64_t offset2,
uint64_t offset3) {
uint32_t ins2 = read32le(buf + offset2);
Add add;
Ldr ldr2;
if (parseAdd(ins2, add)) {
applyAdrpAddLdr(offset1, offset2, offset3);
applyAdrpAddLdr(buf, isec, offset1, offset2, offset3);
} else if (parseLdr(ins2, ldr2)) {
// adrp x1, _foo@GOTPAGE
// ldr x2, [x1, _foo@GOTPAGEOFF]
@ -559,47 +544,167 @@ void OptimizationHintContext::applyAdrpLdrGotLdr(uint64_t offset1,
}
}
void ARM64::applyOptimizationHints(uint8_t *buf,
const ConcatInputSection *isec) const {
assert(isec);
// Decodes one ULEB128-encoded value starting at `ptr` (bounded by `end`) and
// advances `ptr` past the bytes that were consumed.
static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) {
  unsigned consumed = 0;
  const uint64_t result = decodeULEB128(ptr, &consumed, end);
  ptr += consumed;
  return result;
}
// Note: Some of these optimizations might not be valid when shared regions
// are in use. Will need to revisit this if splitSegInfo is added.
// Decodes the object file's LOH byte sequence and invokes
// `callback(kind, args)` for each hint. Each entry consists of a
// ULEB128-encoded hint type, a ULEB128 argument count, and that many
// ULEB128-encoded addresses; a type of 0 terminates the list.
//
// NOTE(review): the diff rendering had lines of the removed
// OptimizationHintContext-based implementation spliced into this body; they
// have been dropped so the function parses the raw LOH data as intended.
template <typename Callback>
static void forEachHint(ArrayRef<uint8_t> data, Callback callback) {
  std::array<uint64_t, 3> args;

  for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) {
    uint64_t type = readValue(p, end);
    if (type == 0)
      break;

    uint64_t argCount = readValue(p, end);
    // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others.
    if (argCount > 3) {
      for (unsigned i = 0; i < argCount; ++i)
        readValue(p, end);
      continue;
    }

    for (unsigned i = 0; i < argCount; ++i)
      args[i] = readValue(p, end);
    callback(type, ArrayRef<uint64_t>(args.data(), argCount));
  }
}
// On RISC architectures like arm64, materializing a memory address generally
// takes multiple instructions. If the referenced symbol is located close enough
// in memory, fewer instructions are needed.
//
// Linker optimization hints record where addresses are computed. After
// addresses have been assigned, if possible, we change them to a shorter
// sequence of instructions. The size of the binary is not modified; the
// eliminated instructions are replaced with NOPs. This still leads to faster
// code as the CPU can skip over NOPs quickly.
//
// LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which
// points to a sequence of ULEB128-encoded numbers. Each entry specifies a
// transformation kind, and 2 or 3 addresses where the instructions are located.
void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const {
ArrayRef<uint8_t> data = obj.getOptimizationHints();
if (data.empty())
return;
const ConcatInputSection *section = nullptr;
uint64_t sectionAddr = 0;
uint8_t *buf = nullptr;
auto findSection = [&](uint64_t addr) {
if (section && addr >= sectionAddr &&
addr < sectionAddr + section->getSize())
return true;
auto secIt = std::prev(llvm::upper_bound(
obj.sections, addr,
[](uint64_t off, const Section *sec) { return off < sec->addr; }));
const Section *sec = *secIt;
auto subsecIt = std::prev(llvm::upper_bound(
sec->subsections, addr - sec->addr,
[](uint64_t off, Subsection subsec) { return off < subsec.offset; }));
const Subsection &subsec = *subsecIt;
const ConcatInputSection *isec =
dyn_cast_or_null<ConcatInputSection>(subsec.isec);
if (!isec || isec->shouldOmitFromOutput())
return false;
section = isec;
sectionAddr = subsec.offset + sec->addr;
buf = outBuf + section->outSecOff + section->parent->fileOff;
return true;
};
auto isValidOffset = [&](uint64_t offset) {
if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) {
error("linker optimization hint spans multiple sections");
return false;
}
return true;
};
bool hasAdrpAdrp = false;
forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
if (kind == LOH_ARM64_ADRP_ADRP) {
hasAdrpAdrp = true;
return;
}
if (!findSection(args[0]))
return;
switch (kind) {
case LOH_ARM64_ADRP_ADD:
if (isValidOffset(args[1]))
applyAdrpAdd(buf, section, args[0] - sectionAddr,
args[1] - sectionAddr);
break;
case LOH_ARM64_ADRP_LDR:
ctx.applyAdrpLdr(hint.offset0, hint.offset0 + hint.delta[0]);
if (isValidOffset(args[1]))
applyAdrpLdr(buf, section, args[0] - sectionAddr,
args[1] - sectionAddr);
break;
case LOH_ARM64_ADRP_LDR_GOT:
if (isValidOffset(args[1]))
applyAdrpLdrGot(buf, section, args[0] - sectionAddr,
args[1] - sectionAddr);
break;
case LOH_ARM64_ADRP_ADD_LDR:
ctx.applyAdrpAddLdr(hint.offset0, hint.offset0 + hint.delta[0],
hint.offset0 + hint.delta[1]);
if (isValidOffset(args[1]) && isValidOffset(args[2]))
applyAdrpAddLdr(buf, section, args[0] - sectionAddr,
args[1] - sectionAddr, args[2] - sectionAddr);
break;
case LOH_ARM64_ADRP_LDR_GOT_LDR:
ctx.applyAdrpLdrGotLdr(hint.offset0, hint.offset0 + hint.delta[0],
hint.offset0 + hint.delta[1]);
if (isValidOffset(args[1]) && isValidOffset(args[2]))
applyAdrpLdrGotLdr(buf, section, args[0] - sectionAddr,
args[1] - sectionAddr, args[2] - sectionAddr);
break;
case LOH_ARM64_ADRP_ADD_STR:
case LOH_ARM64_ADRP_LDR_GOT_STR:
// TODO: Implement these
break;
case LOH_ARM64_ADRP_ADD:
ctx.applyAdrpAdd(hint.offset0, hint.offset0 + hint.delta[0]);
break;
case LOH_ARM64_ADRP_LDR_GOT:
ctx.applyAdrpLdrGot(hint.offset0, hint.offset0 + hint.delta[0]);
break;
}
}
});
for (const OptimizationHint &hint : isec->optimizationHints)
if (hint.type == LOH_ARM64_ADRP_ADRP)
ctx.applyAdrpAdrp(hint.offset0, hint.offset0 + hint.delta[0]);
if (!hasAdrpAdrp)
return;
// AdrpAdrp optimization hints are performed in a second pass because they
// might interfere with other transformations. For instance, consider the
// following input:
//
// adrp x0, _foo@PAGE
// add x1, x0, _foo@PAGEOFF
// adrp x0, _bar@PAGE
// add x2, x0, _bar@PAGEOFF
//
// If we perform the AdrpAdrp relaxation first, we get:
//
// adrp x0, _foo@PAGE
// add x1, x0, _foo@PAGEOFF
// nop
// add x2, x0, _bar@PAGEOFF
//
// If we then apply AdrpAdd to the first two instructions, the add will have a
// garbage value in x0:
//
// adr x1, _foo
// nop
// nop
// add x2, x0, _bar@PAGEOFF
forEachHint(data, [&](uint64_t kind, ArrayRef<uint64_t> args) {
if (kind != LOH_ARM64_ADRP_ADRP)
return;
if (!findSection(args[0]))
return;
if (isValidOffset(args[1]))
applyAdrpAdrp(buf, section, args[0] - sectionAddr, args[1] - sectionAddr);
});
}
TargetInfo *macho::createARM64TargetInfo() {

View File

@ -463,155 +463,6 @@ static Defined *findSymbolAtOffset(const ConcatInputSection *isec,
return *it;
}
// Linker optimization hints mark a sequence of instructions used for
// synthesizing an address that can be transformed into a faster sequence. The
// transformations depend on conditions that are determined at link time, like
// the distance to the referenced symbol or its alignment.
//
// Each hint has a type and refers to 2 or 3 instructions. Each of those
// instructions must have a corresponding relocation. After addresses have been
// finalized and relocations have been performed, we check if the requirements
// hold, and perform the optimizations if they do.
//
// Similar linker relaxations exist for ELF as well, with the difference being
// that the explicit marking allows for the relaxation of non-consecutive
// relocations too.
//
// The specific types of hints are documented in Arch/ARM64.cpp
// Parses the LC_LINKER_OPTIMIZATION_HINT payload: a sequence of
// ULEB128-encoded entries of the form (type, argCount, address0, address1
// [, address2]). Parsed hints are appended to `optimizationHints` with their
// addresses rebased to subsection-relative offsets, and each subsection is
// handed an ArrayRef to its contiguous slice of that vector.
// Emits an error and returns early on malformed input.
void ObjFile::parseOptimizationHints(ArrayRef<uint8_t> data) {
  // Number of addresses each hint type carries; -1 for unknown types.
  auto expectedArgCount = [](uint8_t type) {
    switch (type) {
    case LOH_ARM64_ADRP_ADRP:
    case LOH_ARM64_ADRP_LDR:
    case LOH_ARM64_ADRP_ADD:
    case LOH_ARM64_ADRP_LDR_GOT:
      return 2;
    case LOH_ARM64_ADRP_ADD_LDR:
    case LOH_ARM64_ADRP_ADD_STR:
    case LOH_ARM64_ADRP_LDR_GOT_LDR:
    case LOH_ARM64_ADRP_LDR_GOT_STR:
      return 3;
    }
    return -1;
  };

  // Each hint contains at least 4 ULEB128-encoded fields, so in the worst
  // case, there are data.size() / 4 LOHs. It's a huge overestimation though,
  // as offsets are unlikely to fall in the 0-127 byte range, so we
  // pre-allocate half as much.
  optimizationHints.reserve(data.size() / 8);

  for (const uint8_t *p = data.begin(); p < data.end();) {
    const ptrdiff_t inputOffset = p - data.begin();
    unsigned int n = 0;
    uint8_t type = decodeULEB128(p, &n, data.end());
    p += n;

    // An entry of type 0 terminates the list.
    if (type == 0)
      break;

    int expectedCount = expectedArgCount(type);
    if (LLVM_UNLIKELY(expectedCount == -1)) {
      error("Linker optimization hint at offset " + Twine(inputOffset) +
            " has unknown type " + Twine(type));
      return;
    }

    uint8_t argCount = decodeULEB128(p, &n, data.end());
    p += n;
    if (LLVM_UNLIKELY(argCount != expectedCount)) {
      error("Linker optimization hint at offset " + Twine(inputOffset) +
            " has " + Twine(argCount) + " arguments instead of the expected " +
            Twine(expectedCount));
      return;
    }

    uint64_t offset0 = decodeULEB128(p, &n, data.end());
    p += n;

    // Zero-initialize: 2-argument hints only write delta[0], but delta[1] is
    // still copied into the OptimizationHint below. Previously it was read
    // uninitialized.
    int16_t delta[2] = {0, 0};
    for (int i = 0; i < argCount - 1; ++i) {
      uint64_t address = decodeULEB128(p, &n, data.end());
      p += n;
      // Deltas between a hint's addresses must fit in 16 bits.
      int64_t d = address - offset0;
      if (LLVM_UNLIKELY(d > std::numeric_limits<int16_t>::max() ||
                        d < std::numeric_limits<int16_t>::min())) {
        error("Linker optimization hint at offset " + Twine(inputOffset) +
              " has addresses too far apart");
        return;
      }
      delta[i] = d;
    }

    optimizationHints.push_back({offset0, {delta[0], delta[1]}, type});
  }

  // Nothing to distribute; also avoids dereferencing the end iterator via
  // &*hintStart below when the hint list is empty.
  if (optimizationHints.empty())
    return;

  // We sort the per-object vector of optimization hints so each section only
  // needs to hold an ArrayRef to a contiguous range of hints.
  llvm::sort(optimizationHints,
             [](const OptimizationHint &a, const OptimizationHint &b) {
               return a.offset0 < b.offset0;
             });

  // Walk subsections in address order alongside the sorted hints.
  auto section = sections.begin();
  auto subsection = (*section)->subsections.begin();
  uint64_t subsectionBase = 0;
  uint64_t subsectionEnd = 0;

  // Recomputes the current subsection's address range.
  auto updateAddr = [&]() {
    subsectionBase = (*section)->addr + subsection->offset;
    subsectionEnd = subsectionBase + subsection->isec->getSize();
  };

  // Moves to the next non-empty subsection, crossing section boundaries.
  auto advanceSubsection = [&]() {
    if (section == sections.end())
      return;
    ++subsection;
    while (subsection == (*section)->subsections.end()) {
      ++section;
      if (section == sections.end())
        return;
      subsection = (*section)->subsections.begin();
    }
  };

  updateAddr();
  auto hintStart = optimizationHints.begin();
  for (auto hintEnd = hintStart, end = optimizationHints.end(); hintEnd != end;
       ++hintEnd) {
    if (hintEnd->offset0 >= subsectionEnd) {
      // The current subsection's run of hints is [hintStart, hintEnd).
      subsection->isec->optimizationHints =
          ArrayRef<OptimizationHint>(&*hintStart, hintEnd - hintStart);

      hintStart = hintEnd;
      while (hintStart->offset0 >= subsectionEnd) {
        advanceSubsection();
        if (section == sections.end())
          break;
        updateAddr();
        assert(hintStart->offset0 >= subsectionBase);
      }
    }

    // Rebase the hint to a subsection-relative offset and verify that every
    // address it references stays within this subsection.
    hintEnd->offset0 -= subsectionBase;
    for (int i = 0, count = expectedArgCount(hintEnd->type); i < count - 1;
         ++i) {
      if (LLVM_UNLIKELY(
              hintEnd->delta[i] < -static_cast<int64_t>(hintEnd->offset0) ||
              hintEnd->delta[i] >=
                  static_cast<int64_t>(subsectionEnd - hintEnd->offset0))) {
        error("Linker optimization hint spans multiple sections");
        return;
      }
    }
  }
  if (section != sections.end())
    subsection->isec->optimizationHints = ArrayRef<OptimizationHint>(
        &*hintStart, optimizationHints.end() - hintStart);
}
template <class SectionHeader>
static bool validateRelocationInfo(InputFile *file, const SectionHeader &sec,
relocation_info rel) {
@ -1129,11 +980,6 @@ template <class LP> void ObjFile::parse() {
if (!sections[i]->subsections.empty())
parseRelocations(sectionHeaders, sectionHeaders[i], *sections[i]);
if (!config->ignoreOptimizationHints)
if (auto *cmd = findCommand<linkedit_data_command>(
hdr, LC_LINKER_OPTIMIZATION_HINT))
parseOptimizationHints({buf + cmd->dataoff, cmd->datasize});
parseDebugInfo();
Section *ehFrameSection = nullptr;
@ -1213,6 +1059,14 @@ ArrayRef<data_in_code_entry> ObjFile::getDataInCode() const {
c->datasize / sizeof(data_in_code_entry)};
}
// Returns the raw linker-optimization-hint bytes referenced by this file's
// LC_LINKER_OPTIMIZATION_HINT load command, or an empty ArrayRef if the
// command is absent.
ArrayRef<uint8_t> ObjFile::getOptimizationHints() const {
  const auto *fileStart = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
  const auto *cmd = findCommand<linkedit_data_command>(
      fileStart, LC_LINKER_OPTIMIZATION_HINT);
  if (!cmd)
    return {};
  return {fileStart + cmd->dataoff, cmd->datasize};
}
// Create pointers from symbols to their associated compact unwind entries.
void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
for (const Subsection &subsection : compactUnwindSection.subsections) {

View File

@ -159,6 +159,7 @@ public:
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
bool lazy = false, bool forceHidden = false);
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
ArrayRef<uint8_t> getOptimizationHints() const;
template <class LP> void parse();
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
@ -176,7 +177,6 @@ public:
std::vector<ConcatInputSection *> debugSections;
std::vector<CallGraphEntry> callGraph;
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
std::vector<OptimizationHint> optimizationHints;
std::vector<AliasSymbol *> aliases;
private:
@ -193,7 +193,6 @@ private:
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
const SectionHeader &, Section &);
void parseDebugInfo();
void parseOptimizationHints(ArrayRef<uint8_t> data);
void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
void registerCompactUnwind(Section &compactUnwindSection);
void registerEhFrames(Section &ehFrameSection);

View File

@ -29,8 +29,8 @@ using namespace lld::macho;
// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
// so account for that.
static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) ==
sizeof(std::vector<Reloc>) + 104,
static_assert(sizeof(void *) != 8 ||
sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,
"Try to minimize ConcatInputSection's size, we create many "
"instances of it");
@ -219,8 +219,6 @@ void ConcatInputSection::writeTo(uint8_t *buf) {
}
target->relocateOne(loc, r, referentVA, getVA() + r.offset);
}
target->applyOptimizationHints(buf, this);
}
ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,

View File

@ -83,7 +83,6 @@ public:
OutputSection *parent = nullptr;
ArrayRef<uint8_t> data;
std::vector<Reloc> relocs;
ArrayRef<OptimizationHint> optimizationHints;
// The symbols that belong to this InputSection, sorted by value. With
// .subsections_via_symbols, there is typically only one element here.
llvm::TinyPtrVector<Defined *> symbols;

View File

@ -69,14 +69,6 @@ struct Reloc {
addend(addend), referent(referent) {}
};
struct OptimizationHint {
// Offset of the first address within the containing InputSection.
uint64_t offset0;
// Offset of the other addresses relative to the first one.
int16_t delta[2];
uint8_t type;
};
bool validateSymbolRelocation(const Symbol *, const InputSection *,
const Reloc &);

View File

@ -27,7 +27,7 @@ class Symbol;
class Defined;
class DylibSymbol;
class InputSection;
class ConcatInputSection;
class ObjFile;
class TargetInfo {
public:
@ -97,8 +97,7 @@ public:
llvm_unreachable("Unsupported architecture for dtrace symbols");
}
virtual void applyOptimizationHints(uint8_t *buf,
const ConcatInputSection *) const {};
virtual void applyOptimizationHints(uint8_t *, const ObjFile &) const {};
uint32_t magic;
llvm::MachO::CPUType cpuType;

View File

@ -60,6 +60,7 @@ public:
void openFile();
void writeSections();
void applyOptimizationHints();
void writeUuid();
void writeCodeSignature();
void writeOutputFile();
@ -1072,6 +1073,18 @@ void Writer::writeSections() {
});
}
// Applies ARM64 linker optimization hints to the already-written output
// buffer, processing input object files in parallel. Each file's hints are
// resolved against that file's own sections.
void Writer::applyOptimizationHints() {
  // LOHs are only defined for arm64 and can be disabled on the command line.
  if (config->arch() != AK_arm64 || config->ignoreOptimizationHints)
    return;

  TimeTraceScope timeScope("Apply linker optimization hints");
  uint8_t *outBuf = buffer->getBufferStart();
  parallelForEach(inputFiles, [outBuf](const InputFile *file) {
    if (const auto *obj = dyn_cast<ObjFile>(file))
      target->applyOptimizationHints(outBuf, *obj);
  });
}
// In order to utilize multiple cores, we first split the buffer into chunks,
// compute a hash for each chunk, and then compute a hash value of the hash
// values.
@ -1114,6 +1127,7 @@ void Writer::writeOutputFile() {
if (errorCount())
return;
writeSections();
applyOptimizationHints();
writeUuid();
writeCodeSignature();

View File

@ -1,15 +1,10 @@
# REQUIRES: aarch64
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/section.s -o %t/section.o
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/far.s -o %t/far.o
# RUN: not %lld -arch arm64 %t/section.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=SECTION
# RUN: not %lld -arch arm64 %t/far.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=FAR
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o
# RUN: not %lld -arch arm64 %t.o -o /dev/null 2>&1 | FileCheck %s
# SECTION: error: Linker optimization hint spans multiple sections
# FAR: error: Linker optimization hint at offset 0 has addresses too far apart
# CHECK: error: linker optimization hint spans multiple sections
#--- section.s
.globl _main
_main:
L1:
@ -23,17 +18,3 @@ _target:
.loh AdrpAdd L1, L2
.subsections_via_symbols
#--- far.s
.globl _main
_main:
L1:
adrp x0, _target@PAGE
.zero 0x8000
L2:
add x0, x0, _target@PAGEOFF
_target:
.loh AdrpAdd L1, L2
.subsections_via_symbols

View File

@ -17,6 +17,11 @@
## Not an adrp instruction (invalid)
# CHECK-NEXT: nop
# CHECK-NEXT: adrp x4
## Other relaxations take precedence over AdrpAdrp
# CHECK-NEXT: adr x6
# CHECK-NEXT: nop
# CHECK-NEXT: adr x6
# CHECK-NEXT: nop
.text
.align 2
@ -39,6 +44,14 @@ L7:
nop
L8:
adrp x4, _baz@PAGE
L9:
adrp x5, _foo@PAGE
L10:
add x6, x5, _foo@PAGEOFF
L11:
adrp x5, _bar@PAGE
L12:
add x6, x5, _bar@PAGEOFF
.data
.align 12
@ -54,3 +67,6 @@ _baz:
.loh AdrpAdrp L3, L4
.loh AdrpAdrp L5, L6
.loh AdrpAdrp L7, L8
.loh AdrpAdrp L9, L11
.loh AdrpAdd L9, L10
.loh AdrpAdd L11, L12