[lld-macho] Have relocation address included in range-check error message

This makes it easier to debug those errors. See e.g. https://github.com/llvm/llvm-project/issues/52767#issuecomment-1028713943

We take the approach of 'reverse-engineering' the InputSection from the
output buffer offset. This keeps the Target API cleaner, since no extra
InputSection argument has to be threaded through the relocation-handling
methods, and is similar to LLD-ELF's implementation of getErrorPlace().

Reviewed By: #lld-macho, Roger

Differential Revision: https://reviews.llvm.org/D118903
This commit is contained in:
Jez Ng 2022-02-28 21:56:38 -05:00
parent e03d216c28
commit a552fb2a86
8 changed files with 143 additions and 87 deletions

View File

@ -38,48 +38,10 @@ int64_t ARM64Common::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
} }
} }
// For instruction relocations (load, store, add), the base static void writeValue(uint8_t *loc, const Reloc &r, uint64_t value) {
// instruction is pre-populated in the text section. A pre-populated
// instruction has opcode & register-operand bits set, with immediate
// operands zeroed. We read it from text, OR-in the immediate
// operands, then write-back the completed instruction.
void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
uint64_t pc) const {
uint32_t base = ((r.length == 2) ? read32le(loc) : 0);
switch (r.type) {
case ARM64_RELOC_BRANCH26:
value = encodeBranch26(r, base, value - pc);
break;
case ARM64_RELOC_SUBTRACTOR:
case ARM64_RELOC_UNSIGNED:
if (r.length == 2)
checkInt(r, value, 32);
break;
case ARM64_RELOC_POINTER_TO_GOT:
if (r.pcrel)
value -= pc;
checkInt(r, value, 32);
break;
case ARM64_RELOC_PAGE21:
case ARM64_RELOC_GOT_LOAD_PAGE21:
case ARM64_RELOC_TLVP_LOAD_PAGE21: {
assert(r.pcrel);
value = encodePage21(r, base, pageBits(value) - pageBits(pc));
break;
}
case ARM64_RELOC_PAGEOFF12:
case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
case ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
assert(!r.pcrel);
value = encodePageOff12(base, value);
break;
default:
llvm_unreachable("unexpected relocation type");
}
switch (r.length) { switch (r.length) {
case 2: case 2:
checkInt(loc, r, value, 32);
write32le(loc, value); write32le(loc, value);
break; break;
case 3: case 3:
@ -90,6 +52,45 @@ void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
} }
} }
// Applies one relocation at `loc`. For instruction relocations (load, store,
// add) the base instruction already sits in the text section with its opcode
// and register-operand bits set and its immediate operand bits zeroed. That
// word is read back here and passed to the matching encode helper together
// with the value to place in the immediate field.
void ARM64Common::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
                              uint64_t pc) const {
  // A 32-bit word is read only when r.length == 2; otherwise base stays 0.
  uint32_t base = 0;
  if (r.length == 2)
    base = read32le(loc);
  uint32_t *insn = reinterpret_cast<uint32_t *>(loc);

  switch (r.type) {
  case ARM64_RELOC_BRANCH26: {
    // Branch targets are encoded relative to the PC.
    encodeBranch26(insn, r, base, value - pc);
    break;
  }
  case ARM64_RELOC_SUBTRACTOR:
  case ARM64_RELOC_UNSIGNED: {
    writeValue(loc, r, value);
    break;
  }
  case ARM64_RELOC_POINTER_TO_GOT: {
    // Only the PC-relative flavour is rebased against the PC.
    const uint64_t target = r.pcrel ? value - pc : value;
    writeValue(loc, r, target);
    break;
  }
  case ARM64_RELOC_PAGE21:
  case ARM64_RELOC_GOT_LOAD_PAGE21:
  case ARM64_RELOC_TLVP_LOAD_PAGE21: {
    assert(r.pcrel);
    // The encoded quantity is the difference between the two pages.
    const uint64_t pageDelta = pageBits(value) - pageBits(pc);
    encodePage21(insn, r, base, pageDelta);
    break;
  }
  case ARM64_RELOC_PAGEOFF12:
  case ARM64_RELOC_GOT_LOAD_PAGEOFF12:
  case ARM64_RELOC_TLVP_LOAD_PAGEOFF12: {
    assert(!r.pcrel);
    encodePageOff12(insn, base, value);
    break;
  }
  default:
    llvm_unreachable("unexpected relocation type");
  }
}
void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const { void ARM64Common::relaxGotLoad(uint8_t *loc, uint8_t type) const {
// The instruction format comments below are quoted from // The instruction format comments below are quoted from
// Arm® Architecture Reference Manual // Arm® Architecture Reference Manual

View File

@ -40,16 +40,18 @@ inline uint64_t bitField(uint64_t value, int right, int width, int left) {
// | | imm26 | // | | imm26 |
// +-----------+---------------------------------------------------+ // +-----------+---------------------------------------------------+
inline uint64_t encodeBranch26(const Reloc &r, uint64_t base, uint64_t va) { inline void encodeBranch26(uint32_t *loc, const Reloc &r, uint32_t base,
checkInt(r, va, 28); uint64_t va) {
checkInt(loc, r, va, 28);
// Since branch destinations are 4-byte aligned, the 2 least- // Since branch destinations are 4-byte aligned, the 2 least-
// significant bits are 0. They are right shifted off the end. // significant bits are 0. They are right shifted off the end.
return (base | bitField(va, 2, 26, 0)); llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
} }
inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) { inline void encodeBranch26(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
checkInt(d, va, 28); uint64_t va) {
return (base | bitField(va, 2, 26, 0)); checkInt(loc, d, va, 28);
llvm::support::endian::write32le(loc, base | bitField(va, 2, 26, 0));
} }
// 30 29 23 5 // 30 29 23 5
@ -57,14 +59,18 @@ inline uint64_t encodeBranch26(SymbolDiagnostic d, uint64_t base, uint64_t va) {
// | |ilo| | immhi | | // | |ilo| | immhi | |
// +-+---+---------+-------------------------------------+---------+ // +-+---+---------+-------------------------------------+---------+
inline uint64_t encodePage21(const Reloc &r, uint64_t base, uint64_t va) { inline void encodePage21(uint32_t *loc, const Reloc &r, uint32_t base,
checkInt(r, va, 35); uint64_t va) {
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5)); checkInt(loc, r, va, 35);
llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
bitField(va, 14, 19, 5));
} }
inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) { inline void encodePage21(uint32_t *loc, SymbolDiagnostic d, uint32_t base,
checkInt(d, va, 35); uint64_t va) {
return (base | bitField(va, 12, 2, 29) | bitField(va, 14, 19, 5)); checkInt(loc, d, va, 35);
llvm::support::endian::write32le(loc, base | bitField(va, 12, 2, 29) |
bitField(va, 14, 19, 5));
} }
// 21 10 // 21 10
@ -72,7 +78,7 @@ inline uint64_t encodePage21(SymbolDiagnostic d, uint64_t base, uint64_t va) {
// | | imm12 | | // | | imm12 | |
// +-------------------+-----------------------+-------------------+ // +-------------------+-----------------------+-------------------+
inline uint64_t encodePageOff12(uint32_t base, uint64_t va) { inline void encodePageOff12(uint32_t *loc, uint32_t base, uint64_t va) {
int scale = 0; int scale = 0;
if ((base & 0x3b00'0000) == 0x3900'0000) { // load/store if ((base & 0x3b00'0000) == 0x3900'0000) { // load/store
scale = base >> 30; scale = base >> 30;
@ -82,7 +88,8 @@ inline uint64_t encodePageOff12(uint32_t base, uint64_t va) {
// TODO(gkm): extract embedded addend and warn if != 0 // TODO(gkm): extract embedded addend and warn if != 0
// uint64_t addend = ((base & 0x003FFC00) >> 10); // uint64_t addend = ((base & 0x003FFC00) >> 10);
return (base | bitField(va, scale, 12 - scale, 10)); llvm::support::endian::write32le(loc,
base | bitField(va, scale, 12 - scale, 10));
} }
inline uint64_t pageBits(uint64_t address) { inline uint64_t pageBits(uint64_t address) {
@ -99,9 +106,9 @@ inline void writeStub(uint8_t *buf8, const uint32_t stubCode[3],
pageBits(in.stubs->addr + sym.stubsIndex * stubCodeSize); pageBits(in.stubs->addr + sym.stubsIndex * stubCodeSize);
uint64_t lazyPointerVA = uint64_t lazyPointerVA =
in.lazyPointers->addr + sym.stubsIndex * LP::wordSize; in.lazyPointers->addr + sym.stubsIndex * LP::wordSize;
buf32[0] = encodePage21({&sym, "stub"}, stubCode[0], encodePage21(&buf32[0], {&sym, "stub"}, stubCode[0],
pageBits(lazyPointerVA) - pcPageBits); pageBits(lazyPointerVA) - pcPageBits);
buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA); encodePageOff12(&buf32[1], stubCode[1], lazyPointerVA);
buf32[2] = stubCode[2]; buf32[2] = stubCode[2];
} }
@ -114,15 +121,15 @@ inline void writeStubHelperHeader(uint8_t *buf8,
}; };
uint64_t loaderVA = in.imageLoaderCache->getVA(); uint64_t loaderVA = in.imageLoaderCache->getVA();
SymbolDiagnostic d = {nullptr, "stub header helper"}; SymbolDiagnostic d = {nullptr, "stub header helper"};
buf32[0] = encodePage21(d, stubHelperHeaderCode[0], encodePage21(&buf32[0], d, stubHelperHeaderCode[0],
pageBits(loaderVA) - pcPageBits(0)); pageBits(loaderVA) - pcPageBits(0));
buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA); encodePageOff12(&buf32[1], stubHelperHeaderCode[1], loaderVA);
buf32[2] = stubHelperHeaderCode[2]; buf32[2] = stubHelperHeaderCode[2];
uint64_t binderVA = uint64_t binderVA =
in.got->addr + in.stubHelper->stubBinder->gotIndex * LP::wordSize; in.got->addr + in.stubHelper->stubBinder->gotIndex * LP::wordSize;
buf32[3] = encodePage21(d, stubHelperHeaderCode[3], encodePage21(&buf32[3], d, stubHelperHeaderCode[3],
pageBits(binderVA) - pcPageBits(3)); pageBits(binderVA) - pcPageBits(3));
buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA); encodePageOff12(&buf32[4], stubHelperHeaderCode[4], binderVA);
buf32[5] = stubHelperHeaderCode[5]; buf32[5] = stubHelperHeaderCode[5];
} }
@ -133,7 +140,7 @@ inline void writeStubHelperEntry(uint8_t *buf8,
auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); }; auto pcVA = [entryVA](int i) { return entryVA + i * sizeof(uint32_t); };
uint64_t stubHelperHeaderVA = in.stubHelper->addr; uint64_t stubHelperHeaderVA = in.stubHelper->addr;
buf32[0] = stubHelperEntryCode[0]; buf32[0] = stubHelperEntryCode[0];
buf32[1] = encodeBranch26({&sym, "stub helper"}, stubHelperEntryCode[1], encodeBranch26(&buf32[1], {&sym, "stub helper"}, stubHelperEntryCode[1],
stubHelperHeaderVA - pcVA(1)); stubHelperHeaderVA - pcVA(1));
buf32[2] = sym.lazyBindOffset; buf32[2] = sym.lazyBindOffset;
} }

View File

@ -102,9 +102,9 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
switch (r.length) { switch (r.length) {
case 2: case 2:
if (r.type == X86_64_RELOC_UNSIGNED) if (r.type == X86_64_RELOC_UNSIGNED)
checkUInt(r, value, 32); checkUInt(loc, r, value, 32);
else else
checkInt(r, value, 32); checkInt(loc, r, value, 32);
write32le(loc, value); write32le(loc, value);
break; break;
case 3: case 3:
@ -127,7 +127,7 @@ void X86_64::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value,
static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr, static void writeRipRelative(SymbolDiagnostic d, uint8_t *buf, uint64_t bufAddr,
uint64_t bufOff, uint64_t destAddr) { uint64_t bufOff, uint64_t destAddr) {
uint64_t rip = bufAddr + bufOff; uint64_t rip = bufAddr + bufOff;
checkInt(d, destAddr - rip, 32); checkInt(buf, d, destAddr - rip, 32);
// For the instructions we care about, the RIP-relative address is always // For the instructions we care about, the RIP-relative address is always
// stored in the last 4 bytes of the instruction. // stored in the last 4 bytes of the instruction.
write32le(buf + bufOff - 4, destAddr - rip); write32le(buf + bufOff - 4, destAddr - rip);

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "Relocations.h" #include "Relocations.h"
#include "ConcatOutputSection.h"
#include "Symbols.h" #include "Symbols.h"
#include "SyntheticSections.h" #include "SyntheticSections.h"
#include "Target.h" #include "Target.h"
@ -38,19 +39,65 @@ bool macho::validateSymbolRelocation(const Symbol *sym,
return valid; return valid;
} }
void macho::reportRangeError(const Reloc &r, const Twine &v, uint8_t bits, // Given an offset in the output buffer, figure out which ConcatInputSection (if
int64_t min, uint64_t max) { // any) maps to it. At the same time, update the offset such that it is relative
// to the InputSection rather than to the output buffer.
//
// Obtaining the InputSection allows us to have better error diagnostics.
// However, many of our relocation-handling methods do not take the InputSection
// as a parameter. Since we are already passing the buffer offsets to our Target
// methods, this function allows us to emit better errors without threading an
// additional InputSection argument through the call stack.
//
// This is implemented as a slow linear search through OutputSegments,
// OutputSections, and finally the InputSections themselves. However, this
// function should be called only on error paths, so some overhead is fine.
static InputSection *offsetToInputSection(uint64_t *off) {
  for (OutputSegment *seg : outputSegments) {
    // Skip every segment whose file range [fileOff, fileOff + fileSize) does
    // not contain the offset.
    const bool inSegment =
        *off >= seg->fileOff && *off < seg->fileOff + seg->fileSize;
    if (!inSegment)
      continue;

    // Advance to the first output section that starts past the offset; the
    // section right before it is taken as the containing one.
    // NOTE(review): presumably sections are ordered by ascending fileOff --
    // confirm against OutputSegment::getSections().
    const std::vector<OutputSection *> &sections = seg->getSections();
    size_t pastOsec = 0;
    while (pastOsec < sections.size() && *off >= sections[pastOsec]->fileOff)
      ++pastOsec;
    assert(pastOsec > 0);

    // Callers are expected to pass only offsets that fall inside
    // ConcatOutputSections, hence the unconditional cast.
    auto *concatOsec = cast<ConcatOutputSection>(sections[pastOsec - 1]);
    // From here on *off is relative to the start of the output section.
    *off -= concatOsec->fileOff;

    // Same search one level down, over the input sections.
    size_t pastIsec = 0;
    while (pastIsec < concatOsec->inputs.size() &&
           *off >= concatOsec->inputs[pastIsec]->outSecOff)
      ++pastIsec;
    assert(pastIsec > 0);

    ConcatInputSection *found = concatOsec->inputs[pastIsec - 1];
    // Finally make *off relative to the input section itself.
    *off -= found->outSecOff;
    return found;
  }
  // No segment covers the offset.
  return nullptr;
}
void macho::reportRangeError(void *loc, const Reloc &r, const Twine &v,
uint8_t bits, int64_t min, uint64_t max) {
std::string hint; std::string hint;
uint64_t off = reinterpret_cast<const uint8_t *>(loc) - in.bufferStart;
const InputSection *isec = offsetToInputSection(&off);
std::string locStr = isec ? isec->getLocation(off) : "(invalid location)";
if (auto *sym = r.referent.dyn_cast<Symbol *>()) if (auto *sym = r.referent.dyn_cast<Symbol *>())
hint = "; references " + toString(*sym); hint = "; references " + toString(*sym);
// TODO: get location of reloc using something like LLD-ELF's getErrorPlace() error(locStr + ": relocation " + target->getRelocAttrs(r.type).name +
error("relocation " + target->getRelocAttrs(r.type).name +
" is out of range: " + v + " is not in [" + Twine(min) + ", " + " is out of range: " + v + " is not in [" + Twine(min) + ", " +
Twine(max) + "]" + hint); Twine(max) + "]" + hint);
} }
void macho::reportRangeError(SymbolDiagnostic d, const Twine &v, uint8_t bits, void macho::reportRangeError(void *loc, SymbolDiagnostic d, const Twine &v,
int64_t min, uint64_t max) { uint8_t bits, int64_t min, uint64_t max) {
// FIXME: should we use `loc` somehow to provide a better error message?
std::string hint; std::string hint;
if (d.symbol) if (d.symbol)
hint = "; references " + toString(*d.symbol); hint = "; references " + toString(*d.symbol);

View File

@ -70,28 +70,28 @@ bool validateSymbolRelocation(const Symbol *, const InputSection *,
* v: The value the relocation is attempting to encode * v: The value the relocation is attempting to encode
* bits: The number of bits actually available to encode this relocation * bits: The number of bits actually available to encode this relocation
*/ */
void reportRangeError(const Reloc &, const llvm::Twine &v, uint8_t bits, void reportRangeError(void *loc, const Reloc &, const llvm::Twine &v,
int64_t min, uint64_t max); uint8_t bits, int64_t min, uint64_t max);
struct SymbolDiagnostic { struct SymbolDiagnostic {
const Symbol *symbol; const Symbol *symbol;
llvm::StringRef reason; llvm::StringRef reason;
}; };
void reportRangeError(SymbolDiagnostic, const llvm::Twine &v, uint8_t bits, void reportRangeError(void *loc, SymbolDiagnostic, const llvm::Twine &v,
int64_t min, uint64_t max); uint8_t bits, int64_t min, uint64_t max);
template <typename Diagnostic> template <typename Diagnostic>
inline void checkInt(Diagnostic d, int64_t v, int bits) { inline void checkInt(void *loc, Diagnostic d, int64_t v, int bits) {
if (v != llvm::SignExtend64(v, bits)) if (v != llvm::SignExtend64(v, bits))
reportRangeError(d, llvm::Twine(v), bits, llvm::minIntN(bits), reportRangeError(loc, d, llvm::Twine(v), bits, llvm::minIntN(bits),
llvm::maxIntN(bits)); llvm::maxIntN(bits));
} }
template <typename Diagnostic> template <typename Diagnostic>
inline void checkUInt(Diagnostic d, uint64_t v, int bits) { inline void checkUInt(void *loc, Diagnostic d, uint64_t v, int bits) {
if ((v >> bits) != 0) if ((v >> bits) != 0)
reportRangeError(d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits)); reportRangeError(loc, d, llvm::Twine(v), bits, 0, llvm::maxUIntN(bits));
} }
inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) { inline void writeAddress(uint8_t *loc, uint64_t addr, uint8_t length) {

View File

@ -595,6 +595,7 @@ private:
}; };
struct InStruct { struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr; MachHeaderSection *header = nullptr;
CStringSection *cStringSection = nullptr; CStringSection *cStringSection = nullptr;
WordLiteralSection *wordLiteralSection = nullptr; WordLiteralSection *wordLiteralSection = nullptr;

View File

@ -1048,10 +1048,10 @@ void Writer::openFile() {
FileOutputBuffer::F_executable); FileOutputBuffer::F_executable);
if (!bufferOrErr) if (!bufferOrErr)
error("failed to open " + config->outputFile + ": " + fatal("failed to open " + config->outputFile + ": " +
llvm::toString(bufferOrErr.takeError())); llvm::toString(bufferOrErr.takeError()));
else
buffer = std::move(*bufferOrErr); buffer = std::move(*bufferOrErr);
in.bufferStart = buffer->getBufferStart();
} }
void Writer::writeSections() { void Writer::writeSections() {

View File

@ -6,8 +6,8 @@
# RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib # RUN: %lld -dylib %t/bar.o -o %t/libbar.dylib
# RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s # RUN: not %lld -lSystem -o /dev/null %t/libbar.dylib %t/test.o 2>&1 | FileCheck %s
# CHECK: error: relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo # CHECK: error: {{.*}}test.o:(symbol _main+0xd): relocation UNSIGNED is out of range: [[#]] is not in [0, 4294967295]; references _foo
# CHECK: error: relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo # CHECK: error: {{.*}}test.o:(symbol _main+0x3): relocation GOT_LOAD is out of range: [[#]] is not in [-2147483648, 2147483647]; references _foo
# CHECK: error: stub is out of range: [[#]] is not in [-2147483648, 2147483647]; references _bar # CHECK: error: stub is out of range: [[#]] is not in [-2147483648, 2147483647]; references _bar
# CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647] # CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]
# CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647] # CHECK: error: stub helper header is out of range: [[#]] is not in [-2147483648, 2147483647]