Fix lld crash using --fix-cortex-a53-843419 (#170495)
Original crash was observed in Chromium, in [1]. The problem occurs in elf::isAArch64BTILandingPad because it didn't handle synthetic sections, which can have a nullptr as a buf, so it crashed while trying to read that buf. After fixing that, a second issue occurs: When the patched code grows too much, it gets far away from the short jump, and the current implementation assumes a R_AARCH64_JUMP26 will be enough. This PR changes the implementation to: (a) In isAArch64BTILandingPad, checks if a section is synthetic, and assumes that it'll NOT contain a landing pad, avoiding the buffer check; (b) Suppress the size rounding for thunks that preceeds section (Making the situation less likely to happen); (c) Reimplements the patch by using a R_AARCH64_ABS64 in case the patched code is still far away. [1] https://issues.chromium.org/issues/440019454 --------- Co-authored-by: Tarcisio Fischer <tarcisio.fischer@arm.com>
This commit is contained in:
parent
a5cab90071
commit
e91447f44d
@ -370,7 +370,8 @@ static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off,
|
||||
|
||||
class elf::Patch843419Section final : public SyntheticSection {
|
||||
public:
|
||||
Patch843419Section(Ctx &, InputSection *p, uint64_t off);
|
||||
Patch843419Section(Ctx &, InputSection *p, uint64_t off,
|
||||
Defined *patcheeCodeSym);
|
||||
|
||||
void writeTo(uint8_t *buf) override;
|
||||
|
||||
@ -390,7 +391,8 @@ public:
|
||||
Symbol *patchSym;
|
||||
};
|
||||
|
||||
Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off)
|
||||
Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off,
|
||||
Defined *patcheeCodeSym)
|
||||
: SyntheticSection(ctx, ".text.patch", SHT_PROGBITS,
|
||||
SHF_ALLOC | SHF_EXECINSTR, 4),
|
||||
patchee(p), patcheeOffset(off) {
|
||||
@ -399,6 +401,9 @@ Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off)
|
||||
ctx, ctx.saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())),
|
||||
STT_FUNC, 0, getSize(), *this);
|
||||
addSyntheticLocal(ctx, ctx.saver.save("$x"), STT_NOTYPE, 0, 0, *this);
|
||||
int64_t retToPatcheeSymOffset =
|
||||
getLDSTAddr() - p->getVA(patcheeCodeSym->value) + 4;
|
||||
addReloc({R_PC, R_AARCH64_JUMP26, 4, retToPatcheeSymOffset, patcheeCodeSym});
|
||||
}
|
||||
|
||||
uint64_t Patch843419Section::getLDSTAddr() const {
|
||||
@ -410,13 +415,8 @@ void Patch843419Section::writeTo(uint8_t *buf) {
|
||||
// patchee Section.
|
||||
write32le(buf, read32le(patchee->content().begin() + patcheeOffset));
|
||||
|
||||
// Apply any relocation transferred from the original patchee section.
|
||||
// Apply relocations
|
||||
ctx.target->relocateAlloc(*this, buf);
|
||||
|
||||
// Return address is the next instruction after the one we have just copied.
|
||||
uint64_t s = getLDSTAddr() + 4;
|
||||
uint64_t p = patchSym->getVA(ctx) + 4;
|
||||
ctx.target->relocateNoSym(buf + 4, R_AARCH64_JUMP26, s - p);
|
||||
}
|
||||
|
||||
void AArch64Err843419Patcher::init() {
|
||||
@ -443,11 +443,14 @@ void AArch64Err843419Patcher::init() {
|
||||
auto *def = dyn_cast<Defined>(b);
|
||||
if (!def)
|
||||
continue;
|
||||
if (!isCodeMapSymbol(def) && !isDataMapSymbol(def))
|
||||
if (!def->isSection() && !isCodeMapSymbol(def) && !isDataMapSymbol(def))
|
||||
continue;
|
||||
if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
|
||||
if (sec->flags & SHF_EXECINSTR)
|
||||
sectionMap[sec].push_back(def);
|
||||
if (auto *sec = dyn_cast_or_null<InputSection>(def->section)) {
|
||||
if (def->isSection())
|
||||
sectionMap[sec].first = def;
|
||||
else if (sec->flags & SHF_EXECINSTR)
|
||||
sectionMap[sec].second.push_back(def);
|
||||
}
|
||||
}
|
||||
}
|
||||
// For each InputSection make sure the mapping symbols are in sorted in
|
||||
@ -455,7 +458,7 @@ void AArch64Err843419Patcher::init() {
|
||||
// the same type. For example we must remove the redundant $d.1 from $x.0
|
||||
// $d.0 $d.1 $x.1.
|
||||
for (auto &kv : sectionMap) {
|
||||
std::vector<const Defined *> &mapSyms = kv.second;
|
||||
auto &mapSyms = kv.second.second;
|
||||
llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
|
||||
return a->value < b->value;
|
||||
});
|
||||
@ -529,7 +532,8 @@ void AArch64Err843419Patcher::insertPatches(
|
||||
// Patches that we need to insert.
|
||||
static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset,
|
||||
InputSection *isec,
|
||||
std::vector<Patch843419Section *> &patches) {
|
||||
std::vector<Patch843419Section *> &patches,
|
||||
Defined *patcheeCodeSym) {
|
||||
// There may be a relocation at the same offset that we are patching. There
|
||||
// are four cases that we need to consider.
|
||||
// Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this
|
||||
@ -554,7 +558,7 @@ static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset,
|
||||
Log(ctx) << "detected cortex-a53-843419 erratum sequence starting at " <<
|
||||
utohexstr(adrpAddr) << " in unpatched output.";
|
||||
|
||||
auto *ps = make<Patch843419Section>(ctx, isec, patcheeOffset);
|
||||
auto *ps = make<Patch843419Section>(ctx, isec, patcheeOffset, patcheeCodeSym);
|
||||
patches.push_back(ps);
|
||||
|
||||
auto makeRelToPatch = [](uint64_t offset, Symbol *patchSym) {
|
||||
@ -584,7 +588,11 @@ AArch64Err843419Patcher::patchInputSectionDescription(
|
||||
// mapping symbols of the same type. Our range of executable instructions to
|
||||
// scan is therefore [codeSym->value, dataSym->value) or [codeSym->value,
|
||||
// section size).
|
||||
std::vector<const Defined *> &mapSyms = sectionMap[isec];
|
||||
auto [it, inserted] = sectionMap.try_emplace(
|
||||
isec, std::make_pair(nullptr, SmallVector<Defined *, 0>{}));
|
||||
auto &[sectionSym, mapSyms] = it->second;
|
||||
if (inserted || sectionSym == nullptr)
|
||||
sectionSym = addSyntheticLocal(ctx, "", STT_SECTION, 0, 0, *isec);
|
||||
|
||||
auto codeSym = mapSyms.begin();
|
||||
while (codeSym != mapSyms.end()) {
|
||||
@ -597,7 +605,8 @@ AArch64Err843419Patcher::patchInputSectionDescription(
|
||||
uint64_t startAddr = isec->getVA(off);
|
||||
if (uint64_t patcheeOffset =
|
||||
scanCortexA53Errata843419(isec, off, limit))
|
||||
implementPatch(ctx, startAddr, patcheeOffset, isec, patches);
|
||||
implementPatch(ctx, startAddr, patcheeOffset, isec, patches,
|
||||
sectionSym);
|
||||
}
|
||||
if (dataSym == mapSyms.end())
|
||||
break;
|
||||
|
||||
@ -36,10 +36,13 @@ private:
|
||||
void init();
|
||||
|
||||
Ctx &ctx;
|
||||
// A cache of the mapping symbols defined by the InputSection sorted in order
|
||||
// of ascending value with redundant symbols removed. These describe
|
||||
// the ranges of code and data in an executable InputSection.
|
||||
llvm::DenseMap<InputSection *, std::vector<const Defined *>> sectionMap;
|
||||
// A cache mapping InputSections to pairs of section symbols (first) and
|
||||
// the mapping symbols (second) defined by the InputSection sorted in order
|
||||
// of ascending value with redundant symbols removed. These describe the
|
||||
// ranges of code and data in an executable InputSection.
|
||||
llvm::DenseMap<InputSection *,
|
||||
std::pair<Defined *, SmallVector<Defined *, 0>>>
|
||||
sectionMap;
|
||||
|
||||
bool initialized = false;
|
||||
};
|
||||
|
||||
@ -48,6 +48,11 @@ bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
|
||||
if (off >= isec->getSize())
|
||||
return true;
|
||||
const uint8_t *buf = isec->content().begin();
|
||||
// Synthetic sections may have a size but empty data - Assume that they won't
|
||||
// contain a landing pad
|
||||
if (buf == nullptr && isa<SyntheticSection>(isec))
|
||||
return false;
|
||||
|
||||
const uint32_t instr = read32le(buf + off);
|
||||
// All BTI instructions are HINT instructions which all have same encoding
|
||||
// apart from bits [11:5]
|
||||
|
||||
@ -1930,7 +1930,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) {
|
||||
if (isec->outSecOff < first->outSecOff || last->outSecOff < isec->outSecOff)
|
||||
continue;
|
||||
|
||||
ts = addThunkSection(tos, isd, isec->outSecOff);
|
||||
ts = addThunkSection(tos, isd, isec->outSecOff, /*isPrefix=*/true);
|
||||
thunkedSections[isec] = ts;
|
||||
return ts;
|
||||
}
|
||||
@ -1989,11 +1989,11 @@ void ThunkCreator::createInitialThunkSections(
|
||||
|
||||
ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
|
||||
InputSectionDescription *isd,
|
||||
uint64_t off) {
|
||||
uint64_t off, bool isPrefix) {
|
||||
auto *ts = make<ThunkSection>(ctx, os, off);
|
||||
ts->partition = os->partition;
|
||||
if ((ctx.arg.fixCortexA53Errata843419 || ctx.arg.fixCortexA8) &&
|
||||
!isd->sections.empty()) {
|
||||
!isd->sections.empty() && !isPrefix) {
|
||||
// The errata fixes are sensitive to addresses modulo 4 KiB. When we add
|
||||
// thunks we disturb the base addresses of sections placed after the thunks
|
||||
// this makes patches we have generated redundant, and may cause us to
|
||||
@ -2017,6 +2017,12 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
|
||||
// 2.) The InputSectionDescription is larger than 4 KiB. This will prevent
|
||||
// any assertion failures that an InputSectionDescription is < 4 KiB
|
||||
// in size.
|
||||
//
|
||||
// isPrefix is a ThunkSection explicitly inserted before its target
|
||||
// section. We suppress the rounding up of the size of these ThunkSections
|
||||
// as unlike normal ThunkSections, they are small in size, but when BTI is
|
||||
// enabled very frequent. This can bloat code-size and push the errata
|
||||
// patches out of branch range.
|
||||
uint64_t isdSize = isd->sections.back()->outSecOff +
|
||||
isd->sections.back()->getSize() -
|
||||
isd->sections.front()->outSecOff;
|
||||
|
||||
@ -204,7 +204,7 @@ private:
|
||||
std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);
|
||||
|
||||
ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
|
||||
uint64_t off);
|
||||
uint64_t off, bool isPrefix = false);
|
||||
|
||||
bool normalizeExistingThunk(Relocation &rel, uint64_t src);
|
||||
|
||||
|
||||
107
lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s
Normal file
107
lld/test/ELF/aarch64-cortex-a53-843419-thunk-relocation-crash.s
Normal file
@ -0,0 +1,107 @@
|
||||
// REQUIRES: aarch64
|
||||
// RUN: rm -rf %t && split-file %s %t && cd %t
|
||||
// RUN: llvm-mc -mattr=+bti -filetype=obj -triple=aarch64 asm -o a.o
|
||||
// RUN: ld.lld --script lds -fix-cortex-a53-843419 -verbose a.o -o exe \
|
||||
// RUN: 2>&1 | FileCheck -check-prefix=CHECK-PRINT %s
|
||||
// RUN: llvm-objdump --no-print-imm-hex --no-show-raw-insn --triple=aarch64-linux-gnu -d exe | FileCheck %s
|
||||
|
||||
/// Test case for specific crash wrt interaction between thunks and errata
|
||||
/// patches where the size of the added thunks meant that a range-extension
|
||||
/// thunk to the patch was required. We need to check that a BTI Thunk is
|
||||
/// generated for the patch, and that the patch's direct branch return is also
|
||||
/// range extended, possibly needing another BTI Thunk.
|
||||
///
|
||||
/// The asm below is based on a crash that was happening in Chromium.
|
||||
/// For more information see https://issues.chromium.org/issues/440019454
|
||||
|
||||
//--- asm
|
||||
.section .note.gnu.property,"a"
|
||||
.p2align 3
|
||||
.long 4
|
||||
.long 0x10 // descriptor length
|
||||
.long 0x5 // GNU property type
|
||||
.asciz "GNU"
|
||||
.long 0xc0000000 // GNU_PROPERTY_AARCH64_FEATURE_1_AND
|
||||
.long 4
|
||||
.long 1 // GNU_PROPERTY_AARCH64_FEATURE_1_BTI
|
||||
.long 0
|
||||
|
||||
.section .text.01, "ax", %progbits
|
||||
.balign 4096
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
bl far_away_no_bti
|
||||
|
||||
.section .text.far, "ax", %progbits
|
||||
.globl far_away_no_bti
|
||||
.type far_away, function
|
||||
far_away_no_bti:
|
||||
.space 4096 - 28, 0
|
||||
adrp x0, dat
|
||||
ldr x1, [x1, #0]
|
||||
ldr x0, [x0, :got_lo12:dat]
|
||||
.space 0x8000000, 0
|
||||
ret
|
||||
|
||||
.section .data
|
||||
.globl dat
|
||||
dat: .quad 0
|
||||
|
||||
// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 8010FFC in unpatched output.
|
||||
|
||||
// CHECK: 0000000000010000 <_start>:
|
||||
// CHECK-NEXT: 10000: bl 0x10008 <__AArch64AbsLongThunk_far_away_no_bti>
|
||||
|
||||
// CHECK: <__AArch64AbsLongThunk_far_away_no_bti>:
|
||||
// CHECK-NEXT: 10008: ldr x16, 0x10010
|
||||
// CHECK-NEXT: br x16
|
||||
// CHECK-NEXT: 10010: 18 00 01 08 .word 0x08010018
|
||||
|
||||
// Check that the BTI thunks do NOT have their size rounded up to 4 KiB.
|
||||
// They precede the patch and they contain the landing pad.
|
||||
// CHECK: <__AArch64BTIThunk_far_away_no_bti>:
|
||||
// CHECK-NEXT: 8010018: bti c
|
||||
// CHECK-NEXT: b 0x8010038 <far_away_no_bti>
|
||||
|
||||
// CHECK: <__AArch64AbsLongThunk___CortexA53843419_8011004>:
|
||||
// CHECK-NEXT: 8010020: ldr x16, 0x8010028
|
||||
// CHECK-NEXT: br x16
|
||||
// CHECK-NEXT: 8010028: 34 10 01 10 .word 0x10011034
|
||||
|
||||
// CHECK: <__AArch64BTIThunk_>:
|
||||
// CHECK-NEXT: 8010030: bti c
|
||||
// CHECK-NEXT: b 0x8011028 <far_away_no_bti+0xff0>
|
||||
|
||||
// CHECK: 8010038 <far_away_no_bti>:
|
||||
// CHECK-NEXT: ...
|
||||
// CHECK-NEXT: 801101c: adrp x0, 0x10012000
|
||||
// CHECK-NEXT: ldr x1, [x1]
|
||||
// CHECK-NEXT: b 0x8010020 <__AArch64AbsLongThunk___CortexA53843419_8011004>
|
||||
// CHECK-NEXT: ...
|
||||
// CHECK-NEXT: 10011028: ret
|
||||
|
||||
// Check that the errata thunk does NOT contain a landing pad
|
||||
// CHECK: <__CortexA53843419_8011004>:
|
||||
// CHECK-NEXT: 1001102c: ldr x0, [x0, #64]
|
||||
// CHECK-NEXT: b 0x10011040 <__AArch64AbsLongThunk_>
|
||||
|
||||
// Rest of generated code for readability
|
||||
// CHECK: <__AArch64BTIThunk___CortexA53843419_8011004>:
|
||||
// CHECK-NEXT: 10011034: bti c
|
||||
// CHECK-NEXT: b 0x1001102c <__CortexA53843419_8011004>
|
||||
|
||||
// CHECK: <__AArch64AbsLongThunk_>
|
||||
// CHECK-NEXT: 10011040: ldr x16, 0x10011048
|
||||
// CHECK-NEXT: br x16
|
||||
// CHECK-NEXT: 10011048: 30 00 01 08 .word 0x08010030
|
||||
|
||||
//--- lds
|
||||
SECTIONS {
|
||||
.text 0x10000 : {
|
||||
*(.text.01);
|
||||
. += 0x8000000;
|
||||
*(.text.far);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user