Fix lld crash using --fix-cortex-a53-843419 (#170495)

Original crash was observed in Chromium, in [1]. The problem occurs in
elf::isAArch64BTILandingPad because it didn't handle synthetic sections,
which can have a nullptr as a buf, so it crashed while trying to read
that buf.

After fixing that, a second issue occurs: When the patched code grows
too
much, it gets far away from the short jump, and the current
implementation
assumes a R_AARCH64_JUMP26 will be enough.

This PR changes the implementation to:
(a) In isAArch64BTILandingPad, checks if a section is synthetic, and
assumes that it'll NOT contain a landing pad, avoiding the buffer check;
(b) Suppress the size rounding for thunks that preceeds section
(Making the situation less likely to happen);
(c) Reimplements the patch by using a R_AARCH64_ABS64 in case the
patched code is still far away.

[1] https://issues.chromium.org/issues/440019454

---------

Co-authored-by: Tarcisio Fischer <tarcisio.fischer@arm.com>
This commit is contained in:
Tarcísio Fischer 2026-01-09 11:08:09 +00:00 committed by GitHub
parent a5cab90071
commit e91447f44d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 155 additions and 25 deletions

View File

@ -370,7 +370,8 @@ static uint64_t scanCortexA53Errata843419(InputSection *isec, uint64_t &off,
class elf::Patch843419Section final : public SyntheticSection {
public:
Patch843419Section(Ctx &, InputSection *p, uint64_t off);
Patch843419Section(Ctx &, InputSection *p, uint64_t off,
Defined *patcheeCodeSym);
void writeTo(uint8_t *buf) override;
@ -390,7 +391,8 @@ public:
Symbol *patchSym;
};
Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off)
Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off,
Defined *patcheeCodeSym)
: SyntheticSection(ctx, ".text.patch", SHT_PROGBITS,
SHF_ALLOC | SHF_EXECINSTR, 4),
patchee(p), patcheeOffset(off) {
@ -399,6 +401,9 @@ Patch843419Section::Patch843419Section(Ctx &ctx, InputSection *p, uint64_t off)
ctx, ctx.saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())),
STT_FUNC, 0, getSize(), *this);
addSyntheticLocal(ctx, ctx.saver.save("$x"), STT_NOTYPE, 0, 0, *this);
int64_t retToPatcheeSymOffset =
getLDSTAddr() - p->getVA(patcheeCodeSym->value) + 4;
addReloc({R_PC, R_AARCH64_JUMP26, 4, retToPatcheeSymOffset, patcheeCodeSym});
}
uint64_t Patch843419Section::getLDSTAddr() const {
@ -410,13 +415,8 @@ void Patch843419Section::writeTo(uint8_t *buf) {
// patchee Section.
write32le(buf, read32le(patchee->content().begin() + patcheeOffset));
// Apply any relocation transferred from the original patchee section.
// Apply relocations
ctx.target->relocateAlloc(*this, buf);
// Return address is the next instruction after the one we have just copied.
uint64_t s = getLDSTAddr() + 4;
uint64_t p = patchSym->getVA(ctx) + 4;
ctx.target->relocateNoSym(buf + 4, R_AARCH64_JUMP26, s - p);
}
void AArch64Err843419Patcher::init() {
@ -443,11 +443,14 @@ void AArch64Err843419Patcher::init() {
auto *def = dyn_cast<Defined>(b);
if (!def)
continue;
if (!isCodeMapSymbol(def) && !isDataMapSymbol(def))
if (!def->isSection() && !isCodeMapSymbol(def) && !isDataMapSymbol(def))
continue;
if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
if (sec->flags & SHF_EXECINSTR)
sectionMap[sec].push_back(def);
if (auto *sec = dyn_cast_or_null<InputSection>(def->section)) {
if (def->isSection())
sectionMap[sec].first = def;
else if (sec->flags & SHF_EXECINSTR)
sectionMap[sec].second.push_back(def);
}
}
}
// For each InputSection make sure the mapping symbols are in sorted in
@ -455,7 +458,7 @@ void AArch64Err843419Patcher::init() {
// the same type. For example we must remove the redundant $d.1 from $x.0
// $d.0 $d.1 $x.1.
for (auto &kv : sectionMap) {
std::vector<const Defined *> &mapSyms = kv.second;
auto &mapSyms = kv.second.second;
llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
return a->value < b->value;
});
@ -529,7 +532,8 @@ void AArch64Err843419Patcher::insertPatches(
// Patches that we need to insert.
static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset,
InputSection *isec,
std::vector<Patch843419Section *> &patches) {
std::vector<Patch843419Section *> &patches,
Defined *patcheeCodeSym) {
// There may be a relocation at the same offset that we are patching. There
// are four cases that we need to consider.
// Case 1: R_AARCH64_JUMP26 branch relocation. We have already patched this
@ -554,7 +558,7 @@ static void implementPatch(Ctx &ctx, uint64_t adrpAddr, uint64_t patcheeOffset,
Log(ctx) << "detected cortex-a53-843419 erratum sequence starting at " <<
utohexstr(adrpAddr) << " in unpatched output.";
auto *ps = make<Patch843419Section>(ctx, isec, patcheeOffset);
auto *ps = make<Patch843419Section>(ctx, isec, patcheeOffset, patcheeCodeSym);
patches.push_back(ps);
auto makeRelToPatch = [](uint64_t offset, Symbol *patchSym) {
@ -584,7 +588,11 @@ AArch64Err843419Patcher::patchInputSectionDescription(
// mapping symbols of the same type. Our range of executable instructions to
// scan is therefore [codeSym->value, dataSym->value) or [codeSym->value,
// section size).
std::vector<const Defined *> &mapSyms = sectionMap[isec];
auto [it, inserted] = sectionMap.try_emplace(
isec, std::make_pair(nullptr, SmallVector<Defined *, 0>{}));
auto &[sectionSym, mapSyms] = it->second;
if (inserted || sectionSym == nullptr)
sectionSym = addSyntheticLocal(ctx, "", STT_SECTION, 0, 0, *isec);
auto codeSym = mapSyms.begin();
while (codeSym != mapSyms.end()) {
@ -597,7 +605,8 @@ AArch64Err843419Patcher::patchInputSectionDescription(
uint64_t startAddr = isec->getVA(off);
if (uint64_t patcheeOffset =
scanCortexA53Errata843419(isec, off, limit))
implementPatch(ctx, startAddr, patcheeOffset, isec, patches);
implementPatch(ctx, startAddr, patcheeOffset, isec, patches,
sectionSym);
}
if (dataSym == mapSyms.end())
break;

View File

@ -36,10 +36,13 @@ private:
void init();
Ctx &ctx;
// A cache of the mapping symbols defined by the InputSection sorted in order
// of ascending value with redundant symbols removed. These describe
// the ranges of code and data in an executable InputSection.
llvm::DenseMap<InputSection *, std::vector<const Defined *>> sectionMap;
// A cache mapping InputSections to pairs of section symbols (first) and
// the mapping symbols (second) defined by the InputSection sorted in order
// of ascending value with redundant symbols removed. These describe the
// ranges of code and data in an executable InputSection.
llvm::DenseMap<InputSection *,
std::pair<Defined *, SmallVector<Defined *, 0>>>
sectionMap;
bool initialized = false;
};

View File

@ -48,6 +48,11 @@ bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
if (off >= isec->getSize())
return true;
const uint8_t *buf = isec->content().begin();
// Synthetic sections may have a size but empty data - Assume that they won't
// contain a landing pad
if (buf == nullptr && isa<SyntheticSection>(isec))
return false;
const uint32_t instr = read32le(buf + off);
// All BTI instructions are HINT instructions which all have same encoding
// apart from bits [11:5]

View File

@ -1930,7 +1930,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) {
if (isec->outSecOff < first->outSecOff || last->outSecOff < isec->outSecOff)
continue;
ts = addThunkSection(tos, isd, isec->outSecOff);
ts = addThunkSection(tos, isd, isec->outSecOff, /*isPrefix=*/true);
thunkedSections[isec] = ts;
return ts;
}
@ -1989,11 +1989,11 @@ void ThunkCreator::createInitialThunkSections(
ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
InputSectionDescription *isd,
uint64_t off) {
uint64_t off, bool isPrefix) {
auto *ts = make<ThunkSection>(ctx, os, off);
ts->partition = os->partition;
if ((ctx.arg.fixCortexA53Errata843419 || ctx.arg.fixCortexA8) &&
!isd->sections.empty()) {
!isd->sections.empty() && !isPrefix) {
// The errata fixes are sensitive to addresses modulo 4 KiB. When we add
// thunks we disturb the base addresses of sections placed after the thunks
// this makes patches we have generated redundant, and may cause us to
@ -2017,6 +2017,12 @@ ThunkSection *ThunkCreator::addThunkSection(OutputSection *os,
// 2.) The InputSectionDescription is larger than 4 KiB. This will prevent
// any assertion failures that an InputSectionDescription is < 4 KiB
// in size.
//
// isPrefix is a ThunkSection explicitly inserted before its target
// section. We suppress the rounding up of the size of these ThunkSections
// as unlike normal ThunkSections, they are small in size, but when BTI is
// enabled very frequent. This can bloat code-size and push the errata
// patches out of branch range.
uint64_t isdSize = isd->sections.back()->outSecOff +
isd->sections.back()->getSize() -
isd->sections.front()->outSecOff;

View File

@ -204,7 +204,7 @@ private:
std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);
ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
uint64_t off);
uint64_t off, bool isPrefix = false);
bool normalizeExistingThunk(Relocation &rel, uint64_t src);

View File

@ -0,0 +1,107 @@
// REQUIRES: aarch64
// RUN: rm -rf %t && split-file %s %t && cd %t
// RUN: llvm-mc -mattr=+bti -filetype=obj -triple=aarch64 asm -o a.o
// RUN: ld.lld --script lds -fix-cortex-a53-843419 -verbose a.o -o exe \
// RUN: 2>&1 | FileCheck -check-prefix=CHECK-PRINT %s
// RUN: llvm-objdump --no-print-imm-hex --no-show-raw-insn --triple=aarch64-linux-gnu -d exe | FileCheck %s
/// Test case for specific crash wrt interaction between thunks and errata
/// patches where the size of the added thunks meant that a range-extension
/// thunk to the patch was required. We need to check that a BTI Thunk is
/// generated for the patch, and that the patch's direct branch return is also
/// range extended, possibly needing another BTI Thunk.
///
/// The asm below is based on a crash that was happening in Chromium.
/// For more information see https://issues.chromium.org/issues/440019454
//--- asm
.section .note.gnu.property,"a"
.p2align 3
.long 4
.long 0x10 // descriptor length
.long 0x5 // GNU property type
.asciz "GNU"
.long 0xc0000000 // GNU_PROPERTY_AARCH64_FEATURE_1_AND
.long 4
.long 1 // GNU_PROPERTY_AARCH64_FEATURE_1_BTI
.long 0
.section .text.01, "ax", %progbits
.balign 4096
.globl _start
.type _start, %function
_start:
bl far_away_no_bti
.section .text.far, "ax", %progbits
.globl far_away_no_bti
.type far_away, function
far_away_no_bti:
.space 4096 - 28, 0
adrp x0, dat
ldr x1, [x1, #0]
ldr x0, [x0, :got_lo12:dat]
.space 0x8000000, 0
ret
.section .data
.globl dat
dat: .quad 0
// CHECK-PRINT: detected cortex-a53-843419 erratum sequence starting at 8010FFC in unpatched output.
// CHECK: 0000000000010000 <_start>:
// CHECK-NEXT: 10000: bl 0x10008 <__AArch64AbsLongThunk_far_away_no_bti>
// CHECK: <__AArch64AbsLongThunk_far_away_no_bti>:
// CHECK-NEXT: 10008: ldr x16, 0x10010
// CHECK-NEXT: br x16
// CHECK-NEXT: 10010: 18 00 01 08 .word 0x08010018
// Check that the BTI thunks do NOT have their size rounded up to 4 KiB.
// They precede the patch and they contain the landing pad.
// CHECK: <__AArch64BTIThunk_far_away_no_bti>:
// CHECK-NEXT: 8010018: bti c
// CHECK-NEXT: b 0x8010038 <far_away_no_bti>
// CHECK: <__AArch64AbsLongThunk___CortexA53843419_8011004>:
// CHECK-NEXT: 8010020: ldr x16, 0x8010028
// CHECK-NEXT: br x16
// CHECK-NEXT: 8010028: 34 10 01 10 .word 0x10011034
// CHECK: <__AArch64BTIThunk_>:
// CHECK-NEXT: 8010030: bti c
// CHECK-NEXT: b 0x8011028 <far_away_no_bti+0xff0>
// CHECK: 8010038 <far_away_no_bti>:
// CHECK-NEXT: ...
// CHECK-NEXT: 801101c: adrp x0, 0x10012000
// CHECK-NEXT: ldr x1, [x1]
// CHECK-NEXT: b 0x8010020 <__AArch64AbsLongThunk___CortexA53843419_8011004>
// CHECK-NEXT: ...
// CHECK-NEXT: 10011028: ret
// Check that the errata thunk does NOT contain a landing pad
// CHECK: <__CortexA53843419_8011004>:
// CHECK-NEXT: 1001102c: ldr x0, [x0, #64]
// CHECK-NEXT: b 0x10011040 <__AArch64AbsLongThunk_>
// Rest of generated code for readability
// CHECK: <__AArch64BTIThunk___CortexA53843419_8011004>:
// CHECK-NEXT: 10011034: bti c
// CHECK-NEXT: b 0x1001102c <__CortexA53843419_8011004>
// CHECK: <__AArch64AbsLongThunk_>
// CHECK-NEXT: 10011040: ldr x16, 0x10011048
// CHECK-NEXT: br x16
// CHECK-NEXT: 10011048: 30 00 01 08 .word 0x08010030
//--- lds
SECTIONS {
.text 0x10000 : {
*(.text.01);
. += 0x8000000;
*(.text.far);
}
}