[BOLT][AArch64] Symbolize ADRP after relaxation (#131414)

When the linker relaxes a GOT load, it changes ADRP+LDR instruction pair
into ADRP+ADD. It is relatively straightforward to detect and symbolize
the second instruction in the disassembler. However, it is not always
possible to properly symbolize the ADRP instruction without looking at
the second instruction. Hence, we have the FixRelaxationPass that adjust
the operand of ADRP by looking at the corresponding ADD.

This PR tries to properly symbolize ADRP earlier in the pipeline, i.e.
in AArch64MCSymbolizer. This change makes it easier to adjust the
instruction once we add AArch64 support in `scanExternalRefs()`.
Additionally, we get a benefit of looking at proper operands while
observing the function state prior to running FixRelaxationPass.

To disambiguate the operand of ADRP that has a GOT relocation against
it, we look at the contents/value of the operand. If it contains an
address of a page that is valid for GOT, we assume that the operand
wasn't modified by the linker and leave it up to FixRelaxationPass to do
a proper adjustment. If the page referenced by ADRP cannot point to GOT,
then it's an indication that the linker has modified the operand and we
substitute the operand with a non-GOT reference to the symbol.
This commit is contained in:
Maksim Panchenko 2025-03-18 14:31:31 -07:00 committed by GitHub
parent bc646f4b30
commit 70bf5e514b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 130 additions and 3 deletions

View File

@ -125,15 +125,39 @@ AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
// instruction pairs and will perform necessary adjustments.
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
assert(SymbolValue && "Symbol value should be set");
(void)SymbolValue;
const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
// Check if defined symbol and GOT are on the same page. If they are not,
// disambiguate the operand.
if (BC.MIB->isADRP(Inst) && Rel.Addend == 0 &&
SymbolPageAddr == Rel.Value &&
!isPageAddressValidForGOT(SymbolPageAddr)) {
AdjustedRel.Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
} else {
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
AdjustedRel.Addend = Rel.Value;
}
}
return AdjustedRel;
}
bool AArch64MCSymbolizer::isPageAddressValidForGOT(uint64_t PageAddress) const {
assert(!(PageAddress & 0xfffULL) && "Page address not aligned at 4KB");
ErrorOr<BinarySection &> GOT =
Function.getBinaryContext().getUniqueSectionByName(".got");
if (!GOT || !GOT->getSize())
return false;
const uint64_t GOTFirstPageAddress = GOT->getAddress() & ~0xfffULL;
const uint64_t GOTLastPageAddress =
(GOT->getAddress() + GOT->getSize() - 1) & ~0xfffULL;
return PageAddress >= GOTFirstPageAddress &&
PageAddress <= GOTLastPageAddress;
}
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
int64_t Value,
uint64_t Address) {}

View File

@ -28,6 +28,9 @@ protected:
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
const MCInst &Inst) const;
/// Return true if \p PageAddress is a valid page address for .got section.
bool isPageAddressValidForGOT(uint64_t PageAddress) const;
public:
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),

View File

@ -0,0 +1,100 @@
## Check that BOLT symbolizer properly handles loads from GOT, including
## instruction sequences changed/relaxed by the linker.
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/main.s \
# RUN: -o %t/main.o
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/near.s \
# RUN: -o %t/near.o
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %t/far.s \
# RUN: -o %t/far.o
# RUN: %clang %cflags %t/main.o %t/near.o %t/far.o -o %t/main.exe -Wl,-q -static
# RUN: llvm-bolt %t/main.exe -o %t/main.bolt --keep-nops --print-disasm \
# RUN: --print-only=_start | FileCheck %s
#--- main.s
.text
.globl _start
.p2align 2
.type _start, @function
# CHECK-LABEL: _start
_start:
## Function address load relaxable into nop+adr.
# CHECK: nop
# CHECK-NEXT: adr x0, near
adrp x0, :got:near
ldr x0, [x0, :got_lo12:near]
## Function address load relaxable into adrp+add.
# CHECK-NEXT: adrp x1, far
# CHECK-NEXT: add x1, x1, :lo12:far
adrp x1, :got:far
ldr x1, [x1, :got_lo12:far]
## Non-relaxable due to the instruction in-between.
# CHECK-NEXT: adrp x2, __BOLT_got_zero
# CHECK-NEXT: nop
# CHECK-NEXT: ldr x2, [x2, :lo12:__BOLT_got_zero{{.*}}]
adrp x2, :got:near
nop
ldr x2, [x2, :got_lo12:near]
## Load data object with local visibility. Relaxable into adrp+add.
# CHECK-NEXT: adrp x3, "local_far_data/1"
# CHECK-NEXT: add x3, x3, :lo12:"local_far_data/1"
adrp x3, :got:local_far_data
ldr x3, [x3, :got_lo12:local_far_data]
## Global data reference relaxable into adrp+add.
# CHECK-NEXT: adrp x4, far_data
# CHECK-NEXT: add x4, x4, :lo12:far_data
adrp x4, :got:far_data
ldr x4, [x4, :got_lo12:far_data]
ret
.size _start, .-_start
.weak near
.weak far
.weak far_data
## Data object separated by more than 1MB from _start.
.data
.type local_far_data, @object
local_far_data:
.xword 0
.size local_far_data, .-local_far_data
#--- near.s
.text
.globl near
.type near, @function
near:
ret
.size near, .-near
#--- far.s
.text
## Insert 1MB of empty space to make objects after it unreachable by adr
## instructions in _start.
.space 0x100000
.globl far
.type far, @function
far:
ret
.size far, .-far
.data
.globl far_data
.type far_data, @object
far_data:
.xword 0
.size far_data, .-far_data