[ELF][LoongArch] -r: Synthesize R_LARCH_ALIGN at input section start (#153935)

Similar to

94655dc8ae

The difference is that on LoongArch the ALIGN is synthesized when the
alignment is > 4 (instead of >= 4), and the number of bytes inserted is
`sec->addralign - 4`.
This commit is contained in:
Zhaoxin Yang 2025-08-22 16:02:41 +08:00 committed by GitHub
parent 6560adb584
commit 149d9a38e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 277 additions and 3 deletions

View File

@ -39,6 +39,7 @@ public:
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
bool relaxOnce(int pass) const override;
bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
void finalizeRelax(int passes) const override;
@ -48,6 +49,19 @@ private:
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20,
const Relocation &rLo12, uint64_t secAddr) const;
// Per-input-section step of ALIGN synthesis. Returns true when it advanced
// `dot` and recorded a synthesized ALIGN relocation for `sec`, false otherwise.
template <class ELFT, class RelTy>
bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
Relocs<RelTy> rels);
// Final step of ALIGN synthesis: rebuilds the relocation section associated
// with `baseSec` so that it holds the relocations in `rels` followed by the
// recorded synthesized ALIGN relocations.
template <class ELFT, class RelTy>
void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
Relocs<RelTy> rels);
// Dispatches to synthesizeAlignForInput when `sec` is non-null, and to
// finalizeSynthesizeAligns when `sec` is null and `baseSec` has been set.
template <class ELFT>
bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
// The following two variables are used by synthesized ALIGN relocations.
// baseSec is the first input section seen with an R_LARCH_RELAX relocation;
// it is reset to nullptr when the synthesized relocations are finalized.
InputSection *baseSec = nullptr;
// r_offset and r_addend pairs. r_offset is recorded relative to the address of
// baseSec; r_addend is the number of padding bytes (section alignment - 4).
SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
};
} // end anonymous namespace
@ -766,6 +780,117 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}
// Per-input-section step of ALIGN synthesis.
//
// While no anchor section (`baseSec`) is known, this only checks whether `sec`
// carries an R_LARCH_RELAX relocation and, if so, remembers it as the anchor;
// nothing is synthesized for that call. Once an anchor exists, a section whose
// alignment is > 4 and that does not already begin with an R_LARCH_ALIGN
// relocation gets one synthesized: `dot` is advanced by `addralign - 4` to
// insert NOPs and the relocation is recorded for later emission.
//
// Returns true only when `dot` was advanced; false means default handling.
template <class ELFT, class RelTy>
bool LoongArch::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
                                        Relocs<RelTy> rels) {
  if (!baseSec) {
    // The first input section with RELAX relocations becomes the section that
    // the synthesized ALIGN relocations are attached to.
    const bool hasRelax = llvm::any_of(rels, [](const RelTy &rel) {
      return rel.getType(false) == R_LARCH_RELAX;
    });
    if (hasRelax)
      baseSec = sec;
    return false;
  }

  // Alignments up to 4 never need a synthesized ALIGN.
  if (sec->addralign <= 4)
    return false;

  // A section that already starts with an ALIGN relocation is left alone.
  const bool startsWithAlign = llvm::any_of(rels, [](const RelTy &rel) {
    return rel.r_offset == 0 && rel.getType(false) == R_LARCH_ALIGN;
  });
  if (startsWithAlign)
    return false;

  // Record the relocation at the current position, expressed relative to the
  // anchor section, then make room for the padding.
  const auto padding = sec->addralign - 4;
  synthesizedAligns.emplace_back(dot - baseSec->getVA(), padding);
  dot += padding;
  return true;
}
// Finalize the relocation section by appending synthesized ALIGN relocations
// after processing all input sections.
//
// Builds a replacement for the relocation section found at baseSec->relSecIdx.
// The replacement holds every relocation in `rels` followed by one
// R_LARCH_ALIGN per entry of `synthesizedAligns`, and is then swapped in for
// the old relocation section inside the output section's input section
// descriptions. `baseSec` is reset to nullptr and `synthesizedAligns` is
// cleared along the way.
//
// `dot` is not used here. The incoming value of `sec` is not read either: the
// parameter is reused as the pointer to the replacement section.
// NOTE(review): `rels` is presumably the relocation list of baseSec, as
// supplied through invokeOnRelocs by the caller -- confirm.
template <class ELFT, class RelTy>
void LoongArch::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
Relocs<RelTy> rels) {
// The object file owning baseSec, and a copy of the ELF section header at
// index baseSec->relSecIdx.
auto *f = cast<ObjFile<ELFT>>(baseSec->file);
auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
// Create a copy of InputSection.
sec = make<InputSection>(*f, shdr, baseSec->name);
auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
// Overwrite the fresh object with the state of the existing relocation
// section; its size, content and type are replaced further down.
*sec = *baseRelSec;
// baseSec is not referenced again below; only baseRelSec is.
baseSec = nullptr;
// Allocate buffer for original and synthesized relocations in RELA format.
// If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
// CREL.
auto newSize = rels.size() + synthesizedAligns.size();
auto *relas = makeThreadLocalN<typename ELFT::Rela>(newSize);
sec->size = newSize * sizeof(typename ELFT::Rela);
sec->content_ = reinterpret_cast<uint8_t *>(relas);
sec->type = SHT_RELA;
// Copy original relocations to the new buffer, potentially converting CREL to
// RELA.
// NOTE(review): the 0/false arguments look like the isMips64EL flag -- confirm.
for (auto [i, r] : llvm::enumerate(rels)) {
relas[i].r_offset = r.r_offset;
relas[i].setSymbolAndType(r.getSymbol(0), r.getType(0), false);
// Only relocation formats that carry an explicit addend have one to copy.
if constexpr (RelTy::HasAddend)
relas[i].r_addend = r.r_addend;
}
// Append synthesized ALIGN relocations to the buffer.
// Each one uses symbol index 0; r.first is the recorded r_offset and r.second
// the recorded r_addend.
for (auto [i, r] : llvm::enumerate(synthesizedAligns)) {
auto &rela = relas[rels.size() + i];
rela.r_offset = r.first;
rela.setSymbolAndType(0, R_LARCH_ALIGN, false);
rela.r_addend = r.second;
}
synthesizedAligns.clear();
// Replace the old relocation section with the new one in the output section.
// addOrphanSections ensures that the output relocation section is processed
// after osec.
// Every slot that points at baseRelSec is redirected to the new section.
for (SectionCommand *cmd : sec->getParent()->commands) {
auto *isd = dyn_cast<InputSectionDescription>(cmd);
if (!isd)
continue;
for (auto *&isec : isd->sections)
if (isec == baseRelSec)
isec = sec;
}
}
// ELFT-specific dispatcher behind synthesizeAlign.
//
// With a non-null `sec`, runs the per-input-section step on that section's
// relocations and returns its result. With a null `sec`, runs the finalization
// step on the recorded anchor section (if there is one) and returns false.
template <class ELFT>
bool LoongArch::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
  if (sec) {
    bool synthesized = false;
    invokeOnRelocs(*sec, synthesized = synthesizeAlignForInput<ELFT>, dot, sec);
    return synthesized;
  }
  if (baseSec) {
    // `sec` is null here; it is forwarded unchanged to the finalization step.
    invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
  }
  return false;
}
// The assembler does not emit R_LARCH_ALIGN relocations for alignment
// directives when linker relaxation is disabled for a relocatable file or
// section. That breaks a two-stage link such as
//   ld -r a.o b.o -o ab.o; ld ab.o -o ab
// so this hook synthesizes an R_LARCH_ALIGN relocation at the start of a
// section when one is needed.
//
// Two calling modes:
//  * `sec` non-null (an input section is being laid out): if its alignment is
//    > 4, `dot` is advanced to insert NOPs and an ALIGN relocation is
//    synthesized.
//  * `sec` null (all input sections have been processed): the output
//    relocation section is updated with every newly synthesized ALIGN
//    relocation.
//
// Only valid for relocatable links.
bool LoongArch::synthesizeAlign(uint64_t &dot, InputSection *sec) {
  assert(ctx.arg.relocatable);
  return ctx.arg.is64 ? synthesizeAlignAux<ELF64LE>(dot, sec)
                      : synthesizeAlignAux<ELF32LE>(dot, sec);
}
// Returns true when the relocation at index `i` is immediately followed by an
// R_LARCH_RELAX relocation in `relocs`.
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
  const size_t next = i + 1;
  if (next >= relocs.size())
    return false;
  return relocs[next].type == R_LARCH_RELAX;
}

View File

@ -1230,9 +1230,9 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
if (sec->firstInOverlay)
state->overlaySize = 0;
bool synthesizeAlign = ctx.arg.relocatable && ctx.arg.relax &&
(sec->flags & SHF_EXECINSTR) &&
ctx.arg.emachine == EM_RISCV;
bool synthesizeAlign =
ctx.arg.relocatable && ctx.arg.relax && (sec->flags & SHF_EXECINSTR) &&
(ctx.arg.emachine == EM_LOONGARCH || ctx.arg.emachine == EM_RISCV);
// We visited SectionsCommands from processSectionCommands to
// layout sections. Now, we visit SectionsCommands again to fix
// section offsets.

View File

@ -899,6 +899,8 @@ std::array<uint8_t, 4> OutputSection::getFiller(Ctx &ctx) {
return {1, 0, 1, 0};
return {0x13, 0, 0, 0};
}
if (ctx.arg.relocatable && ctx.arg.emachine == EM_LOONGARCH)
return {0, 0, 0x40, 0x03};
return ctx.target->trapInstr;
}

View File

@ -0,0 +1,147 @@
# REQUIRES: loongarch
## Test LA64.
# RUN: rm -rf %t && split-file %s %t && cd %t
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --defsym ELF64=1 b.s -o b.o
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax b1.s -o b1.o
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.o
# RUN: llvm-mc -filetype=obj -triple=loongarch64 d.s -o d.o
## No RELAX. Don't synthesize ALIGN.
# RUN: ld.lld -r b.o d.o -o bd.ro
# RUN: llvm-readelf -r bd.ro | FileCheck %s --check-prefix=NOREL
# NOREL: no relocations
# RUN: ld.lld -r b.o b.o a.o b.o b1.o c.o d.o -o out.ro
# RUN: llvm-objdump -dr --no-show-raw-insn out.ro | FileCheck %s
# RUN: llvm-readelf -r out.ro | FileCheck %s --check-prefix=CHECK-REL
# CHECK: <b0>:
# CHECK-NEXT: 0: addi.d $a0, $a1, 1
# CHECK-NEXT: 4: nop
# CHECK-EMPTY:
# CHECK-NEXT: <b0>:
# CHECK-NEXT: 8: addi.d $a0, $a1, 1
# CHECK-EMPTY:
# CHECK-NEXT: <_start>:
# CHECK-NEXT: c: pcalau12i $a0, 0
# CHECK-NEXT: 000000000000000c: R_LARCH_PCALA_HI20 .Ltext1_start
# CHECK-NEXT: 000000000000000c: R_LARCH_RELAX *ABS*
# CHECK-NEXT: 10: addi.d $a0, $a0, 0
# CHECK-NEXT: 0000000000000010: R_LARCH_PCALA_LO12 .Ltext1_start
# CHECK-NEXT: 0000000000000010: R_LARCH_RELAX *ABS*
# CHECK-NEXT: 14: nop
# CHECK-NEXT: 0000000000000014: R_LARCH_ALIGN *ABS*+0x4
# CHECK-EMPTY:
# CHECK-NEXT: <b0>:
# CHECK-NEXT: 18: addi.d $a0, $a1, 1
# CHECK-NEXT: 1c: nop
# CHECK-NEXT: 20: nop
# CHECK-NEXT: 0000000000000020: R_LARCH_ALIGN *ABS*+0x4
# CHECK-NEXT: 24: nop
# CHECK-EMPTY:
# CHECK-NEXT: <b1>:
# CHECK-NEXT: 28: addi.d $a0, $a1, 3
# CHECK-EMPTY:
# CHECK-NEXT: <c0>:
# CHECK-NEXT: 2c: addi.d $a0, $a1, 4
# CHECK-NEXT: 30: nop
# CHECK-NEXT: 0000000000000030: R_LARCH_ALIGN *ABS*+0x4
# CHECK-EMPTY:
# CHECK-NEXT: <d0>:
# CHECK-NEXT: 34: addi.d $a0, $a1, 5
# CHECK-REL: Relocation section '.rela.text' at offset {{.*}} contains 7 entries:
# CHECK-REL: Relocation section '.rela.text1' at offset {{.*}} contains 5 entries:
## Test LA32.
# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax a.s -o a.32.o
# RUN: llvm-mc -filetype=obj -triple=loongarch32 -mattr=+relax b.s -o b.32.o
# RUN: ld.lld -r a.32.o b.32.o -o out.32.ro
# RUN: ld.lld -Ttext=0x10000 out.32.ro -o out32
# RUN: llvm-objdump -dr --no-show-raw-insn out32 | FileCheck %s --check-prefix=CHECK32
# CHECK32: <_start>:
# CHECK32-NEXT: 10000: pcaddi $a0, 4
# CHECK32-NEXT: 10004: nop
# CHECK32-EMPTY:
# CHECK32-NEXT: <b0>:
# CHECK32-NEXT: 10008: addi.w $a0, $a1, 1
# CHECK32: <.Ltext1_start>:
# CHECK32-NEXT: 10010: pcaddi $a1, 0
# CHECK32-NEXT: 10014: nop
# CHECK32-NEXT: 10018: addi.w $a0, $a1, 2
## Test CREL.
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax --crel a.s -o acrel.o
# RUN: ld.lld -r acrel.o b.o -o out.crel.ro
# RUN: llvm-objdump -dr --no-show-raw-insn out.crel.ro | FileCheck %s --check-prefix=CHECKC
# CHECKC: <_start>:
# CHECKC-NEXT: 0: pcalau12i $a0, 0
# CHECKC-NEXT: 0000000000000000: R_LARCH_PCALA_HI20 .Ltext1_start
# CHECKC-NEXT: 0000000000000000: R_LARCH_RELAX *ABS*
# CHECKC-NEXT: 4: addi.d $a0, $a0, 0
# CHECKC-NEXT: 0000000000000004: R_LARCH_PCALA_LO12 .Ltext1_start
# CHECKC-NEXT: 0000000000000004: R_LARCH_RELAX *ABS*
# CHECKC-NEXT: 8: nop
# CHECKC-NEXT: 0000000000000008: R_LARCH_ALIGN *ABS*+0x4
# CHECKC-EMPTY:
# CHECKC-NEXT: <b0>:
# CHECKC-NEXT: c: addi.d $a0, $a1, 1
#--- a.s
.globl _start
_start:
la.pcrel $a0, .Ltext1_start
.section .text1,"ax"
.Ltext1_start:
la.pcrel $a1, .Ltext1_start
#--- b.s
.macro addi dst, src1, src2
.ifdef ELF64
addi.d \dst, \src1, \src2
.else
addi.w \dst, \src1, \src2
.endif
.endm
## Needs synthesized ALIGN.
.option push
.option norelax
.balign 8
b0:
addi $a0, $a1, 1
.section .text1,"ax"
.balign 8
addi $a0, $a1, 2
.option pop
#--- b1.s
# Starts with an ALIGN relocation, don't need synthesized ALIGN.
.option push
.option norelax
.reloc ., R_LARCH_ALIGN, 4
nop
.balign 8
b1:
addi.d $a0, $a1, 3
.option pop
#--- c.s
## Alignment == 4, don't need synthesized ALIGN.
.balign 4
c0:
addi.d $a0, $a1, 4
#--- d.s
## Needs synthesized ALIGN.
.balign 8
d0:
addi.d $a0, $a1, 5