[LLD][COFF] Add support for range extension thunks for ARM64EC targets. (#106289)

Thunks themselves are the same as regular ARM64 thunks; they just need
to report the correct machine type. When processing the code, we also
need to use the current chunk's machine type instead of the global one:
we don't want to treat x86_64 thunks as ARM64EC, and we need to report
the correct machine type in hybrid binaries.
This commit is contained in:
Jacek Caban 2024-08-29 10:19:32 +02:00 committed by GitHub
parent b7981a78f0
commit efad561890
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 215 additions and 26 deletions

View File

@ -842,14 +842,9 @@ const uint8_t arm64Thunk[] = {
0x00, 0x02, 0x1f, 0xd6, // br x16 0x00, 0x02, 0x1f, 0xd6, // br x16
}; };
size_t RangeExtensionThunkARM64::getSize() const { size_t RangeExtensionThunkARM64::getSize() const { return sizeof(arm64Thunk); }
assert(ctx.config.machine == ARM64);
(void)&ctx;
return sizeof(arm64Thunk);
}
void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const { void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const {
assert(ctx.config.machine == ARM64);
memcpy(buf, arm64Thunk, sizeof(arm64Thunk)); memcpy(buf, arm64Thunk, sizeof(arm64Thunk));
applyArm64Addr(buf + 0, target->getRVA(), rva, 12); applyArm64Addr(buf + 0, target->getRVA(), rva, 12);
applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0); applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0);

View File

@ -615,20 +615,22 @@ private:
COFFLinkerContext &ctx; COFFLinkerContext &ctx;
}; };
// A range extension thunk used for both ARM64EC and ARM64 machine types.
class RangeExtensionThunkARM64 : public NonSectionCodeChunk { class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
public: public:
explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t) explicit RangeExtensionThunkARM64(MachineTypes machine, Defined *t)
: target(t), ctx(ctx) { : target(t), machine(machine) {
setAlignment(4); setAlignment(4);
assert(llvm::COFF::isAnyArm64(machine));
} }
size_t getSize() const override; size_t getSize() const override;
void writeTo(uint8_t *buf) const override; void writeTo(uint8_t *buf) const override;
MachineTypes getMachine() const override { return ARM64; } MachineTypes getMachine() const override { return machine; }
Defined *target; Defined *target;
private: private:
COFFLinkerContext &ctx; MachineTypes machine;
}; };
// Windows-specific. // Windows-specific.

View File

@ -219,10 +219,12 @@ private:
void sortECChunks(); void sortECChunks();
void removeUnusedSections(); void removeUnusedSections();
void assignAddresses(); void assignAddresses();
bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin); bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
MachineTypes machine);
std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks, std::pair<Defined *, bool> getThunk(DenseMap<uint64_t, Defined *> &lastThunks,
Defined *target, uint64_t p, Defined *target, uint64_t p,
uint16_t type, int margin); uint16_t type, int margin,
MachineTypes machine);
bool createThunks(OutputSection *os, int margin); bool createThunks(OutputSection *os, int margin);
bool verifyRanges(const std::vector<Chunk *> chunks); bool verifyRanges(const std::vector<Chunk *> chunks);
void createECCodeMap(); void createECCodeMap();
@ -396,8 +398,9 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) {
// Check whether the target address S is in range from a relocation // Check whether the target address S is in range from a relocation
// of type relType at address P. // of type relType at address P.
bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) { bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin,
if (ctx.config.machine == ARMNT) { MachineTypes machine) {
if (machine == ARMNT) {
int64_t diff = AbsoluteDifference(s, p + 4) + margin; int64_t diff = AbsoluteDifference(s, p + 4) + margin;
switch (relType) { switch (relType) {
case IMAGE_REL_ARM_BRANCH20T: case IMAGE_REL_ARM_BRANCH20T:
@ -408,7 +411,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
default: default:
return true; return true;
} }
} else if (ctx.config.machine == ARM64) { } else if (isAnyArm64(machine)) {
int64_t diff = AbsoluteDifference(s, p) + margin; int64_t diff = AbsoluteDifference(s, p) + margin;
switch (relType) { switch (relType) {
case IMAGE_REL_ARM64_BRANCH26: case IMAGE_REL_ARM64_BRANCH26:
@ -421,7 +424,7 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
return true; return true;
} }
} else { } else {
llvm_unreachable("Unexpected architecture"); return true;
} }
} }
@ -429,17 +432,17 @@ bool Writer::isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
// or create a new one. // or create a new one.
std::pair<Defined *, bool> std::pair<Defined *, bool>
Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target, Writer::getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target,
uint64_t p, uint16_t type, int margin) { uint64_t p, uint16_t type, int margin, MachineTypes machine) {
Defined *&lastThunk = lastThunks[target->getRVA()]; Defined *&lastThunk = lastThunks[target->getRVA()];
if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin)) if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin, machine))
return {lastThunk, false}; return {lastThunk, false};
Chunk *c; Chunk *c;
switch (ctx.config.machine) { switch (getMachineArchType(machine)) {
case ARMNT: case Triple::thumb:
c = make<RangeExtensionThunkARM>(ctx, target); c = make<RangeExtensionThunkARM>(ctx, target);
break; break;
case ARM64: case Triple::aarch64:
c = make<RangeExtensionThunkARM64>(ctx, target); c = make<RangeExtensionThunkARM64>(machine, target);
break; break;
default: default:
llvm_unreachable("Unexpected architecture"); llvm_unreachable("Unexpected architecture");
@ -471,6 +474,7 @@ bool Writer::createThunks(OutputSection *os, int margin) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]); SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
if (!sc) if (!sc)
continue; continue;
MachineTypes machine = sc->getMachine();
size_t thunkInsertionSpot = i + 1; size_t thunkInsertionSpot = i + 1;
// Try to get a good enough estimate of where new thunks will be placed. // Try to get a good enough estimate of where new thunks will be placed.
@ -497,11 +501,12 @@ bool Writer::createThunks(OutputSection *os, int margin) {
uint64_t s = sym->getRVA(); uint64_t s = sym->getRVA();
if (isInRange(rel.Type, s, p, margin)) if (isInRange(rel.Type, s, p, margin, machine))
continue; continue;
// If the target isn't in range, hook it up to an existing or new thunk. // If the target isn't in range, hook it up to an existing or new thunk.
auto [thunk, wasNew] = getThunk(lastThunks, sym, p, rel.Type, margin); auto [thunk, wasNew] =
getThunk(lastThunks, sym, p, rel.Type, margin, machine);
if (wasNew) { if (wasNew) {
Chunk *thunkChunk = thunk->getChunk(); Chunk *thunkChunk = thunk->getChunk();
thunkChunk->setRVA( thunkChunk->setRVA(
@ -603,6 +608,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c); SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
if (!sc) if (!sc)
continue; continue;
MachineTypes machine = sc->getMachine();
ArrayRef<coff_relocation> relocs = sc->getRelocs(); ArrayRef<coff_relocation> relocs = sc->getRelocs();
for (const coff_relocation &rel : relocs) { for (const coff_relocation &rel : relocs) {
@ -615,7 +621,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
uint64_t p = sc->getRVA() + rel.VirtualAddress; uint64_t p = sc->getRVA() + rel.VirtualAddress;
uint64_t s = sym->getRVA(); uint64_t s = sym->getRVA();
if (!isInRange(rel.Type, s, p, 0)) if (!isInRange(rel.Type, s, p, 0, machine))
return false; return false;
} }
} }
@ -625,7 +631,7 @@ bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
// Assign addresses and add thunks if necessary. // Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() { void Writer::finalizeAddresses() {
assignAddresses(); assignAddresses();
if (ctx.config.machine != ARMNT && ctx.config.machine != ARM64) if (ctx.config.machine != ARMNT && !isAnyArm64(ctx.config.machine))
return; return;
size_t origNumChunks = 0; size_t origNumChunks = 0;

View File

@ -0,0 +1,186 @@
# REQUIRES: aarch64, x86
# RUN: split-file %s %t.dir && cd %t.dir
# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows funcs.s -o funcs-arm64ec.obj
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj
# RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj
# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
# Test generating range extension thunks for ARM64EC code. Place some x86_64 chunks in the middle
# and make sure that thunks stay in ARM64EC code range.
# RUN: lld-link -machine:arm64ec -noentry -dll funcs-arm64ec.obj space-x86_64.obj loadconfig-arm64ec.obj -out:test.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
# VERBOSE: Added 3 thunks with margin {{.*}} in 1 passes
# RUN: llvm-objdump -d test.dll | FileCheck --check-prefix=DISASM %s
# DISASM: Disassembly of section .code1:
# DISASM-EMPTY:
# DISASM-NEXT: 0000000180003000 <.code1>:
# DISASM-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
# DISASM-NEXT: 180003004: d65f03c0 ret
# DISASM-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
# DISASM-NEXT: 18000300c: 91000210 add x16, x16, #0x0
# DISASM-NEXT: 180003010: d61f0200 br x16
# DISASM-EMPTY:
# DISASM-NEXT: Disassembly of section .code2:
# DISASM-EMPTY:
# DISASM-NEXT: 0000000180004000 <.code2>:
# DISASM-NEXT: ...
# DISASM-EMPTY:
# DISASM-NEXT: Disassembly of section .code3:
# DISASM-EMPTY:
# DISASM-NEXT: 0000000180005000 <.code3>:
# DISASM-NEXT: ...
# DISASM-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
# DISASM-NEXT: 18000c004: d65f03c0 ret
# DISASM-NEXT: 18000c008: 00000000 udf #0x0
# DISASM-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
# DISASM-NEXT: 18000c010: 91006210 add x16, x16, #0x18
# DISASM-NEXT: 18000c014: d61f0200 br x16
# DISASM-NEXT: ...
# DISASM-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
# DISASM-NEXT: 18001401c: d65f03c0 ret
# DISASM-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
# DISASM-NEXT: 180014024: 91000210 add x16, x16, #0x0
# DISASM-NEXT: 180014028: d61f0200 br x16
# RUN: llvm-readobj --coff-load-config test.dll | FileCheck --check-prefix=LOADCFG %s
# LOADCFG: CodeMap [
# LOADCFG-NEXT: 0x3000 - 0x3014 ARM64EC
# LOADCFG-NEXT: 0x4000 - 0x4300 X64
# LOADCFG-NEXT: 0x5000 - 0x1402C ARM64EC
# LOADCFG-NEXT: ]
# A similar test using a hybrid binary and native placeholder chunks.
# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
# RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s
# RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s
# LOADCFGX: CodeMap [
# LOADCFGX-NEXT: 0x3000 - 0x3014 ARM64EC
# LOADCFGX-NEXT: 0x4000 - 0x4300 ARM64
# LOADCFGX-NEXT: 0x5000 - 0x1402C ARM64EC
# LOADCFGX-NEXT: ]
# Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations.
# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
# VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes
# RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s
# DISASMX: Disassembly of section .code1:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180003000 <.code1>:
# DISASMX-NEXT: 180003000: 36000040 tbz w0, #0x0, 0x180003008 <.code1+0x8>
# DISASMX-NEXT: 180003004: d65f03c0 ret
# DISASMX-NEXT: 180003008: b0000050 adrp x16, 0x18000c000
# DISASMX-NEXT: 18000300c: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180003010: d61f0200 br x16
# DISASMX-EMPTY:
# DISASMX-NEXT: Disassembly of section .code2:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180004000 <.code2>:
# DISASMX-NEXT: 180004000: 36000040 tbz w0, #0x0, 0x180004008 <.code2+0x8>
# DISASMX-NEXT: 180004004: d65f03c0 ret
# DISASMX-NEXT: 180004008: b0000090 adrp x16, 0x180015000
# DISASMX-NEXT: 18000400c: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180004010: d61f0200 br x16
# DISASMX-EMPTY:
# DISASMX-NEXT: Disassembly of section .code3:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180005000 <.code3>:
# DISASMX-NEXT: ...
# DISASMX-NEXT: 18000c000: 36000060 tbz w0, #0x0, 0x18000c00c <.code3+0x700c>
# DISASMX-NEXT: 18000c004: d65f03c0 ret
# DISASMX-NEXT: 18000c008: 00000000 udf #0x0
# DISASMX-NEXT: 18000c00c: 90000050 adrp x16, 0x180014000 <.code3+0xf000>
# DISASMX-NEXT: 18000c010: 91006210 add x16, x16, #0x18
# DISASMX-NEXT: 18000c014: d61f0200 br x16
# DISASMX-NEXT: ...
# DISASMX-NEXT: 180014018: 36000040 tbz w0, #0x0, 0x180014020 <.code3+0xf020>
# DISASMX-NEXT: 18001401c: d65f03c0 ret
# DISASMX-NEXT: 180014020: f0ffff70 adrp x16, 0x180003000 <.code1>
# DISASMX-NEXT: 180014024: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180014028: d61f0200 br x16
# DISASMX-EMPTY:
# DISASMX-NEXT: Disassembly of section .code4:
# DISASMX-EMPTY:
# DISASMX-NEXT: 0000000180015000 <.code4>:
# DISASMX-NEXT: 180015000: 36000040 tbz w0, #0x0, 0x180015008 <.code4+0x8>
# DISASMX-NEXT: 180015004: d65f03c0 ret
# DISASMX-NEXT: 180015008: f0ffff70 adrp x16, 0x180004000 <.code2>
# DISASMX-NEXT: 18001500c: 91000210 add x16, x16, #0x0
# DISASMX-NEXT: 180015010: d61f0200 br x16
# RUN: llvm-readobj --coff-load-config testx2.dll | FileCheck --check-prefix=LOADCFGX2 %s
# LOADCFGX2: CodeMap [
# LOADCFGX2-NEXT: 0x3000 - 0x3014 ARM64EC
# LOADCFGX2-NEXT: 0x4000 - 0x4014 ARM64
# LOADCFGX2-NEXT: 0x5000 - 0x1402C ARM64EC
# LOADCFGX2-NEXT: 0x15000 - 0x15014 ARM64
# LOADCFGX2-NEXT: ]
#--- funcs.s
// ARM64EC test input (assembled with the arm64ec-windows triple by the
// llvm-mc invocation at the top of this file).
// main, func1 and func2 form a cycle of tbz branches:
// main -> func1 -> func2 -> main. Padding sections keep them apart, and the
// expected disassembly earlier in this file shows each tbz retargeted to an
// adrp/add/br x16 thunk placed right after the branching code.
.globl main
.globl func1
.globl func2
.section .code1, "xr"
main:
tbz w0, #0, func1
ret
// 0x7000 bytes of padding between the start of .code3 and func1.
.section .code3$a, "xr"
.space 0x7000
.section .code3$b, "xr"
func1:
tbz w0, #0, func2
ret
// One extra byte after the code; the expected output shows a zero word
// between this ret and the thunk that follows (presumably this exercises
// thunk alignment after an oddly sized chunk -- confirm).
.space 1
// 0x8000 bytes of padding between func1 and func2.
.section .code3$c, "xr"
.space 0x8000
.section .code3$d, "xr"
.align 2
func2:
tbz w0, #0, main
ret
#--- space.s
// Placeholder input with no symbols: three 0x100-byte contributions to
// .code2 (0x300 bytes total, matching the 0x4000 - 0x4300 code map entry
// expected above). The llvm-mc invocations at the top of this file assemble
// it once as x86_64 and once as aarch64, so it can sit between the ARM64EC
// sections as code of a different machine type.
.section .code2$a, "xr"
.space 0x100
.section .code2$b, "xr"
.space 0x100
.section .code2$c, "xr"
.space 0x100
#--- native-funcs.s
// Native ARM64 test input (assembled with the aarch64-windows triple by the
// llvm-mc invocation at the top of this file). nmain in .code2 and nfunc in
// .code4 branch to each other with tbz; in the hybrid link the expected
// disassembly shows both branches going through adrp/add/br x16 thunks, and
// the expected code map lists both sections as ARM64.
.globl nmain
.globl nfunc
.section .code2, "xr"
nmain:
tbz w0, #0, nfunc
ret
.section .code4, "xr"
.align 2
nfunc:
tbz w0, #0, nmain
ret