[lld-macho] Fix thunks for non-__text TEXT sections (#99052)

This supersedes https://github.com/llvm/llvm-project/pull/87818 and
fixes https://github.com/llvm/llvm-project/issues/52767

When calculating arm64 thunks, we make a few assumptions that may not
hold when considering code sections outside of `__text`:

1. That a section needs thunks only if its size is larger than the
branch range.
2. That any calls into `__stubs` are necessarily forward jumps (that is,
the section with the jump is ordered before `__stubs`)

Sections like this exist in the wild, most prominently the
`__lcxx_overrides` section introduced in
https://github.com/llvm/llvm-project/pull/69498

This change:
- Ensures that if one section in `__TEXT` gets thunks, all of them do.
- Makes all code sections in `__TEXT` contiguous (and guaranteed to be
placed before `__stubs`)
This commit is contained in:
Leonard Grey 2024-07-23 11:02:55 -04:00 committed by GitHub
parent 81e2a57b41
commit 58f3c5e696
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 112 additions and 22 deletions

View File

@ -26,6 +26,7 @@ add_lld_library(lldMachO
OutputSegment.cpp
Relocations.cpp
SectionPriorities.cpp
Sections.cpp
SymbolTable.cpp
Symbols.cpp
SyntheticSections.cpp

View File

@ -127,10 +127,20 @@ bool TextOutputSection::needsThunks() const {
uint64_t isecAddr = addr;
for (ConcatInputSection *isec : inputs)
isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
if (isecAddr - addr + in.stubs->getSize() <=
// Other sections besides __text might be small enough to pass this
// test but nevertheless need thunks for calling into other sections.
// An imperfect heuristic to use in this case is that if a section
// we've already processed in this segment needs thunks, so do the
// rest.
bool needsThunks = parent && parent->needsThunks;
if (!needsThunks &&
isecAddr - addr + in.stubs->getSize() <=
std::min(target->backwardBranchRange, target->forwardBranchRange))
return false;
// Yes, this program is large enough to need thunks.
if (parent) {
parent->needsThunks = true;
}
for (ConcatInputSection *isec : inputs) {
for (Reloc &r : isec->relocs) {
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))

View File

@ -11,6 +11,7 @@
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
@ -366,20 +367,8 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
}
bool macho::isCodeSection(const InputSection *isec) {
uint32_t type = sectionType(isec->getFlags());
if (type != S_REGULAR && type != S_COALESCED)
return false;
uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
if (attr == S_ATTR_PURE_INSTRUCTIONS)
return true;
if (isec->getSegName() == segment_names::text)
return StringSwitch<bool>(isec->getName())
.Cases(section_names::textCoalNt, section_names::staticInit, true)
.Default(false);
return false;
return sections::isCodeSection(isec->getName(), isec->getSegName(),
isec->getFlags());
}
bool macho::isCfStringSection(const InputSection *isec) {

View File

@ -9,6 +9,7 @@
#include "OutputSegment.h"
#include "ConcatOutputSection.h"
#include "InputSection.h"
#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
@ -89,9 +90,20 @@ static int sectionOrder(OutputSection *osec) {
StringRef segname = osec->parent->name;
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
if (osec->name == section_names::header)
return -7;
// `__text` needs to precede the other code sections since its
// expected to be the largest. This means in effect that it will
// be the section that determines whether we need thunks or not.
if (osec->name == section_names::text)
return -6;
// Ensure all code sections are contiguous with `__text` for thunk
// calculations.
if (sections::isCodeSection(osec->name, segment_names::text, osec->flags) &&
osec->name != section_names::stubHelper) {
return -5;
}
return StringSwitch<int>(osec->name)
.Case(section_names::header, -6)
.Case(section_names::text, -5)
.Case(section_names::stubs, -4)
.Case(section_names::stubHelper, -3)
.Case(section_names::objcStubs, -2)

View File

@ -57,6 +57,7 @@ public:
uint32_t initProt = 0;
uint32_t flags = 0;
uint8_t index;
bool needsThunks = false;
llvm::TinyPtrVector<Defined *> segmentStartSymbols;
llvm::TinyPtrVector<Defined *> segmentEndSymbols;

36
lld/MachO/Sections.cpp Normal file
View File

@ -0,0 +1,36 @@
//===- Sections.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Sections.h"
#include "InputSection.h"
#include "OutputSegment.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;
using namespace llvm::MachO;
namespace lld::macho::sections {
bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
uint32_t type = sectionType(flags);
if (type != S_REGULAR && type != S_COALESCED)
return false;
uint32_t attr = flags & SECTION_ATTRIBUTES_USR;
if (attr == S_ATTR_PURE_INSTRUCTIONS)
return true;
if (segName == segment_names::text)
return StringSwitch<bool>(name)
.Cases(section_names::textCoalNt, section_names::staticInit, true)
.Default(false);
return false;
}
} // namespace lld::macho::sections

19
lld/MachO/Sections.h Normal file
View File

@ -0,0 +1,19 @@
//===- Sections.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_SECTIONS_H
#define LLD_MACHO_SECTIONS_H
#include "llvm/ADT/StringRef.h"
namespace lld::macho::sections {
bool isCodeSection(llvm::StringRef name, llvm::StringRef segName,
uint32_t flags);
} // namespace lld::macho::sections
#endif // #ifndef LLD_MACHO_SECTIONS_H

View File

@ -7,12 +7,13 @@
## (3) a second thunk is created when the first one goes out of range
## (4) early calls to a dylib stub use a thunk, and later calls the stub
## directly
## (5) Thunks are created for all sections in the text segment with branches.
## Notes:
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -o %t/thunk %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
# CHECK: Disassembly of section __TEXT,__text:
@ -164,6 +165,10 @@
# CHECK: adrp x16, 0x[[#%x, F_PAGE]]
# CHECK: add x16, x16, #[[#F_OFFSET]]
# CHECK: Disassembly of section __TEXT,__lcxx_override:
# CHECK: <_z>:
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>
# CHECK: Disassembly of section __TEXT,__stubs:
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
@ -300,3 +305,17 @@ _main:
bl _h
bl ___nan
ret
.section __TEXT,__cstring
.space 0x4000000
.section __TEXT,__lcxx_override,regular,pure_instructions
.globl _z
.no_dead_strip _z
.p2align 2
_z:
bl _a
## Ensure calling into stubs works
bl _extern_sym
ret

View File

@ -23,18 +23,20 @@
# CHECK-12-NEXT: __cstring
# CHECK-21: __text
## `foo` always sorts next to `__text` since it's a code section
## and needs to be adjacent for arm64 thunk calculations
# CHECK-21-NEXT: foo
# CHECK-21-NEXT: __cstring
# CHECK-21-NEXT: bar
# CHECK-21-NEXT: foo
# CHECK-SYNTHETIC-ORDER: __text
# CHECK-SYNTHETIC-ORDER-NEXT: foo
# CHECK-SYNTHETIC-ORDER-NEXT: __stubs
# CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs
# CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets
# CHECK-SYNTHETIC-ORDER-NEXT: __cstring
# CHECK-SYNTHETIC-ORDER-NEXT: bar
# CHECK-SYNTHETIC-ORDER-NEXT: foo
# CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info
# CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs
@ -52,5 +54,5 @@
.asciz ""
.section __TEXT,bar
.space 1
.section __TEXT,foo
.section __TEXT,foo,regular,pure_instructions
.space 1

View File

@ -44,6 +44,7 @@ static_library("MachO") {
"OutputSegment.cpp",
"Relocations.cpp",
"SectionPriorities.cpp",
"Sections.cpp",
"SymbolTable.cpp",
"Symbols.cpp",
"SyntheticSections.cpp",