[BOLT] Avoid EH trampolines for PIEs/DSOs (#117106)

We used to emit EH trampolines for PIE/DSO whenever a function fragment
contained a landing pad outside of it. However, it is common to have all
landing pads in a cold fragment even when their throwers are in a hot
one.

To reduce the number of trampolines, analyze landing pads for any given
function fragment, and if they all belong to the same (possibly
different) fragment, designate that fragment as a landing pad fragment
for the "thrower" fragment. Later, emit landing pad fragment symbol as
an LPStart for the thrower LSDA.
This commit is contained in:
Maksim Panchenko 2024-11-21 18:18:30 -08:00 committed by GitHub
parent dc580c9cf6
commit 105ecd8bb2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 200 additions and 34 deletions

View File

@ -527,6 +527,11 @@ private:
/// fragment of the function. /// fragment of the function.
SmallVector<MCSymbol *, 0> LSDASymbols; SmallVector<MCSymbol *, 0> LSDASymbols;
/// Each function fragment may have another fragment containing all landing
/// pads for it. If that's the case, the LP fragment will be stored in the
/// vector below with indexing starting with the main fragment.
SmallVector<std::optional<FragmentNum>, 0> LPFragments;
/// Map to discover which CFIs are attached to a given instruction offset. /// Map to discover which CFIs are attached to a given instruction offset.
/// Maps an instruction offset into a FrameInstructions offset. /// Maps an instruction offset into a FrameInstructions offset.
/// This is only relevant to the buildCFG phase and is discarded afterwards. /// This is only relevant to the buildCFG phase and is discarded afterwards.
@ -1885,6 +1890,42 @@ public:
return LSDASymbols[F.get()]; return LSDASymbols[F.get()];
} }
/// If all landing pads for the function fragment \p F are located in fragment
/// \p LPF, designate \p LPF as a landing-pad fragment for \p F. Passing
/// std::nullopt as \p LPF means that landing pads for \p F are located in
/// more than one fragment.
void setLPFragment(const FragmentNum F, std::optional<FragmentNum> LPF) {
  const auto Index = F.get();
  // Grow the per-fragment table on demand so that slot Index exists before
  // it is written.
  if (LPFragments.size() <= Index)
    LPFragments.resize(Index + 1);
  LPFragments[Index] = LPF;
}
/// If function fragment \p F has a designated landing pad fragment, i.e. a
/// fragment that contains all landing pads for throwers in \p F, then return
/// that landing pad fragment number. If \p F does not need landing pads,
/// return \p F. Return std::nullopt if landing pads for \p F are scattered
/// among several function fragments.
std::optional<FragmentNum> getLPFragment(const FragmentNum F) {
  if (isSplit()) {
    const auto Index = F.get();
    // Only fragments that have a recorded entry can report an LP fragment;
    // anything past the end of the table has none.
    if (Index < LPFragments.size())
      return LPFragments[Index];
    return std::nullopt;
  }

  // For a function that is not split, only the main fragment is a valid
  // query, and it is reported as its own landing pad fragment.
  assert(F == FragmentNum::main() && "Invalid fragment number");
  return FragmentNum::main();
}
/// Return a symbol corresponding to a landing pad fragment for fragment \p F.
/// See getLPFragment().
MCSymbol *getLPStartSymbol(const FragmentNum F) {
  // No single landing pad fragment for F means there is no symbol to return.
  const std::optional<FragmentNum> LPFragment = getLPFragment(F);
  return LPFragment ? getSymbol(*LPFragment) : nullptr;
}
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; } void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
uint64_t getOutputDataAddress() const { return OutputDataOffset; } uint64_t getOutputDataAddress() const { return OutputDataOffset; }

View File

@ -140,7 +140,7 @@ private:
void emitCFIInstruction(const MCCFIInstruction &Inst) const; void emitCFIInstruction(const MCCFIInstruction &Inst) const;
/// Emit exception handling ranges for the function. /// Emit exception handling ranges for the function fragment.
void emitLSDA(BinaryFunction &BF, const FunctionFragment &FF); void emitLSDA(BinaryFunction &BF, const FunctionFragment &FF);
/// Emit line number information corresponding to \p NewLoc. \p PrevLoc /// Emit line number information corresponding to \p NewLoc. \p PrevLoc
@ -915,15 +915,15 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
// Emit the LSDA header. // Emit the LSDA header.
// If LPStart is omitted, then the start of the FDE is used as a base for // If LPStart is omitted, then the start of the FDE is used as a base for
// landing pad displacements. Then, if a cold fragment starts with a landing // landing pad displacements. Then, if a cold fragment starts with
// pad, this means that the first landing pad offset will be 0. However, C++ // a landing pad, this means that the first landing pad offset will be 0.
// runtime treats 0 as if there is no landing pad present, thus we *must* emit // However, C++ runtime will treat 0 as if there is no landing pad, thus we
// non-zero offsets for all valid LPs. // cannot emit LP offset as 0.
// //
// As a solution, for fixed-address binaries we set LPStart to 0, and for // As a solution, for fixed-address binaries we set LPStart to 0, and for
// position-independent binaries we set LP start to FDE start minus one byte // position-independent binaries we offset LP start by one byte.
// for FDEs that start with a landing pad. bool NeedsLPAdjustment = false;
const bool NeedsLPAdjustment = !FF.empty() && FF.front()->isLandingPad(); const MCSymbol *LPStartSymbol = nullptr;
std::function<void(const MCSymbol *)> emitLandingPad; std::function<void(const MCSymbol *)> emitLandingPad;
if (BC.HasFixedLoadAddress) { if (BC.HasFixedLoadAddress) {
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
@ -935,15 +935,24 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
Streamer.emitIntValue(0, 4); Streamer.emitIntValue(0, 4);
}; };
} else { } else {
if (NeedsLPAdjustment) { std::optional<FragmentNum> LPFN = BF.getLPFragment(FF.getFragmentNum());
// Use relative LPStart format and emit LPStart as [SymbolStart - 1]. LPStartSymbol = BF.getLPStartSymbol(FF.getFragmentNum());
assert(LPFN && LPStartSymbol && "Expected LPStart symbol to be set");
const FunctionFragment &LPFragment = BF.getLayout().getFragment(*LPFN);
NeedsLPAdjustment =
(!LPFragment.empty() && LPFragment.front()->isLandingPad());
// Emit LPStart encoding and optionally LPStart.
if (NeedsLPAdjustment || LPStartSymbol != StartSymbol) {
Streamer.emitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1); Streamer.emitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
MCSymbol *DotSymbol = BC.Ctx->createTempSymbol("LPBase"); MCSymbol *DotSymbol = BC.Ctx->createTempSymbol("LPBase");
Streamer.emitLabel(DotSymbol); Streamer.emitLabel(DotSymbol);
const MCExpr *LPStartExpr = MCBinaryExpr::createSub( const MCExpr *LPStartExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx), MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx),
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx); MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx);
if (NeedsLPAdjustment)
LPStartExpr = MCBinaryExpr::createSub( LPStartExpr = MCBinaryExpr::createSub(
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx); LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
Streamer.emitValue(LPStartExpr, 4); Streamer.emitValue(LPStartExpr, 4);
@ -955,7 +964,7 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
if (LPSymbol) { if (LPSymbol) {
const MCExpr *LPOffsetExpr = MCBinaryExpr::createSub( const MCExpr *LPOffsetExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(LPSymbol, *BC.Ctx), MCSymbolRefExpr::create(LPSymbol, *BC.Ctx),
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx), *BC.Ctx); MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx), *BC.Ctx);
if (NeedsLPAdjustment) if (NeedsLPAdjustment)
LPOffsetExpr = MCBinaryExpr::createAdd( LPOffsetExpr = MCBinaryExpr::createAdd(
LPOffsetExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx); LPOffsetExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);

View File

@ -901,8 +901,43 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
// have to be placed in the same fragment. When we split them, create // have to be placed in the same fragment. When we split them, create
// trampoline landing pads that will redirect the execution to real LPs. // trampoline landing pads that will redirect the execution to real LPs.
TrampolineSetType Trampolines; TrampolineSetType Trampolines;
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) {
// If all landing pads for this fragment are grouped in one (potentially
// different) fragment, we can set LPStart to the start of that fragment
// and avoid trampoline code.
bool NeedsTrampolines = false;
for (FunctionFragment &FF : BF.getLayout().fragments()) {
// Vector of fragments that contain landing pads for this fragment.
SmallVector<FragmentNum, 4> LandingPadFragments;
for (const BinaryBasicBlock *BB : FF)
for (const BinaryBasicBlock *LPB : BB->landing_pads())
LandingPadFragments.push_back(LPB->getFragmentNum());
// Eliminate duplicate entries from the vector.
llvm::sort(LandingPadFragments);
auto Last = llvm::unique(LandingPadFragments);
LandingPadFragments.erase(Last, LandingPadFragments.end());
if (LandingPadFragments.size() == 0) {
// If the fragment has no landing pads, we can safely set itself as its
// landing pad fragment.
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
} else if (LandingPadFragments.size() == 1) {
BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front());
} else {
NeedsTrampolines = true;
break;
}
}
// Trampolines guarantee that all landing pads for any given fragment will
// be contained in the same fragment.
if (NeedsTrampolines) {
for (FunctionFragment &FF : BF.getLayout().fragments())
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
Trampolines = createEHTrampolines(BF); Trampolines = createEHTrampolines(BF);
}
}
// Check the new size to see if it's worth splitting the function. // Check the new size to see if it's worth splitting the function.
if (BC.isX86() && LayoutUpdated) { if (BC.isX86() && LayoutUpdated) {
@ -933,6 +968,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
} }
} }
// Restore LP fragment for the main fragment if the splitting was undone.
if (BF.hasEHRanges() && !BF.isSplit())
BF.setLPFragment(FragmentNum::main(), FragmentNum::main());
// Fix branches if the splitting decision of the pass after function // Fix branches if the splitting decision of the pass after function
// reordering is different from that of the pass before function reordering. // reordering is different from that of the pass before function reordering.
if (LayoutUpdated && BC.HasFinalizedFunctionOrder) if (LayoutUpdated && BC.HasFinalizedFunctionOrder)

View File

@ -0,0 +1,86 @@
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: ld.lld --pie %t.o -o %t.exe -q
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --split-functions --split-eh \
# RUN: --split-all-cold --print-after-lowering --print-only=_start 2>&1 \
# RUN: | FileCheck %s
## _start has two landing pads: one hot and one cold. Hence, BOLT will introduce
## a landing pad trampoline. However, the trampoline code will make the main
## split fragment larger than the whole function before split. Then BOLT will
## undo the splitting and remove the trampoline.
# CHECK: Binary Function "_start"
# CHECK: IsSplit :
# CHECK-SAME: 0
## Check that a landing pad trampoline was created, but contains no instructions
## and falls through to the real landing pad.
# CHECK: {{^[^[:space:]]+}} (0 instructions
# CHECK-NEXT: Landing Pad{{$}}
# CHECK: Exec Count
# CHECK-SAME: : 0
# CHECK: Successors:
# CHECK-SAME: [[LP:[^[:space:]]+]]
# CHECK-EMPTY:
# CHECK-NEXT: [[LP]]
.text
.global foo
.type foo, %function
foo:
.cfi_startproc
ret
.cfi_endproc
.size foo, .-foo
.globl _start
.type _start, %function
_start:
# FDATA: 0 [unknown] 0 1 _start 0 1 100
.Lfunc_begin0:
.cfi_startproc
.cfi_lsda 27, .Lexception0
call foo
.Ltmp0:
call foo
.Ltmp1:
ret
## Cold landing pad.
.LLP1:
ret
## Hot landing pad.
LLP0:
# FDATA: 0 [unknown] 0 1 _start #LLP0# 1 100
ret
.cfi_endproc
.Lfunc_end0:
.size _start, .-_start
## EH table.
.section .gcc_except_table,"a",@progbits
.p2align 2
GCC_except_table0:
.Lexception0:
.byte 255 # @LPStart Encoding = omit
.byte 255 # @TType Encoding = omit
.byte 1 # Call site Encoding = uleb128
.uleb128 .Lcst_end0-.Lcst_begin0
.Lcst_begin0:
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
.uleb128 .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
.uleb128 LLP0-.Lfunc_begin0 # jumps to LLP0
.byte 0 # On action: cleanup
.uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
.uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
.uleb128 .LLP1-.Lfunc_begin0 # jumps to .LLP1
.byte 0 # On action: cleanup
.Lcst_end0:

View File

@ -11,25 +11,16 @@ RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
RUN: --split-functions --split-eh --print-after-lowering \ RUN: --split-functions --split-eh --print-after-lowering \
RUN: --print-only=_Z10throw_testiPPc 2>&1 | FileCheck %s RUN: --print-only=_Z10throw_testiPPc 2>&1 | FileCheck %s
## Hot code in the test case gets larger after splitting because of jump ## Check that a landing pad is split from its thrower and does not require a
## instruction relaxation. Check that BOLT reverts the split correctly. ## trampoline LP.
CHECK: Binary Function "_Z10throw_testiPPc" CHECK: Binary Function "_Z10throw_testiPPc"
CHECK: IsSplit : CHECK: IsSplit :
CHECK-SAME: 0 CHECK-SAME: 1
CHECK: callq {{.*}} # handler: [[LPAD:.*]];
## Check that the landing pad trampoline was created, but contains no CHECK-NOT: Landing Pad{{$}}
## instructions and falls to the real landing pad. CHECK: HOT-COLD SPLIT POINT
CHECK: {{^[^[:space:]]+}} (0 instructions CHECK: {{^}}[[LPAD]]
CHECK-NEXT: Landing Pad{{$}} CHECK-NEXT: Landing Pad
CHECK: Exec Count
CHECK-SAME: : 0
CHECK: Successors:
CHECK-SAME: [[LP:[^[:space:]]+]]
CHECK-EMPTY:
CHECK-NEXT: [[LP]]
CHECK-DAG: Exec Count
CHECK-NOT: Exec Count
CHECK-DAG: callq __cxa_begin_catch
## Verify the output still executes correctly when the exception path is being ## Verify the output still executes correctly when the exception path is being
## taken. ## taken.