llvm-project/bolt/lib/Core/Exceptions.cpp
Maksim Panchenko 1b8e0cf090
[BOLT] Never emit "large" functions (#115974)
"Large" functions are functions that are too big to fit into their
original slots after code modifications. CheckLargeFunctions pass is
designed to prevent such functions from emission. Extend this pass to
work with functions with constant islands.

Now that CheckLargeFunctions covers all functions, it guarantees that we
will never see such functions after code emission on all platforms
(previously it was guaranteed on x86 only). Hence, we can get rid of
RewriteInstance extensions that were meant to support "large" functions.
2024-11-13 09:58:44 -08:00

894 lines
33 KiB
C++

//===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements functions for handling C++ exception meta data.
//
// Some of the code is taken from examples/ExceptionDemo
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/Exceptions.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-exceptions"
using namespace llvm::dwarf;
// Command-line options referenced by this file.
namespace opts {
// Option category and verbosity level are defined elsewhere; only declared
// here so the code below can consult them.
extern llvm::cl::OptionCategory BoltCategory;
extern llvm::cl::opt<unsigned> Verbosity;
// Hidden boolean option "-print-exceptions". When set, parseLSDA() dumps the
// LSDA header, call sites and actions it decodes to BC.outs().
static llvm::cl::opt<bool>
PrintExceptions("print-exceptions",
llvm::cl::desc("print exception handling data"),
llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
} // namespace opts
namespace llvm {
namespace bolt {
// Read and dump the .gcc_except_table section entry.
//
// .gcc_except_table section contains a set of Language-Specific Data Areas -
// a fancy name for exception handling tables. There's one LSDA entry per
// function. However, we can't actually tell which function LSDA refers to
// unless we parse .eh_frame entry that refers to the LSDA.
// Then inside LSDA most addresses are encoded relative to the function start,
// so we need the function context in order to get to real addresses.
//
// The best visual representation of the tables comprising LSDA and
// relationships between them is illustrated at:
// https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
// Keep in mind that GCC implementation deviates slightly from that document.
//
// To summarize, there are 4 tables in LSDA: call site table, actions table,
// types table, and types index table (for indirection). The main table contains
// call site entries. Each call site includes a PC range that can throw an
// exception, a handler (landing pad), and a reference to an entry in the action
// table. The handler and/or action could be 0. The action entry is a head
// of a list of actions associated with a call site. The action table contains
// all such lists (it could be optimized to share list tails). Each action could
// be either to catch an exception of a given type, to perform a cleanup, or to
// propagate the exception after filtering it out (e.g. to make sure function
// exception specification is not violated). Catch action contains a reference
// to an entry in the type table, and filter action refers to an entry in the
// type index table to encode a set of types to filter.
//
// Call site table follows LSDA header. Action table immediately follows the
// call site table.
//
// Both types table and type index table start at the same location, but they
// grow in opposite directions (types go up, indices go down). The beginning of
// these tables is encoded in LSDA header. Sizes for both of the tables are not
// included anywhere.
//
// We have to parse all of the tables to determine their sizes. Then we have
// to parse the call site table and associate discovered information with
// actual call instructions and landing pad blocks.
//
// For the purpose of rewriting exception handling tables, we can reuse action,
// and type index tables in their original binary format.
//
// Type table could be encoded using position-independent references, and thus
// may require relocation.
//
// Ideally we should be able to re-write LSDA in-place, without the need to
// allocate a new space for it. Sadly there's no guarantee that the new call
// site table will be the same size as GCC uses uleb encodings for PC offsets.
//
// Note: some functions have LSDA entries with 0 call site entries.
// Parse the LSDA (exception handling table) that belongs to this function.
//
// LSDASectionData holds the bytes of the section containing the LSDA and
// LSDASectionAddress is the address at which that section starts; the
// difference between getLSDAAddress() and LSDASectionAddress is the offset of
// this function's LSDA inside LSDASectionData.
//
// While walking the call site table, every call instruction covered by a call
// site is annotated with its landing pad symbol and action entry. Afterwards
// the action table, the type table and the type index table are recorded in
// LSDAActionTable, LSDATypeTable / LSDATypeAddressTable and LSDATypeIndexTable.
//
// Returns success when the function has no LSDA or when parsing completed, and
// a fatal BOLT error when the LPStart pointer cannot be decoded.
Error BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
uint64_t LSDASectionAddress) {
assert(CurrentState == State::Disassembled && "unexpected function state");
// Nothing to do for functions without an LSDA.
if (!getLSDAAddress())
return Error::success();
DWARFDataExtractor Data(
StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
LSDASectionData.size()),
BC.AsmInfo->isLittleEndian(), BC.AsmInfo->getCodePointerSize());
// Offset is the read cursor into the section data for the LSDA header.
uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
assert(Data.isValidOffset(Offset) && "wrong LSDA address");
const uint8_t LPStartEncoding = Data.getU8(&Offset);
// Landing pads are relative to the function address unless the header
// carries an explicit LPStart value.
uint64_t LPStart = Address;
if (LPStartEncoding != dwarf::DW_EH_PE_omit) {
std::optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
&Offset, LPStartEncoding, Offset + LSDASectionAddress);
if (!MaybeLPStart) {
BC.errs() << "BOLT-ERROR: unsupported LPStartEncoding: "
<< (unsigned)LPStartEncoding << '\n';
return createFatalBOLTError("");
}
LPStart = *MaybeLPStart;
}
const uint8_t TTypeEncoding = Data.getU8(&Offset);
LSDATypeEncoding = TTypeEncoding;
size_t TTypeEncodingSize = 0;
// TTypeEnd stays 0 when the type table is omitted; it is later used both as
// an offset and as the "type table present" flag.
uintptr_t TTypeEnd = 0;
if (TTypeEncoding != DW_EH_PE_omit) {
TTypeEnd = Data.getULEB128(&Offset);
TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
}
if (opts::PrintExceptions) {
BC.outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
<< " for function " << *this << "]:\n";
BC.outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
<< '\n';
BC.outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
BC.outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding)
<< '\n';
BC.outs() << "TType End = " << TTypeEnd << '\n';
}
// Table to store list of indices in type table. Entries are uleb128 values.
// TTypeEnd is applied relative to the cursor position right after it was read.
const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
// Offset past the last decoded index.
uint64_t MaxTypeIndexTableOffset = 0;
// Max positive index used in type table.
unsigned MaxTypeIndex = 0;
// The actual type info table starts at the same location, but grows in
// opposite direction. TTypeEncoding is used to encode stored values.
const uint64_t TypeTableStart = Offset + TTypeEnd;
uint8_t CallSiteEncoding = Data.getU8(&Offset);
uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
// The call site table starts right after its length field, and the action
// table starts where the call site table ends.
uint64_t CallSiteTableStart = Offset;
uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
uint64_t CallSitePtr = CallSiteTableStart;
uint64_t ActionTableStart = CallSiteTableEnd;
if (opts::PrintExceptions) {
BC.outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
BC.outs() << "CallSite table length = " << CallSiteTableLength << '\n';
BC.outs() << '\n';
}
// The function has EH ranges iff the call site table is non-empty.
this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
const uint64_t RangeBase = getAddress();
while (CallSitePtr < CallSiteTableEnd) {
// Each call site record is: start, length, landing pad (all encoded with
// CallSiteEncoding) followed by a uleb128 action entry.
// NOTE(review): the optionals returned by getEncodedPointer() are
// dereferenced without a check, unlike LPStart above - this presumably
// relies on CallSiteEncoding always being decodable; confirm.
uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
CallSitePtr + LSDASectionAddress);
uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
CallSitePtr + LSDASectionAddress);
uint64_t LandingPad = *Data.getEncodedPointer(
&CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
// A zero landing pad means "no handler"; otherwise it is relative to LPStart.
if (LandingPad)
LandingPad += LPStart;
if (opts::PrintExceptions) {
BC.outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
<< ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
<< "); landing pad: 0x" << Twine::utohexstr(LandingPad)
<< "; action entry: 0x" << Twine::utohexstr(ActionEntry)
<< "\n";
BC.outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
<< '\n';
}
// Create a handler entry if necessary.
MCSymbol *LPSymbol = nullptr;
if (LandingPad) {
// Verify if landing pad code is located outside current function
// Support landing pad to builtin_unreachable
// (a landing pad exactly at Address + getSize() is still accepted here).
if (LandingPad < Address || LandingPad > Address + getSize()) {
BinaryFunction *Fragment =
BC.getBinaryFunctionContainingAddress(LandingPad);
assert(Fragment != nullptr &&
"BOLT-ERROR: cannot find landing pad fragment");
BC.addInterproceduralReference(this, Fragment->getAddress());
BC.processInterproceduralReferences();
assert(BC.areRelatedFragments(this, Fragment) &&
"BOLT-ERROR: cannot have landing pads in different functions");
setHasIndirectTargetToSplitFragment(true);
BC.addFragmentsToSkip(this);
// Parsing stops here: the remaining call sites and the type tables of
// this LSDA are not processed for such a function.
return Error::success();
}
const uint64_t LPOffset = LandingPad - getAddress();
if (!getInstructionAtOffset(LPOffset)) {
if (opts::Verbosity >= 1)
BC.errs() << "BOLT-WARNING: landing pad "
<< Twine::utohexstr(LPOffset)
<< " not pointing to an instruction in function " << *this
<< " - ignoring.\n";
} else {
// Reuse an existing label at the landing pad offset or create a new one.
auto Label = Labels.find(LPOffset);
if (Label != Labels.end()) {
LPSymbol = Label->second;
} else {
LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
Labels[LPOffset] = LPSymbol;
}
}
}
// Mark all call instructions in the range.
// Start is used directly as the key into Instructions, i.e. it is an offset
// from the beginning of the function.
auto II = Instructions.find(Start);
auto IE = Instructions.end();
assert(II != IE && "exception range not pointing to an instruction");
// do/while: the instruction at Start is always visited, even when Length
// is 0.
do {
MCInst &Instruction = II->second;
if (BC.MIB->isCall(Instruction) &&
!BC.MIB->getConditionalTailCall(Instruction)) {
assert(!BC.MIB->isInvoke(Instruction) &&
"overlapping exception ranges detected");
// Add extra operands to a call instruction making it an invoke from
// now on.
BC.MIB->addEHInfo(Instruction,
MCPlus::MCLandingPad(LPSymbol, ActionEntry));
}
++II;
} while (II != IE && II->first < Start + Length);
if (ActionEntry != 0) {
// Helper printing the type referenced by a (positive) type table index.
auto printType = [&](int Index, raw_ostream &OS) {
assert(Index > 0 && "only positive indices are valid");
// Type table entries are fixed-size and laid out below TypeTableStart.
uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
uint64_t TypeAddress =
*Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
// A pc-relative entry that decodes to its own address is treated as 0.
if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
TypeAddress = 0;
if (TypeAddress == 0) {
OS << "<all>";
return;
}
if (TTypeEncoding & DW_EH_PE_indirect) {
ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
assert(PointerOrErr && "failed to decode indirect address");
TypeAddress = *PointerOrErr;
}
if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
OS << TypeSymBD->getName();
else
OS << "0x" << Twine::utohexstr(TypeAddress);
};
if (opts::PrintExceptions)
BC.outs() << " actions: ";
// ActionEntry is a 1-based byte offset into the action table.
uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
int64_t ActionType;
int64_t ActionNext;
const char *Sep = "";
// Follow the chain of action records until a record with a zero "next"
// displacement is reached.
do {
ActionType = Data.getSLEB128(&ActionPtr);
// Position of the "next" field; ActionNext is applied relative to it.
// NOTE(review): Self is 32-bit while ActionPtr is 64-bit, so the offset
// is truncated here - confirm this is intentional.
const uint32_t Self = ActionPtr;
ActionNext = Data.getSLEB128(&ActionPtr);
if (opts::PrintExceptions)
BC.outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
if (ActionType == 0) {
if (opts::PrintExceptions)
BC.outs() << "cleanup";
} else if (ActionType > 0) {
// It's an index into a type table.
MaxTypeIndex =
std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
if (opts::PrintExceptions) {
BC.outs() << "catch type ";
printType(ActionType, BC.outs());
}
} else { // ActionType < 0
if (opts::PrintExceptions)
BC.outs() << "filter exception types ";
const char *TSep = "";
// ActionType is a negative *byte* offset into *uleb128-encoded* table
// of indices with base 1.
// E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
// encoded using uleb128 thus we cannot directly dereference them.
uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
// The list of indices is terminated by a zero entry.
while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
if (opts::PrintExceptions) {
BC.outs() << TSep;
printType(Index, BC.outs());
TSep = ", ";
}
}
// Remember how far into the type index table we have read so far.
MaxTypeIndexTableOffset = std::max(
MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
}
Sep = "; ";
ActionPtr = Self + ActionNext;
} while (ActionNext);
if (opts::PrintExceptions)
BC.outs() << '\n';
}
}
if (opts::PrintExceptions)
BC.outs() << '\n';
assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
Data.getData().size() &&
"LSDA entry has crossed section boundary");
// The tables are only recorded when the header declared a type table.
if (TTypeEnd) {
// The action table spans from the end of the call site table up to the
// lowest type table entry that was referenced.
LSDAActionTable = LSDASectionData.slice(
ActionTableStart, TypeIndexTableStart -
MaxTypeIndex * TTypeEncodingSize -
ActionTableStart);
for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
uint64_t TypeAddress =
*Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
TypeAddress = 0;
// For indirect encodings keep both the address of the pointer
// (LSDATypeAddressTable) and the value it points to (LSDATypeTable).
if (TTypeEncoding & DW_EH_PE_indirect) {
LSDATypeAddressTable.emplace_back(TypeAddress);
if (TypeAddress) {
ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
assert(PointerOrErr && "failed to decode indirect address");
TypeAddress = *PointerOrErr;
}
}
LSDATypeTable.emplace_back(TypeAddress);
}
LSDATypeIndexTable =
LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
}
return Error::success();
}
/// Recompute the exception call site list of this function from the EH
/// annotations attached to its call instructions.
///
/// Every layout fragment is scanned independently. Consecutive invokes that
/// share the same landing pad and action are merged into a single call site;
/// a non-throwing call ends the current call site. The collected sites are
/// handed to addCallSites().
void BinaryFunction::updateEHRanges() {
  if (getSize() == 0)
    return;

  assert(CurrentState == State::CFG_Finalized && "unexpected state");

  // Handler description attached to a throwing call.
  struct EHInfo {
    const MCSymbol *LP; // landing pad
    uint64_t Action;
  };

  // Call sites collected for all fragments.
  CallSitesList Sites;

  for (FunctionFragment &FF : getLayout().fragments()) {
    // Label opening the call site currently being built; null while we are
    // inside a no-throw gap.
    const MCSymbol *RangeBegin = nullptr;
    // Handler of the call site currently being built.
    EHInfo ActiveEH = {nullptr, 0};

    for (BinaryBasicBlock *const BB : FF) {
      for (MCInst &Instr : *BB) {
        if (!BC.MIB->isCall(Instr))
          continue;

        // An invoke can throw an exception that has to be handled.
        const bool IsInvoke = BC.MIB->isInvoke(Instr);

        // A plain call inside a no-throw gap just extends the gap.
        if (!(IsInvoke || RangeBegin))
          continue;

        // Fetch the handler recorded on the instruction, if any.
        const MCSymbol *LP = nullptr;
        uint64_t Action = 0;
        const std::optional<MCPlus::MCLandingPad> InstrEH =
            BC.MIB->getEHInfo(Instr);
        if (InstrEH)
          std::tie(LP, Action) = *InstrEH;

        // An invoke with an unchanged handler extends the open call site.
        const bool SameHandler =
            ActiveEH.LP == LP && ActiveEH.Action == Action;
        if (IsInvoke && RangeBegin && SameHandler)
          continue;

        // One label marks both the end of the previous call site and the
        // start of the next one. Creating it requires the context lock.
        MCSymbol *Boundary = BC.MIB->getInstLabel(Instr);
        if (!Boundary) {
          std::unique_lock<llvm::sys::RWMutex> Lock(BC.CtxMutex);
          Boundary = BC.MIB->getOrCreateInstLabel(Instr, "EH", BC.Ctx.get());
        }

        // Possible situations at this point:
        //   I.   handler changed: close the open call site, open a new one;
        //   II.  invoke after a gap: open a new call site;
        //   III. plain call: close the open call site and start a gap.
        const MCSymbol *RangeEnd = RangeBegin ? Boundary : nullptr;

        // I, III: emit the call site that ends here.
        if (RangeEnd)
          Sites.emplace_back(
              FF.getFragmentNum(),
              CallSite{RangeBegin, RangeEnd, ActiveEH.LP, ActiveEH.Action});

        if (IsInvoke) {
          // I, II: this instruction opens the next call site.
          RangeBegin = Boundary;
          ActiveEH = EHInfo{LP, Action};
        } else {
          // III: we are in a gap now.
          RangeBegin = nullptr;
        }
      }
    }

    // A call site still open at the end of the fragment extends to the
    // fragment's end label.
    if (RangeBegin) {
      const MCSymbol *FragmentEnd = getFunctionEndLabel(FF.getFragmentNum());
      Sites.emplace_back(
          FF.getFragmentNum(),
          CallSite{RangeBegin, FragmentEnd, ActiveEH.LP, ActiveEH.Action});
    }
  }

  addCallSites(Sites);
}
/// Mask selecting the two most significant bits of a CFI opcode byte.
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
/// Index the FDEs of \p EHFrame by their initial location so that
/// fillCFIInfoFor() can look them up by function address.
CFIReaderWriter::CFIReaderWriter(BinaryContext &BC,
                                 const DWARFDebugFrame &EHFrame)
    : BC(BC) {
  for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
    // Only FDEs are indexed; CIEs are skipped.
    const auto *Candidate = dyn_cast<dwarf::FDE>(&Entry);
    if (!Candidate)
      continue;

    // There could be multiple FDEs with the same initial address, and perhaps
    // different sizes (address ranges). Use the first entry with non-zero
    // size.
    const uint64_t InitialLoc = Candidate->getInitialLocation();
    auto Slot = FDEs.lower_bound(InitialLoc);
    const bool AlreadyIndexed =
        Slot != FDEs.end() && Slot->first == InitialLoc;

    // First FDE seen for this address: record it.
    if (!AlreadyIndexed) {
      FDEs.emplace_hint(Slot, InitialLoc, Candidate);
      continue;
    }

    // An empty duplicate never replaces what is already recorded.
    if (!Candidate->getAddressRange())
      continue;

    // A non-empty FDE replaces a previously recorded empty one.
    if (Slot->second->getAddressRange() == 0) {
      Slot->second = Candidate;
      continue;
    }

    // Two non-empty FDEs for one address: keep the first and report it.
    if (opts::Verbosity > 0)
      BC.errs() << "BOLT-WARNING: different FDEs for function at 0x"
                << Twine::utohexstr(Slot->first)
                << " detected; sizes: " << Slot->second->getAddressRange()
                << " and " << Candidate->getAddressRange() << '\n';
  }
}
/// Populate \p Function with the frame information found in the FDE that
/// starts at the function's address.
///
/// Sets the function's LSDA address (0 when the FDE has none) and, when the
/// linked CIE has one, its personality function and encoding. The CFI
/// programs of the linked CIE and of the FDE are then translated, in that
/// order, into MCCFIInstructions attached to the function at the offsets they
/// apply to.
///
/// \returns true on success, including the case where no FDE with a non-zero
/// address range exists for the function; false as soon as a CFI instruction
/// that is not supported here is encountered.
bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
  uint64_t Address = Function.getAddress();
  auto I = FDEs.find(Address);
  // Ignore zero-length FDE ranges.
  if (I == FDEs.end() || !I->second->getAddressRange())
    return true;

  const FDE &CurFDE = *I->second;
  std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
  Function.setLSDAAddress(LSDA ? *LSDA : 0);

  // Offset tracks the function offset the next CFI instruction applies to;
  // it is moved by the advance_loc / set_loc instructions below.
  uint64_t Offset = Function.getFirstInstructionOffset();
  uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
  uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
  if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
    Function.setPersonalityFunction(
        *CurFDE.getLinkedCIE()->getPersonalityAddress());
    Function.setPersonalityEncoding(
        *CurFDE.getLinkedCIE()->getPersonalityEncoding());
  }

  // Translate a single CFI instruction. Returns false for instructions that
  // cannot be represented.
  auto decodeFrameInstruction = [this, &Function, &Offset, Address,
                                 CodeAlignment, DataAlignment](
                                    const CFIProgram::Instruction &Instr) {
    // Opcode is only used below as the byte written when re-encoding
    // expression instructions; dispatch is done on Instr.Opcode.
    // NOTE(review): presumably Instr.Opcode already holds the primary opcode
    // without operand bits for the primary-opcode forms - confirm against the
    // CFI program parser.
    uint8_t Opcode = Instr.Opcode;
    if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
      Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
    switch (Instr.Opcode) {
    case DW_CFA_nop:
      break;
    case DW_CFA_advance_loc4:
    case DW_CFA_advance_loc2:
    case DW_CFA_advance_loc1:
    case DW_CFA_advance_loc:
      // Advance our current address
      Offset += CodeAlignment * int64_t(Instr.Ops[0]);
      break;
    case DW_CFA_offset_extended_sf:
      Function.addCFIInstruction(
          Offset,
          MCCFIInstruction::createOffset(
              nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
      break;
    case DW_CFA_offset_extended:
    case DW_CFA_offset:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
                                                 DataAlignment * Instr.Ops[1]));
      break;
    case DW_CFA_restore_extended:
    case DW_CFA_restore:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_set_loc:
      // set_loc carries an address; convert it to a function offset.
      assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
      assert(Instr.Ops[0] <= Address + Function.getSize() &&
             "set_loc out of function bounds");
      Offset = Instr.Ops[0] - Address;
      break;
    case DW_CFA_undefined:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_same_value:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_register:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
                                                   Instr.Ops[1]));
      break;
    case DW_CFA_remember_state:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createRememberState(nullptr));
      break;
    case DW_CFA_restore_state:
      Function.addCFIInstruction(Offset,
                                 MCCFIInstruction::createRestoreState(nullptr));
      break;
    case DW_CFA_def_cfa:
      Function.addCFIInstruction(
          Offset,
          MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
      break;
    case DW_CFA_def_cfa_sf:
      Function.addCFIInstruction(
          Offset,
          MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
                                      DataAlignment * int64_t(Instr.Ops[1])));
      break;
    case DW_CFA_def_cfa_register:
      Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
                                             nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_def_cfa_offset:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
      break;
    case DW_CFA_def_cfa_offset_sf:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::cfiDefCfaOffset(
                      nullptr, DataAlignment * int64_t(Instr.Ops[0])));
      break;
    case DW_CFA_GNU_args_size:
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
      Function.setUsesGnuArgsSize();
      break;
    case DW_CFA_val_offset_sf:
    case DW_CFA_val_offset:
      if (opts::Verbosity >= 1) {
        BC.errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
      }
      return false;
    case DW_CFA_def_cfa_expression:
    case DW_CFA_val_expression:
    case DW_CFA_expression: {
      StringRef ExprBytes = Instr.Expression->getData();
      std::string Str;
      raw_string_ostream OS(Str);
      // Manually encode this instruction using CFI escape
      // (the uint8_t opcode is streamed as a single raw byte on purpose).
      OS << Opcode;
      // def_cfa_expression has no leading operand; the other two forms write
      // Ops[0] first.
      if (Opcode != DW_CFA_def_cfa_expression)
        encodeULEB128(Instr.Ops[0], OS);
      encodeULEB128(ExprBytes.size(), OS);
      OS << ExprBytes;
      Function.addCFIInstruction(
          Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
      break;
    }
    case DW_CFA_MIPS_advance_loc8:
      if (opts::Verbosity >= 1)
        BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
      return false;
    case DW_CFA_GNU_window_save:
      // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same
      // id but mean different things. The latter is used in AArch64.
      if (Function.getBinaryContext().isAArch64()) {
        Function.addCFIInstruction(
            Offset, MCCFIInstruction::createNegateRAState(nullptr));
        break;
      }
      if (opts::Verbosity >= 1)
        BC.errs() << "BOLT-WARNING: DW_CFA_GNU_window_save unimplemented\n";
      return false;
    case DW_CFA_lo_user:
    case DW_CFA_hi_user:
      if (opts::Verbosity >= 1)
        BC.errs() << "BOLT-WARNING: DW_CFA_*_user unimplemented\n";
      return false;
    default:
      // Print the opcode as a number: streaming the uint8_t directly would
      // emit it as a raw character.
      if (opts::Verbosity >= 1)
        BC.errs() << "BOLT-WARNING: Unrecognized CFI instruction: "
                  << static_cast<unsigned>(Instr.Opcode) << '\n';
      return false;
    }

    return true;
  };

  // The CIE's initial instructions come first, then the FDE's own program.
  for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
    if (!decodeFrameInstruction(Instr))
      return false;

  for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
    if (!decodeFrameInstruction(Instr))
      return false;

  return true;
}
std::vector<char>
CFIReaderWriter::generateEHFrameHeader(const DWARFDebugFrame &OldEHFrame,
const DWARFDebugFrame &NewEHFrame,
uint64_t EHFrameHeaderAddress) const {
// Common PC -> FDE map to be written into .eh_frame_hdr.
std::map<uint64_t, uint64_t> PCToFDE;
// Initialize PCToFDE using NewEHFrame.
for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
if (FDE == nullptr)
continue;
const uint64_t FuncAddress = FDE->getInitialLocation();
const uint64_t FDEAddress =
NewEHFrame.getEHFrameAddress() + FDE->getOffset();
// Ignore unused FDEs.
if (FuncAddress == 0)
continue;
// Add the address to the map unless we failed to write it.
PCToFDE[FuncAddress] = FDEAddress;
};
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
<< llvm::size(NewEHFrame.entries()) << " entries\n");
// Add entries from the original .eh_frame corresponding to the functions
// that we did not update.
for (const dwarf::FrameEntry &Entry : OldEHFrame) {
const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
if (FDE == nullptr)
continue;
const uint64_t FuncAddress = FDE->getInitialLocation();
const uint64_t FDEAddress =
OldEHFrame.getEHFrameAddress() + FDE->getOffset();
// Add the address if we failed to write it.
if (PCToFDE.count(FuncAddress) == 0) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
<< Twine::utohexstr(FuncAddress) << " is at 0x"
<< Twine::utohexstr(FDEAddress) << '\n');
PCToFDE[FuncAddress] = FDEAddress;
}
};
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
<< llvm::size(OldEHFrame.entries()) << " entries\n");
// Generate a new .eh_frame_hdr based on the new map.
// Header plus table of entries of size 8 bytes.
std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
// Version is 1.
EHFrameHeader[0] = 1;
// Encoding of the eh_frame pointer.
EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
// Encoding of the count field to follow.
EHFrameHeader[2] = DW_EH_PE_udata4;
// Encoding of the table entries - 4-byte offset from the start of the header.
EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
// Address of eh_frame. Use the new one.
support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
// Number of entries in the table (FDE count).
support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
// Write the table at offset 12.
char *Ptr = EHFrameHeader.data();
uint32_t Offset = 12;
for (const auto &PCI : PCToFDE) {
int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
Offset += 4;
int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
Offset += 4;
}
return EHFrameHeader;
}
/// Parse the CIE whose length field starts at \p StartOffset.
///
/// The parser's cursor is expected to point just past the CIE id. The pointer
/// encodings used by dependent FDEs are recorded in a CIEInfo stored under
/// \p StartOffset, and the personality pointer (if any) is reported through
/// PatcherCallback.
///
/// \returns an error for an unknown augmentation character or a misplaced
/// 'z'.
Error EHFrameParser::parseCIE(uint64_t StartOffset) {
  const uint8_t Version = Data.getU8(&Offset);

  const char *RawAugmentation = Data.getCStr(&Offset);
  StringRef AugmentationString(RawAugmentation ? RawAugmentation : "");

  if (Version >= 4) {
    // Version 4+ stores the address size explicitly, followed by the segment
    // descriptor size, which is skipped.
    Data.setAddressSize(Data.getU8(&Offset));
    Offset += 1;
  } else {
    Data.setAddressSize(Data.getAddressSize());
  }

  // Skip code alignment factor
  Data.getULEB128(&Offset);
  // Skip data alignment
  Data.getSLEB128(&Offset);

  // Skip return address register: a single byte in version 1, uleb128
  // otherwise.
  if (Version == 1)
    Offset += 1;
  else
    Data.getULEB128(&Offset);

  uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
  uint32_t LSDAPointerEncoding = DW_EH_PE_omit;

  // Walk the augmentation string to get all the augmentation data.
  const size_t NumAugChars = AugmentationString.size();
  for (size_t Pos = 0; Pos != NumAugChars; ++Pos) {
    const char AugChar = AugmentationString[Pos];

    // These carry no augmentation data.
    if (AugChar == 'S' || AugChar == 'B')
      continue;

    if (AugChar == 'L') {
      LSDAPointerEncoding = Data.getU8(&Offset);
      continue;
    }

    if (AugChar == 'R') {
      FDEPointerEncoding = Data.getU8(&Offset);
      continue;
    }

    if (AugChar == 'P') {
      uint32_t PersonalityEncoding = Data.getU8(&Offset);
      // Base for pc-relative decoding is the position of the pointer itself.
      const uint64_t PCRelBase = EHFrameAddress ? EHFrameAddress + Offset : 0;
      std::optional<uint64_t> Personality =
          Data.getEncodedPointer(&Offset, PersonalityEncoding, PCRelBase);
      // Patch personality address
      if (Personality)
        PatcherCallback(*Personality, Offset, PersonalityEncoding);
      continue;
    }

    if (AugChar == 'z') {
      if (Pos != 0)
        return createStringError(
            errc::invalid_argument,
            "'z' must be the first character at 0x%" PRIx64, StartOffset);
      // Skip augmentation length
      Data.getULEB128(&Offset);
      continue;
    }

    return createStringError(
        errc::invalid_argument,
        "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
  }

  // Register the CIE so that FDEs referring to it can find its encodings.
  auto Info = std::make_unique<CIEInfo>(FDEPointerEncoding,
                                        LSDAPointerEncoding,
                                        AugmentationString);
  CIEs[StartOffset] = Info.get();
  Entries.emplace_back(std::move(Info));
  return Error::success();
}
/// Parse the FDE whose CIE pointer field starts at \p StartStructureOffset.
///
/// \p CIEPointer is the value of that field; the referenced CIE is looked up
/// at \p StartStructureOffset - \p CIEPointer. The initial location and, when
/// the CIE declares an LSDA encoding, the LSDA pointer are reported through
/// PatcherCallback together with the cursor position right after the pointer
/// and the pointer encoding. Pointers that cannot be decoded are skipped.
///
/// \returns an error if the referenced CIE has not been parsed.
Error EHFrameParser::parseFDE(uint64_t CIEPointer,
                              uint64_t StartStructureOffset) {
  CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];

  // The address size is encoded in the CIE we reference.
  if (!Cie)
    return createStringError(errc::invalid_argument,
                             "parsing FDE data at 0x%" PRIx64
                             " failed due to missing CIE",
                             StartStructureOffset);

  // Patch initial location
  if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
                                        EHFrameAddress + Offset)) {
    PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
  }

  // Skip address range
  Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);

  // Process augmentation data for this FDE.
  StringRef AugmentationString = Cie->AugmentationString;
  if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
    // Skip augmentation length
    Data.getULEB128(&Offset);

    const std::optional<uint64_t> LSDAAddress =
        Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
                               EHFrameAddress ? Offset + EHFrameAddress : 0);

    // Patch LSDA address. getEncodedPointer() yields an empty optional when
    // it cannot decode the pointer; dereferencing it unconditionally would be
    // undefined behavior, so such a pointer is left unpatched, exactly like
    // the initial location above.
    if (LSDAAddress)
      PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
  }

  return Error::success();
}
/// Walk all entries of the frame section, dispatching each one to parseCIE()
/// or parseFDE() depending on its id field.
///
/// Parsing stops at the end of the data or at a zero-length terminator entry.
/// \returns the first error reported while reading or parsing an entry.
Error EHFrameParser::parse() {
  while (Data.isValidOffset(Offset)) {
    const uint64_t EntryStart = Offset;

    // Only the length is needed; the DWARF format of the entry is ignored.
    const uint64_t Length = Data.getInitialLength(&Offset).first;

    // If the Length is 0, then this CIE is a terminator
    if (Length == 0)
      break;

    // The length counts bytes following the length field itself.
    const uint64_t BodyStart = Offset;
    const uint64_t BodyEnd = BodyStart + Length;

    Error ReadErr = Error::success();
    const uint64_t Id = Data.getRelocatedValue(4, &Offset,
                                               /*SectionIndex=*/nullptr,
                                               &ReadErr);
    if (ReadErr)
      return ReadErr;

    // A zero id marks a CIE; anything else is an FDE's CIE pointer.
    Error EntryErr =
        Id == 0 ? parseCIE(EntryStart) : parseFDE(Id, BodyStart);
    if (EntryErr)
      return EntryErr;

    // Resume at the next entry no matter how much of this one was consumed.
    Offset = BodyEnd;
  }

  return Error::success();
}
/// Convenience entry point: build a parser over \p Data located at
/// \p EHFrameAddress and run it, reporting every pointer found through
/// \p PatcherCallback.
Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
                           PatcherCallbackTy PatcherCallback) {
  return EHFrameParser(Data, EHFrameAddress, PatcherCallback).parse();
}
} // namespace bolt
} // namespace llvm