[lld-macho] Support EH frame pointer encodings that use sdata4

Previously we only supporting using the system pointer size (aka the `absptr` encoding) because `llvm-mc`'s CFI directives always generate EH frames with that encoding. But libffi uses 4-byte-encoded, hand-rolled EH frames, so this patch adds support for it. Fixes #56576. Reviewed By: #lld-macho, oontvoo Differential Revision: https://reviews.llvm.org/D130804
2022-07-31 20:16:08 -04:00 · 2022-07-31 20:16:08 -04:00 · 6c9f681252
commit 6c9f681252
parent 773d51ce3b
4 changed files with 118 additions and 26 deletions
--- a/lld/MachO/EhFrame.cpp
+++ b/lld/MachO/EhFrame.cpp
@ -58,17 +58,17 @@ uint32_t EhReader::readU32(size_t *off) const {
  return v;
 }
-uint64_t EhReader::readPointer(size_t *off) const {
+uint64_t EhReader::readPointer(size_t *off, uint8_t size) const {
-  if (*off + wordSize > data.size())
+  if (*off + size > data.size())
    failOn(*off, "unexpected end of CIE/FDE");
  uint64_t v;
-  if (wordSize == 8)
+  if (size == 8)
    v = read64le(data.data() + *off);
  else {
-    assert(wordSize == 4);
+    assert(size == 4);
    v = read32le(data.data() + *off);
  }
-  *off += wordSize;
+  *off += size;
  return v;
 }
--- a/lld/MachO/EhFrame.h
+++ b/lld/MachO/EhFrame.h
@ -55,9 +55,8 @@ namespace macho {
 class EhReader {
 public:
-  EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff,
+  EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
-           size_t wordSize)
+      : file(file), data(data), dataOff(dataOff) {}
      : file(file), data(data), dataOff(dataOff), wordSize(wordSize) {}
  size_t size() const { return data.size(); }
  // Read and validate the length field.
  uint64_t readLength(size_t *off) const;
@ -65,7 +64,7 @@ public:
  void skipValidLength(size_t *off) const;
  uint8_t readByte(size_t *off) const;
  uint32_t readU32(size_t *off) const;
-  uint64_t readPointer(size_t *off) const;
+  uint64_t readPointer(size_t *off, uint8_t size) const;
  StringRef readString(size_t *off) const;
  void skipLeb128(size_t *off) const;
  void failOn(size_t errOff, const Twine &msg) const;
@ -76,7 +75,6 @@ private:
  // The offset of the data array within its section. Used only for error
  // reporting.
  const size_t dataOff;
  size_t wordSize;
 };
 // The EH frame format, when emitted by llvm-mc, consists of a number of
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@ -385,7 +385,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
 }
 void ObjFile::splitEhFrames(ArrayRef<uint8_t> data, Section &ehFrameSection) {
-  EhReader reader(this, data, /*dataOff=*/0, target->wordSize);
+  EhReader reader(this, data, /*dataOff=*/0);
  size_t off = 0;
  while (off < reader.size()) {
    uint64_t frameOff = off;
@ -1293,10 +1293,25 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
 struct CIE {
  macho::Symbol *personalitySymbol = nullptr;
  bool fdesHaveLsda = false;
  bool fdesHaveAug = false;
  uint8_t lsdaPtrSize = 0; // 0 => no LSDA
  uint8_t funcPtrSize = 0;
 };
 static uint8_t pointerEncodingToSize(uint8_t enc) {
  switch (enc & 0xf) {
  case dwarf::DW_EH_PE_absptr:
    return target->wordSize;
  case dwarf::DW_EH_PE_sdata4:
    return 4;
  case dwarf::DW_EH_PE_sdata8:
    // ld64 doesn't actually support sdata8, but this seems simple enough...
    return 8;
  default:
    return 0;
  };
 }
 static CIE parseCIE(const InputSection *isec, const EhReader &reader,
                    size_t off) {
  // Handling the full generality of possible DWARF encodings would be a major
@ -1304,8 +1319,6 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
  // DWARF and handle just that.
  constexpr uint8_t expectedPersonalityEnc =
      dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_sdata4;
  constexpr uint8_t expectedPointerEnc =
      dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_absptr;
  CIE cie;
  uint8_t version = reader.readByte(&off);
@ -1332,16 +1345,17 @@ static CIE parseCIE(const InputSection *isec, const EhReader &reader,
      break;
    }
    case 'L': {
      cie.fdesHaveLsda = true;
      uint8_t lsdaEnc = reader.readByte(&off);
-      if (lsdaEnc != expectedPointerEnc)
+      cie.lsdaPtrSize = pointerEncodingToSize(lsdaEnc);
      if (cie.lsdaPtrSize == 0)
        reader.failOn(off, "unexpected LSDA encoding 0x" +
                               Twine::utohexstr(lsdaEnc));
      break;
    }
    case 'R': {
      uint8_t pointerEnc = reader.readByte(&off);
-      if (pointerEnc != expectedPointerEnc)
+      cie.funcPtrSize = pointerEncodingToSize(pointerEnc);
      if (cie.funcPtrSize == 0 || !(pointerEnc & dwarf::DW_EH_PE_pcrel))
        reader.failOn(off, "unexpected pointer encoding 0x" +
                               Twine::utohexstr(pointerEnc));
      break;
@ -1471,7 +1485,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
    else if (isec->symbols[0]->value != 0)
      fatal("found symbol at unexpected offset in __eh_frame");
-    EhReader reader(this, isec->data, subsec.offset, target->wordSize);
+    EhReader reader(this, isec->data, subsec.offset);
    size_t dataOff = 0; // Offset from the start of the EH frame.
    reader.skipValidLength(&dataOff); // readLength() already validated this.
    // cieOffOff is the offset from the start of the EH frame to the cieOff
@ -1510,20 +1524,20 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
      continue;
    }
    // Offset of the function address within the EH frame.
    const size_t funcAddrOff = dataOff;
    uint64_t funcAddr = reader.readPointer(&dataOff) + ehFrameSection.addr +
                        isecOff + funcAddrOff;
    uint32_t funcLength = reader.readPointer(&dataOff);
    size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
    assert(cieMap.count(cieIsec));
    const CIE &cie = cieMap[cieIsec];
    // Offset of the function address within the EH frame.
    const size_t funcAddrOff = dataOff;
    uint64_t funcAddr = reader.readPointer(&dataOff, cie.funcPtrSize) +
                        ehFrameSection.addr + isecOff + funcAddrOff;
    uint32_t funcLength = reader.readPointer(&dataOff, cie.funcPtrSize);
    size_t lsdaAddrOff = 0; // Offset of the LSDA address within the EH frame.
    Optional<uint64_t> lsdaAddrOpt;
    if (cie.fdesHaveAug) {
      reader.skipLeb128(&dataOff);
      lsdaAddrOff = dataOff;
-      if (cie.fdesHaveLsda) {
+      if (cie.lsdaPtrSize != 0) {
-        uint64_t lsdaOff = reader.readPointer(&dataOff);
+        uint64_t lsdaOff = reader.readPointer(&dataOff, cie.lsdaPtrSize);
        if (lsdaOff != 0) // FIXME possible to test this?
          lsdaAddrOpt = ehFrameSection.addr + isecOff + lsdaAddrOff + lsdaOff;
      }
--- a/lld/test/MachO/eh-frame-sdata4.s
+++ b/lld/test/MachO/eh-frame-sdata4.s
@ -0,0 +1,80 @@
 # REQUIRES: x86
 # RUN: rm -rf %t; split-file %s %t
 ## Test that we correctly handle the sdata4 DWARF pointer encoding. llvm-mc's
 ## CFI directives always generate EH frames using the absptr (i.e. system
 ## pointer size) encoding, but it is possible to hand-roll your own EH frames
 ## that use the sdata4 encoding. For instance, libffi does this.
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos10.15 %t/sdata4.s -o %t/sdata4.o
 # RUN: %lld -lSystem %t/sdata4.o -o %t/sdata4
 # RUN: llvm-objdump --macho --syms --dwarf=frames %t/sdata4 | FileCheck %s
 # CHECK: SYMBOL TABLE:
 # CHECK: [[#%.16x,MAIN:]] g     F __TEXT,__text _main
 # CHECK: .eh_frame contents:
 # CHECK: 00000000 00000010 00000000 CIE
 # CHECK:   Format:                DWARF32
 # CHECK:   Version:               1
 # CHECK:   Augmentation:          "zR"
 # CHECK:   Code alignment factor: 1
 # CHECK:   Data alignment factor: 1
 # CHECK:   Return address column: 1
 # CHECK:   Augmentation data:     1B
 # CHECK:   DW_CFA_def_cfa: reg7 +8
 # CHECK:   CFA=reg7+8
 # CHECK: 00000014 00000010 00000018 FDE cie=00000000 pc=[[#%x,MAIN]]...[[#%x,MAIN+1]]
 # CHECK:   Format:       DWARF32
 # CHECK:   DW_CFA_GNU_args_size: +16
 # CHECK:   DW_CFA_nop:
 # CHECK:   0x[[#%x,MAIN]]: CFA=reg7+8
 #--- sdata4.s
 .globl  _main
 _main:
  retq
 LmainEnd:
 .balign 4
 .section __TEXT,__eh_frame
 # Although we don't reference this EhFrame symbol directly, we must have at
 # least one non-local symbol in this section, otherwise llvm-mc generates bogus
 # subtractor relocations.
 EhFrame:
 LCieHdr:
  .long LCieEnd - LCieStart
 LCieStart:
  .long 0           # CIE ID
  .byte 1           # CIE version
  .ascii "zR\0"
  .byte 1           # Code alignment
  .byte 1           # Data alignment
  .byte 1           # RA column
  .byte 1           # Augmentation size
  .byte 0x1b        # FDE pointer encoding (pcrel | sdata4)
  .byte 0xc, 7, 8   # DW_CFA_def_cfa reg7 +8
  .balign 4
 LCieEnd:
 LFdeHdr:
  .long LFdeEnd - LFdeStart
 LFdeStart:
  .long LFdeStart - LCieHdr
  # The next two fields are longs instead of quads because of the sdata4
  # encoding.
  .long _main - .        # Function address
  .long LmainEnd - _main # Function length
  .byte 0
  ## Insert DW_CFA_GNU_args_size to prevent ld64 from creating a compact unwind
  ## entry to replace this FDE. Makes it easier for us to cross-check behavior
  ## across the two linkers (LLD never bothers trying to synthesize compact
  ## unwind if it is not already present).
  .byte 0x2e, 0x10       # DW_CFA_GNU_args_size
  .balign 4
 LFdeEnd:
  .long 0 # terminator
 .subsections_via_symbols