
This refactor was motivated by two bugs identified in out-of-tree builds:

1. Some implementations of the VisitMembersFunction type (often used to implement special loading semantics, e.g. -all_load or -ObjC) were assuming that buffers for archive members were null-terminated, which they are not in general. This was triggering occasional assertions.

2. Archives may include multiple members with the same file name, e.g. when constructed by appending files with the same name:

     % llvm-ar crs libfoo.a foo.o
     % llvm-ar q libfoo.a foo.o
     % llvm-ar t libfoo.a
     foo.o
     foo.o

   While confusing, these members may be safe to link (provided that they're individually valid and don't define duplicate symbols). In ORC, however, the archive member name may be used to construct an ORC initializer symbol, which must also be unique. In that case the duplicate member names lead to a duplicate definition error even if the members define unrelated symbols.

In addition to these bugs, StaticLibraryDefinitionGenerator had grown a collection of all member buffers (ObjectFilesMap), a BumpPtrAllocator that was redundantly storing synthesized archive member names (these are copied into the MemoryBuffers created for each Object, but were never freed in the allocator), and a set of COFF-specific import files.

To fix the bugs above and simplify StaticLibraryDefinitionGenerator, this patch makes the following changes:

1. StaticLibraryDefinitionGenerator::VisitMembersFunction is generalized to take a reference to the containing archive, and the index of the member within the archive. It now returns an Expected<bool> indicating whether the member visited should be treated as loadable, not loadable, or as invalidating the entire archive.

2. A static StaticLibraryDefinitionGenerator::createMemberBuffer method is added which creates MemoryBuffers with unique names of the form `<archive-name>[<index>](<member-name>)`.
This defers construction of member names until they're loaded, allowing the BumpPtrAllocator (with its redundant name storage) to be removed.

3. The ObjectFilesMap (symbol name -> memory-buffer-ref) is replaced with a SymbolToMemberIndexMap (symbol name -> index), which should be smaller and faster to construct.

4. The 'loadability' result from the VisitMembersFunction is now taken into consideration when building the SymbolToMemberIndexMap, so that members that have already been loaded / filtered out can be skipped and do not take up any ongoing space.

5. The COFF ImportedDynamicLibraries member is moved out into the COFFImportFileScanner utility, which can be used as a VisitMembersFunction.

This fixes the bugs described above, and should lower memory consumption slightly, especially for archives with many files and/or symbols where most files are eventually loaded.
285 lines
9.9 KiB
C++
285 lines
9.9 KiB
C++
//===----------------- MachO.cpp - MachO format utilities -----------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/ExecutionEngine/Orc/MachO.h"
|
|
|
|
#include "llvm/ADT/ScopeExit.h"
|
|
#include "llvm/BinaryFormat/MachO.h"
|
|
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
|
|
#include "llvm/ExecutionEngine/Orc/Layer.h"
|
|
#include "llvm/Object/MachOUniversal.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
|
|
#define DEBUG_TYPE "orc"
|
|
|
|
namespace llvm {
|
|
namespace orc {
|
|
|
|
static std::string objDesc(const MemoryBufferRef &Obj, const Triple &TT,
|
|
bool ObjIsSlice) {
|
|
std::string Desc;
|
|
if (ObjIsSlice)
|
|
Desc += (TT.getArchName() + " slice of universal binary").str();
|
|
Desc += Obj.getBufferIdentifier();
|
|
return Desc;
|
|
}
|
|
|
|
template <typename HeaderType>
|
|
static Error checkMachORelocatableObject(MemoryBufferRef Obj,
|
|
bool SwapEndianness, const Triple &TT,
|
|
bool ObjIsSlice) {
|
|
StringRef Data = Obj.getBuffer();
|
|
|
|
HeaderType Hdr;
|
|
memcpy(&Hdr, Data.data(), sizeof(HeaderType));
|
|
|
|
if (SwapEndianness)
|
|
swapStruct(Hdr);
|
|
|
|
if (Hdr.filetype != MachO::MH_OBJECT)
|
|
return make_error<StringError>(objDesc(Obj, TT, ObjIsSlice) +
|
|
" is not a MachO relocatable object",
|
|
inconvertibleErrorCode());
|
|
|
|
auto ObjArch = object::MachOObjectFile::getArch(Hdr.cputype, Hdr.cpusubtype);
|
|
if (ObjArch != TT.getArch())
|
|
return make_error<StringError>(
|
|
objDesc(Obj, TT, ObjIsSlice) + Triple::getArchTypeName(ObjArch) +
|
|
", cannot be loaded into " + TT.str() + " process",
|
|
inconvertibleErrorCode());
|
|
|
|
return Error::success();
|
|
}
|
|
|
|
/// Check that \p Obj holds a MachO relocatable object compatible with \p TT.
///
/// Reads the leading 32-bit magic value and forwards to the header-type
/// specific check: mach_header for MH_MAGIC / MH_CIGAM, mach_header_64 for
/// MH_MAGIC_64 / MH_CIGAM_64. The CIGAM variants request byte-swapping.
///
/// \returns Error::success() on success, otherwise a StringError (buffer
/// shorter than the magic value, unrecognized magic, or whatever the
/// header-specific check reports).
Error checkMachORelocatableObject(MemoryBufferRef Obj, const Triple &TT,
                                  bool ObjIsSlice) {
  const StringRef Contents = Obj.getBuffer();

  // Not even enough bytes for the magic value.
  if (Contents.size() < sizeof(uint32_t))
    return make_error<StringError>(
        objDesc(Obj, TT, ObjIsSlice) +
            " is not a valid MachO relocatable object file (truncated header)",
        inconvertibleErrorCode());

  uint32_t Magic;
  memcpy(&Magic, Contents.data(), sizeof(Magic));

  const bool Is32Bit = Magic == MachO::MH_MAGIC || Magic == MachO::MH_CIGAM;
  if (Is32Bit)
    return checkMachORelocatableObject<MachO::mach_header>(
        Obj, /*SwapEndianness=*/Magic == MachO::MH_CIGAM, TT, ObjIsSlice);

  const bool Is64Bit =
      Magic == MachO::MH_MAGIC_64 || Magic == MachO::MH_CIGAM_64;
  if (Is64Bit)
    return checkMachORelocatableObject<MachO::mach_header_64>(
        Obj, /*SwapEndianness=*/Magic == MachO::MH_CIGAM_64, TT, ObjIsSlice);

  // Anything else is not a MachO header we know how to check.
  return make_error<StringError>(
      objDesc(Obj, TT, ObjIsSlice) +
          " is not a valid MachO relocatable object (bad magic value)",
      inconvertibleErrorCode());
}
|
|
|
|
/// Owning-buffer convenience overload: runs the MemoryBufferRef check on
/// \p Obj and, if it passes, hands the same buffer back to the caller.
///
/// \returns \p Obj on success, otherwise the error produced by the check.
Expected<std::unique_ptr<MemoryBuffer>>
checkMachORelocatableObject(std::unique_ptr<MemoryBuffer> Obj, const Triple &TT,
                            bool ObjIsSlice) {
  Error CheckErr =
      checkMachORelocatableObject(Obj->getMemBufferRef(), TT, ObjIsSlice);
  if (CheckErr)
    return std::move(CheckErr);

  // Validation passed: transfer ownership of the buffer to the caller.
  return std::move(Obj);
}
|
|
|
|
/// Load a MachO relocatable object for \p TT from the file at \p Path.
///
/// The file may be a plain MachO object, which is validated against \p TT, or
/// a MachO universal binary, in which case loading is delegated to
/// loadLinkableSliceFromMachOUniversalBinary with LoadArchives::Never.
///
/// \param Path               File to open.
/// \param TT                 Target triple; its object format must be MachO
///                           or unknown (asserted).
/// \param LA                 Not consulted by this function.
/// \param IdentifierOverride Name to give the resulting buffer; \p Path is
///                           used when this is empty.
///
/// \returns The loaded buffer paired with its LinkableFileKind, or an error
/// if the file cannot be opened or read, or is neither of the kinds above.
Expected<std::pair<std::unique_ptr<MemoryBuffer>, LinkableFileKind>>
loadMachORelocatableObject(StringRef Path, const Triple &TT, LoadArchives LA,
                           std::optional<StringRef> IdentifierOverride) {
  assert((TT.getObjectFormat() == Triple::UnknownObjectFormat ||
          TT.getObjectFormat() == Triple::MachO) &&
         "TT must specify MachO or Unknown object format");

  // Name the buffer after the path unless the caller supplied a name.
  const StringRef Identifier = IdentifierOverride.value_or(Path);

  auto FDOrErr = sys::fs::openNativeFileForRead(Path, sys::fs::OF_None);
  if (!FDOrErr)
    return createFileError(Path, FDOrErr.takeError());
  sys::fs::file_t FD = *FDOrErr;
  // Close the descriptor on every exit path from here on.
  auto CloseFile = make_scope_exit([&]() { sys::fs::closeFile(FD); });

  auto FileBuf = MemoryBuffer::getOpenFile(FD, Identifier, /*FileSize=*/-1);
  if (!FileBuf)
    return make_error<StringError>(
        StringRef("Could not load MachO object at path ") + Path,
        FileBuf.getError());

  const file_magic Magic = identify_magic((*FileBuf)->getBuffer());

  // Universal binary: pick out the slice matching TT. Archives are never
  // accepted on this path.
  if (Magic == file_magic::macho_universal_binary)
    return loadLinkableSliceFromMachOUniversalBinary(
        FD, std::move(*FileBuf), TT, LoadArchives::Never, Path, Identifier);

  if (Magic != file_magic::macho_object)
    return make_error<StringError>(
        Path + " does not contain a relocatable object file compatible with " +
            TT.str(),
        inconvertibleErrorCode());

  // Plain MachO object: validate it before handing it back.
  auto CheckedObj = checkMachORelocatableObject(std::move(*FileBuf), TT, false);
  if (!CheckedObj)
    return CheckedObj.takeError();
  return std::make_pair(std::move(*CheckedObj),
                        LinkableFileKind::RelocatableObject);
}
|
|
|
|
/// Load the slice of a MachO universal binary that matches \p TT.
///
/// \param FD         Open file handle for the universal binary.
/// \param UBBuf      Buffer holding the universal binary; used to parse it
///                   and locate the slice.
/// \param TT         Triple selecting the slice.
/// \param LA         Archive policy: with Never only relocatable objects are
///                   accepted, with Required only archives, with Allowed
///                   either.
/// \param UBPath     Path of the universal binary; used in error messages.
/// \param Identifier Name given to the returned slice buffer.
///
/// \returns The slice buffer paired with its LinkableFileKind, or an error if
/// the binary cannot be parsed, has no matching slice, the slice cannot be
/// read, or the slice's kind is not permitted by \p LA.
Expected<std::pair<std::unique_ptr<MemoryBuffer>, LinkableFileKind>>
loadLinkableSliceFromMachOUniversalBinary(sys::fs::file_t FD,
                                          std::unique_ptr<MemoryBuffer> UBBuf,
                                          const Triple &TT, LoadArchives LA,
                                          StringRef UBPath,
                                          StringRef Identifier) {

  auto UB = object::MachOUniversalBinary::create(UBBuf->getMemBufferRef());
  if (!UB)
    return UB.takeError();

  // The range is an (offset, size) pair.
  auto Range = getMachOSliceRangeForTriple(**UB, TT);
  if (!Range)
    return Range.takeError();
  const auto SliceOffset = Range->first;
  const auto SliceSize = Range->second;

  auto SliceBuf =
      MemoryBuffer::getOpenFileSlice(FD, Identifier, SliceSize, SliceOffset);
  if (!SliceBuf)
    return make_error<StringError>(
        "Could not load " + TT.getArchName() +
            " slice of MachO universal binary at path " + UBPath,
        SliceBuf.getError());

  const file_magic Magic = identify_magic((*SliceBuf)->getBuffer());

  // Archives are returned as-is whenever the policy permits them.
  if (Magic == file_magic::archive && LA != LoadArchives::Never)
    return std::make_pair(std::move(*SliceBuf), LinkableFileKind::Archive);

  // Relocatable objects are validated first, unless an archive is required.
  if (Magic == file_magic::macho_object && LA != LoadArchives::Required) {
    auto CheckedObj =
        checkMachORelocatableObject(std::move(*SliceBuf), TT, true);
    if (!CheckedObj)
      return CheckedObj.takeError();
    return std::make_pair(std::move(*CheckedObj),
                          LinkableFileKind::RelocatableObject);
  }

  // The slice is of a kind the policy does not accept; describe what was
  // wanted.
  const char *Wanted = [LA]() -> const char * {
    switch (LA) {
    case LoadArchives::Never:
      return "a mach-o relocatable object file";
    case LoadArchives::Allowed:
      return "a mach-o relocatable object file or archive";
    case LoadArchives::Required:
      return "an archive";
    }
    llvm_unreachable("Unknown LoadArchives enum");
  }();

  return make_error<StringError>(TT.getArchName() + " slice of " + UBPath +
                                     " does not contain " + Wanted,
                                 inconvertibleErrorCode());
}
|
|
|
|
/// Find the slice of \p UB that matches \p TT.
///
/// A slice matches when its architecture and sub-architecture equal those of
/// \p TT and, unless \p TT has an unknown vendor, its vendor matches too.
///
/// \returns The (offset, size) of the first matching slice, or a StringError
/// if no slice matches.
Expected<std::pair<size_t, size_t>>
getMachOSliceRangeForTriple(object::MachOUniversalBinary &UB,
                            const Triple &TT) {

  for (const auto &Slice : UB.objects()) {
    const auto SliceTT = Slice.getTriple();

    if (SliceTT.getArch() != TT.getArch())
      continue;
    if (SliceTT.getSubArch() != TT.getSubArch())
      continue;
    // An unknown vendor in TT acts as a wildcard.
    if (TT.getVendor() != Triple::UnknownVendor &&
        SliceTT.getVendor() != TT.getVendor())
      continue;

    // First match wins.
    return std::make_pair(Slice.getOffset(), Slice.getSize());
  }

  return make_error<StringError>(Twine("Universal binary ") + UB.getFileName() +
                                     " does not contain a slice for " +
                                     TT.str(),
                                 inconvertibleErrorCode());
}
|
|
|
|
/// Buffer-based convenience overload: parses \p UBBuf as a MachO universal
/// binary and returns the slice range matching \p TT.
///
/// \returns The range found by the MachOUniversalBinary overload, or the
/// parse error if \p UBBuf is not a valid universal binary.
Expected<std::pair<size_t, size_t>>
getMachOSliceRangeForTriple(MemoryBufferRef UBBuf, const Triple &TT) {

  auto ParsedUB = object::MachOUniversalBinary::create(UBBuf);
  if (!ParsedUB)
    return ParsedUB.takeError();

  object::MachOUniversalBinary &Binary = **ParsedUB;
  return getMachOSliceRangeForTriple(Binary, TT);
}
|
|
|
|
/// Visit one archive member and force-load it if required.
///
/// When ObjCOnly is false every member is added to the layer immediately.
/// When ObjCOnly is true a member is added only if it is a MachO object with
/// at least one recognized Objective-C or Swift metadata section.
///
/// \param A         Archive containing the member.
/// \param MemberBuf Contents of the member.
/// \param Index     Index of the member within \p A.
///
/// \returns false if the member was loaded here or is not a valid object
/// file (nothing further to load), true if it is an object that was not
/// loaded and so is still loadable, or an error if adding the member fails
/// or a section name cannot be read.
Expected<bool> ForceLoadMachOArchiveMembers::operator()(
    object::Archive &A, MemoryBufferRef MemberBuf, size_t Index) {

  // Creates a buffer for this member and adds it to the layer.
  auto AddMember = [&]() {
    return L.add(JD, StaticLibraryDefinitionGenerator::createMemberBuffer(
                         A, MemberBuf, Index));
  };

  // Decides whether a section marks its object as needing to be loaded.
  auto IsForceLoadSection = [](StringRef SegName, StringRef SecName) {
    if (SecName == "__objc_classlist" || SecName == "__objc_protolist" ||
        SecName == "__objc_clsrolist" || SecName == "__objc_catlist" ||
        SecName == "__objc_catlist2" || SecName == "__objc_nlcatlist")
      return true;
    // Swift sections in __TEXT count, with the exception of the module hash.
    return SegName == "__TEXT" && SecName.starts_with("__swift") &&
           SecName != "__swift_modhash";
  };

  if (!ObjCOnly) {
    // Loading everything: add the member right away and report that there is
    // nothing further to load for it.
    if (auto Err = AddMember())
      return Err;
    return false;
  }

  // ObjC-only mode: inspect the member for Objective-C or Swift metadata.
  auto Obj = object::ObjectFile::createObjectFile(MemberBuf);
  if (!Obj) {
    // Invalid files are not loadable, but don't invalidate the archive.
    consumeError(Obj.takeError());
    return false;
  }

  auto *MachOObj = dyn_cast<object::MachOObjectFile>(&**Obj);
  if (MachOObj) {
    for (auto Sec : MachOObj->sections()) {
      auto SegName =
          MachOObj->getSectionFinalSegmentName(Sec.getRawDataRefImpl());

      auto SecName = Sec.getName();
      if (!SecName)
        return SecName.takeError();

      if (!IsForceLoadSection(SegName, *SecName))
        continue;

      // Found a recognized special section: load the whole object now.
      if (auto Err = AddMember())
        return Err;
      return false;
    }
  }

  // This is an object file that was not loaded here, so it remains loadable.
  return true;
}
|
|
|
|
} // End namespace orc.
|
|
} // End namespace llvm.
|