
The previous map file code was modeled after LLD-ELF's implementation. However, ld64's map file differs quite a bit from LLD-ELF's. I've revamped our map file implementation so it is better able to emit ld64-style map files. Notable differences: * ld64 doesn't demangle symbols in map files, regardless of whether `-demangle` is passed. So we don't have to bother with `getSymbolStrings()`. * ld64 doesn't emit symbols in cstring sections; it emits just the literal values. Moreover, it emits these literal values regardless of whether they are labeled with a symbol. * ld64 emits map file entries for things that are not strictly symbols, such as unwind info, GOT entries, etc. That isn't handled in this diff, but this redesign makes them easy to implement. Additionally, the previous implementation sorted the symbols so as to emit them in address order. This was slow and unnecessary -- the symbols can already be traversed in address order by walking the list of OutputSections. This brings significant speedups. Here are the numbers from the chromium_framework_less_dwarf benchmark on my Mac Pro, with the `-map` argument added to the response file: base diff difference (95% CI) sys_time 2.922 ± 0.059 2.950 ± 0.085 [ -0.7% .. +2.5%] user_time 11.464 ± 0.191 8.290 ± 0.123 [ -28.7% .. -26.7%] wall_time 11.235 ± 0.175 9.184 ± 0.169 [ -19.3% .. -17.2%] samples 16 23 (It's worth noting that map files are written in parallel with the output binary, but they often took longer to write than the binary itself.) Finally, I did further cleanups to the map-file.s test -- there was no real need to have a custom-named section. There were also alt_entry symbol declarations that had no corresponding definition. Either way, neither custom-named sections nor alt_entry symbols trigger special code paths in our map file implementation. Reviewed By: #lld-macho, Roger Differential Revision: https://reviews.llvm.org/D137368
206 lines
7.2 KiB
C++
206 lines
7.2 KiB
C++
//===- MapFile.cpp --------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the -map option, which maps address ranges to their
|
|
// respective contents, plus the input file these contents were originally from.
|
|
// The contents (typically symbols) are listed in address order. Dead-stripped
|
|
// contents are included as well.
|
|
//
|
|
// # Path: test
|
|
// # Arch: x86_64
|
|
// # Object files:
|
|
// [ 0] linker synthesized
|
|
// [ 1] a.o
|
|
// # Sections:
|
|
// # Address Size Segment Section
|
|
// 0x1000005C0 0x0000004C __TEXT __text
|
|
// # Symbols:
|
|
// # Address Size File Name
|
|
// 0x1000005C0 0x00000001 [ 1] _main
|
|
// # Dead Stripped Symbols:
|
|
// # Size File Name
|
|
// <<dead>> 0x00000001 [ 1] _foo
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "MapFile.h"
|
|
#include "ConcatOutputSection.h"
|
|
#include "Config.h"
|
|
#include "InputFiles.h"
|
|
#include "InputSection.h"
|
|
#include "OutputSegment.h"
|
|
#include "Symbols.h"
|
|
#include "SyntheticSections.h"
|
|
#include "Target.h"
|
|
#include "lld/Common/ErrorHandler.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/Support/Parallel.h"
|
|
#include "llvm/Support/TimeProfiler.h"
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::sys;
|
|
using namespace lld;
|
|
using namespace lld::macho;
|
|
|
|
struct CStringInfo {
|
|
uint32_t fileIndex;
|
|
StringRef str;
|
|
};
|
|
|
|
struct MapInfo {
|
|
SmallVector<InputFile *> files;
|
|
SmallVector<Defined *> deadSymbols;
|
|
DenseMap<const OutputSection *,
|
|
SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>>
|
|
liveCStringsForSection;
|
|
SmallVector<CStringInfo> deadCStrings;
|
|
};
|
|
|
|
/// Scans all object and bitcode input files and collects the data the map
/// file writer needs: the list of contributing files, the dead symbols, and
/// every cstring literal (live ones grouped by output section and sorted by
/// address, dead ones in a flat list).
static MapInfo gatherMapInfo() {
  MapInfo result;

  for (InputFile *file : inputFiles) {
    if (!isa<ObjFile>(file) && !isa<BitcodeFile>(file))
      continue;

    // Ordinal this file receives if it ends up contributing anything. Files
    // that contribute nothing are not recorded, so the next file reuses it.
    const uint32_t ordinal = result.files.size() + 1;
    bool contributes = false;

    // Record only the dead symbols here. Live symbols are picked up later
    // while iterating over the OutputSections.
    for (Symbol *sym : file->symbols) {
      auto *def = dyn_cast_or_null<Defined>(sym);
      if (!def)
        continue;
      // Keep only the prevailing definition of a symbol, and skip symbols
      // that live in a cstring section (the literal value is emitted
      // instead, similar to ld64).
      if (!def->isec || def->getFile() != file ||
          isa<CStringInputSection>(def->isec))
        continue;

      contributes = true;
      if (!def->isLive())
        result.deadSymbols.push_back(def);
    }

    // Record every cstring, live or dead. A CString(Output)Section does not
    // tell us which InputSections its contents came from, so that mapping is
    // built here.
    for (const Section *sec : file->sections) {
      for (const Subsection &subsec : sec->subsections) {
        auto *cstrIsec = dyn_cast<CStringInputSection>(subsec.isec);
        if (!cstrIsec)
          break; // stop scanning this section's subsections

        auto &liveBucket = result.liveCStringsForSection[cstrIsec->parent];
        for (const auto &en : llvm::enumerate(cstrIsec->pieces)) {
          const auto &piece = en.value();
          const CStringInfo entry{ordinal, cstrIsec->getStringRef(en.index())};
          if (piece.live)
            liveBucket.push_back(
                {cstrIsec->parent->addr + piece.outSecOff, entry});
          else
            result.deadCStrings.push_back(entry);
          contributes = true;
        }
      }
    }

    if (contributes)
      result.files.push_back(file);
  }

  // The cstrings were collected in input order, not address order, so sort
  // each section's list by address.
  for (auto &sectionAndStrings : result.liveCStringsForSection)
    parallelSort(sectionAndStrings.second,
                 [](const auto &lhs, const auto &rhs) {
                   return lhs.first < rhs.first;
                 });

  return result;
}
|
|
|
|
void macho::writeMapFile() {
|
|
if (config->mapFile.empty())
|
|
return;
|
|
|
|
TimeTraceScope timeScope("Write map file");
|
|
|
|
// Open a map file for writing.
|
|
std::error_code ec;
|
|
raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);
|
|
if (ec) {
|
|
error("cannot open " + config->mapFile + ": " + ec.message());
|
|
return;
|
|
}
|
|
|
|
os << format("# Path: %s\n", config->outputFile.str().c_str());
|
|
os << format("# Arch: %s\n",
|
|
getArchitectureName(config->arch()).str().c_str());
|
|
|
|
MapInfo info = gatherMapInfo();
|
|
|
|
os << "# Object files:\n";
|
|
os << format("[%3u] %s\n", 0, (const char *)"linker synthesized");
|
|
uint32_t fileIndex = 1;
|
|
DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal;
|
|
for (InputFile *file : info.files) {
|
|
os << format("[%3u] %s\n", fileIndex, file->getName().str().c_str());
|
|
readerToFileOrdinal[file] = fileIndex++;
|
|
}
|
|
|
|
os << "# Sections:\n";
|
|
os << "# Address\tSize \tSegment\tSection\n";
|
|
for (OutputSegment *seg : outputSegments)
|
|
for (OutputSection *osec : seg->getSections()) {
|
|
if (osec->isHidden())
|
|
continue;
|
|
|
|
os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(),
|
|
seg->name.str().c_str(), osec->name.str().c_str());
|
|
}
|
|
|
|
os << "# Symbols:\n";
|
|
os << "# Address\tSize \tFile Name\n";
|
|
for (const OutputSegment *seg : outputSegments) {
|
|
for (const OutputSection *osec : seg->getSections()) {
|
|
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
|
|
for (const InputSection *isec : concatOsec->inputs) {
|
|
for (Defined *sym : isec->symbols)
|
|
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
|
|
sym->size, readerToFileOrdinal[sym->getFile()],
|
|
sym->getName().str().data());
|
|
}
|
|
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
|
|
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
|
|
uint64_t lastAddr = 0; // strings will never start at address 0, so this
|
|
// is a sentinel value
|
|
for (const auto &[addr, info] : liveCStrings) {
|
|
uint64_t size = 0;
|
|
if (addr != lastAddr)
|
|
size = info.str.size() + 1; // include null terminator
|
|
lastAddr = addr;
|
|
os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size,
|
|
info.fileIndex);
|
|
os.write_escaped(info.str) << "\n";
|
|
}
|
|
}
|
|
// TODO print other synthetic sections
|
|
}
|
|
}
|
|
|
|
if (config->deadStrip) {
|
|
os << "# Dead Stripped Symbols:\n";
|
|
os << "# \tSize \tFile Name\n";
|
|
for (Defined *sym : info.deadSymbols) {
|
|
assert(!sym->isLive());
|
|
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
|
|
readerToFileOrdinal[sym->getFile()],
|
|
sym->getName().str().data());
|
|
}
|
|
for (CStringInfo &cstrInfo : info.deadCStrings) {
|
|
os << format("<<dead>>\t0x%08llX\t[%3u] literal string: ",
|
|
cstrInfo.str.size() + 1, cstrInfo.fileIndex);
|
|
os.write_escaped(cstrInfo.str) << "\n";
|
|
}
|
|
}
|
|
}
|