[ELF] Add internal InputFile (#78944)

Based on https://reviews.llvm.org/D45375 . Introduce a new InputFile
kind `InternalKind`, use it for

* `ctx.internalFile`: for linker-defined symbols and some synthesized
`Undefined`
* `createInternalFile`: for symbol assignments and --defsym

I picked "internal" instead of "synthetic" to avoid confusion with
SyntheticSection.

Currently a symbol's file is one of: nullptr, ObjKind, SharedKind,
BitcodeKind, BinaryKind. Now it's non-null (I plan to add an
`assert(file)` to Symbol::Symbol and change `toString(const InputFile
*)`
separately).

Debugging and error reporting gets improved. The immediate user-facing
difference is more descriptive "File" column in the --cref output. This
patch may unlock further simplification.

Currently each symbol assignment gets its own
`createInternalFile(cmd->location)`. Two symbol assignments in a linker
script do not share the same file. Making the file the same would be
nice, but would require non trivial code.
This commit is contained in:
Fangrui Song 2024-01-22 09:09:46 -08:00 committed by GitHub
parent a859df3b0a
commit 43b13341fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 75 additions and 45 deletions

View File

@ -1381,9 +1381,9 @@ template <typename ELFT> void elf::writeARMCmseImportLib() {
// Copy the secure gateway entry symbols to the import library symbol table.
for (auto &p : symtab.cmseSymMap) {
Defined *d = cast<Defined>(p.second.sym);
impSymTab->addSymbol(makeDefined(nullptr, d->getName(), d->computeBinding(),
/*stOther=*/0, STT_FUNC, d->getVA(),
d->getSize(), nullptr));
impSymTab->addSymbol(makeDefined(
ctx.internalFile, d->getName(), d->computeBinding(),
/*stOther=*/0, STT_FUNC, d->getVA(), d->getSize(), nullptr));
}
size_t idx = 0;

View File

@ -771,12 +771,11 @@ template <class ELFT> bool elf::isMipsPIC(const Defined *sym) {
if (!sym->section)
return false;
ObjFile<ELFT> *file =
cast<InputSectionBase>(sym->section)->template getFile<ELFT>();
if (!file)
InputFile *file = cast<InputSectionBase>(sym->section)->file;
if (!file || file->isInternal())
return false;
return file->getObj().getHeader().e_flags & EF_MIPS_PIC;
return cast<ObjFile<ELFT>>(file)->getObj().getHeader().e_flags & EF_MIPS_PIC;
}
template <class ELFT> TargetInfo *elf::getMipsTargetInfo() {

View File

@ -253,7 +253,7 @@ static bool addOptional(StringRef name, uint64_t value,
Symbol *sym = symtab.find(name);
if (!sym || sym->isDefined())
return false;
sym->resolve(Defined{/*file=*/nullptr, StringRef(), STB_GLOBAL, STV_HIDDEN,
sym->resolve(Defined{ctx.internalFile, StringRef(), STB_GLOBAL, STV_HIDDEN,
STT_FUNC, value,
/*size=*/0, /*section=*/nullptr});
defined.push_back(cast<Defined>(sym));

View File

@ -473,6 +473,8 @@ struct Ctx {
std::pair<const InputFile *, const InputFile *>>
backwardReferences;
llvm::SmallSet<llvm::StringRef, 0> auxiliaryFiles;
// InputFile for linker created symbols with no source location.
InputFile *internalFile;
// True if SHT_LLVM_SYMPART is used.
std::atomic<bool> hasSympart{false};
// True if there are TLS IE relocations. Set DF_STATIC_TLS if -shared.

View File

@ -105,6 +105,7 @@ void Ctx::reset() {
whyExtractRecords.clear();
backwardReferences.clear();
auxiliaryFiles.clear();
internalFile = nullptr;
hasSympart.store(false, std::memory_order_relaxed);
hasTlsIe.store(false, std::memory_order_relaxed);
needsTlsLd.store(false, std::memory_order_relaxed);
@ -2337,7 +2338,8 @@ static void readSymbolPartitionSection(InputSectionBase *s) {
static Symbol *addUnusedUndefined(StringRef name,
uint8_t binding = STB_GLOBAL) {
return symtab.addSymbol(Undefined{nullptr, name, binding, STV_DEFAULT, 0});
return symtab.addSymbol(
Undefined{ctx.internalFile, name, binding, STV_DEFAULT, 0});
}
static void markBuffersAsDontNeed(bool skipLinkedOutput) {
@ -2696,6 +2698,8 @@ void LinkerDriver::link(opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_trace_symbol))
symtab.insert(arg->getValue())->traced = true;
ctx.internalFile = createInternalFile("<internal>");
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
// -u foo a.a b.so will extract a.a.
for (StringRef name : config->undefined)

View File

@ -1785,6 +1785,10 @@ void BinaryFile::parse() {
nullptr});
}
InputFile *elf::createInternalFile(StringRef name) {
return make<InputFile>(InputFile::InternalKind, MemoryBufferRef("", name));
}
ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
bool lazy) {
ELFFileBase *f;

View File

@ -63,14 +63,17 @@ public:
SharedKind,
BitcodeKind,
BinaryKind,
InternalKind,
};
InputFile(Kind k, MemoryBufferRef m);
Kind kind() const { return fileKind; }
bool isElf() const {
Kind k = kind();
return k == ObjKind || k == SharedKind;
}
bool isInternal() const { return kind() == InternalKind; }
StringRef getName() const { return mb.getBufferIdentifier(); }
MemoryBufferRef mb;
@ -151,9 +154,6 @@ public:
// R_PPC64_TLSLD. Disable TLS relaxation to avoid bad code generation.
bool ppc64DisableTLSRelax = false;
protected:
InputFile(Kind k, MemoryBufferRef m);
public:
// If not empty, this stores the name of the archive containing this file.
// We use this string for creating error messages.
@ -380,6 +380,7 @@ public:
void parse();
};
InputFile *createInternalFile(StringRef name);
ELFFileBase *createObjFile(MemoryBufferRef mb, StringRef archiveName = "",
bool lazy = false);

View File

@ -245,6 +245,8 @@ InputSection *InputSectionBase::getLinkOrderDep() const {
// Find a symbol that encloses a given location.
Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset,
uint8_t type) const {
if (file->isInternal())
return nullptr;
for (Symbol *b : file->getSymbols())
if (Defined *d = dyn_cast<Defined>(b))
if (d->section == this && d->value <= offset &&
@ -344,7 +346,7 @@ template <class ELFT> void InputSection::copyShtGroup(uint8_t *buf) {
}
InputSectionBase *InputSection::getRelocatedSection() const {
if (!file || (type != SHT_RELA && type != SHT_REL))
if (!file || file->isInternal() || (type != SHT_RELA && type != SHT_REL))
return nullptr;
ArrayRef<InputSectionBase *> sections = file->getSections();
return sections[info];

View File

@ -9,6 +9,7 @@
#ifndef LLD_ELF_INPUT_SECTION_H
#define LLD_ELF_INPUT_SECTION_H
#include "Config.h"
#include "Relocations.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/LLVM.h"
@ -413,7 +414,7 @@ class SyntheticSection : public InputSection {
public:
SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign,
StringRef name)
: InputSection(nullptr, flags, type, addralign, {}, name,
: InputSection(ctx.internalFile, flags, type, addralign, {}, name,
InputSectionBase::Synthetic) {}
virtual ~SyntheticSection() = default;

View File

@ -256,10 +256,12 @@ void BitcodeCompiler::add(BitcodeFile &f) {
// Symbol section is always null for bitcode symbols, hence the check
// for isElf(). Skip linker script defined symbols as well: they have
// no File defined.
!(dr->section == nullptr && (!sym->file || sym->file->isElf()));
!(dr->section == nullptr &&
(sym->file->isInternal() || sym->file->isElf()));
if (r.Prevailing)
Undefined(nullptr, StringRef(), STB_GLOBAL, STV_DEFAULT, sym->type)
Undefined(ctx.internalFile, StringRef(), STB_GLOBAL, STV_DEFAULT,
sym->type)
.overwrite(*sym);
// We tell LTO to not apply interprocedural optimization for wrapped

View File

@ -233,8 +233,8 @@ void LinkerScript::addSymbol(SymbolAssignment *cmd) {
// write expressions like this: `alignment = 16; . = ALIGN(., alignment)`.
uint64_t symValue = value.sec ? 0 : value.getValue();
Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, value.type,
symValue, 0, sec);
Defined newSym(createInternalFile(cmd->location), cmd->name, STB_GLOBAL,
visibility, value.type, symValue, 0, sec);
Symbol *sym = symtab.insert(cmd->name);
sym->mergeProperties(newSym);
@ -250,8 +250,8 @@ static void declareSymbol(SymbolAssignment *cmd) {
return;
uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT;
Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0,
nullptr);
Defined newSym(ctx.internalFile, cmd->name, STB_GLOBAL, visibility,
STT_NOTYPE, 0, 0, nullptr);
// If the symbol is already defined, its order is 0 (with absence indicating
// 0); otherwise it's assigned the order of the SymbolAssignment.

View File

@ -1757,7 +1757,7 @@ void elf::postScanRelocations() {
GotSection *got = in.got.get();
if (ctx.needsTlsLd.load(std::memory_order_relaxed) && got->addTlsIndex()) {
static Undefined dummy(nullptr, "", STB_LOCAL, 0, 0);
static Undefined dummy(ctx.internalFile, "", STB_LOCAL, 0, 0);
if (config->shared)
mainPart->relaDyn->addReloc(
{target->tlsModuleIndexRel, got, got->getTlsIndexOff()});

View File

@ -32,6 +32,7 @@ public:
void expect(StringRef expect);
bool consumeLabel(StringRef tok);
std::string getCurrentLocation();
MemoryBufferRef getCurrentMB();
std::vector<MemoryBufferRef> mbs;
std::vector<StringRef> tokens;
@ -41,9 +42,6 @@ public:
size_t lastLineNumber = 0;
size_t lastLineNumberOffset = 0;
protected:
MemoryBufferRef getCurrentMB();
private:
void maybeSplitExpr();
StringRef getLine();

View File

@ -288,7 +288,8 @@ void ScriptParser::readDefsym(StringRef name) {
Expr e = readExpr();
if (!atEOF())
setError("EOF expected, but got " + next());
auto *cmd = make<SymbolAssignment>(name, e, 0, getCurrentLocation());
auto *cmd = make<SymbolAssignment>(
name, e, 0, getCurrentMB().getBufferIdentifier().str());
script->sectionCommands.push_back(cmd);
}

View File

@ -261,8 +261,8 @@ InputSection *elf::createInterpSection() {
StringRef s = saver().save(config->dynamicLinker);
ArrayRef<uint8_t> contents = {(const uint8_t *)s.data(), s.size() + 1};
return make<InputSection>(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, contents,
".interp");
return make<InputSection>(ctx.internalFile, SHF_ALLOC, SHT_PROGBITS, 1,
contents, ".interp");
}
Defined *elf::addSyntheticLocal(StringRef name, uint8_t type, uint64_t value,

View File

@ -112,7 +112,7 @@ ErrorPlace elf::getErrorPlace(const uint8_t *loc) {
std::string objLoc = isec->getLocation(loc - isecLoc);
// Return object file location and source file location.
// TODO: Refactor getSrcMsg not to take a variable.
Undefined dummy(nullptr, "", STB_LOCAL, 0, 0);
Undefined dummy(ctx.internalFile, "", STB_LOCAL, 0, 0);
return {isec, objLoc + ": ",
isec->file ? isec->getSrcMsg(dummy, loc - isecLoc) : ""};
}

View File

@ -148,23 +148,24 @@ static Defined *addOptionalRegular(StringRef name, SectionBase *sec,
if (!s || s->isDefined() || s->isCommon())
return nullptr;
s->resolve(Defined{nullptr, StringRef(), STB_GLOBAL, stOther, STT_NOTYPE, val,
s->resolve(Defined{ctx.internalFile, StringRef(), STB_GLOBAL, stOther,
STT_NOTYPE, val,
/*size=*/0, sec});
s->isUsedInRegularObj = true;
return cast<Defined>(s);
}
static Defined *addAbsolute(StringRef name) {
Symbol *sym = symtab.addSymbol(Defined{nullptr, name, STB_GLOBAL, STV_HIDDEN,
STT_NOTYPE, 0, 0, nullptr});
sym->isUsedInRegularObj = true;
return cast<Defined>(sym);
}
// The linker is expected to define some symbols depending on
// the linking result. This function defines such symbols.
void elf::addReservedSymbols() {
if (config->emachine == EM_MIPS) {
auto addAbsolute = [](StringRef name) {
Symbol *sym =
symtab.addSymbol(Defined{ctx.internalFile, name, STB_GLOBAL,
STV_HIDDEN, STT_NOTYPE, 0, 0, nullptr});
sym->isUsedInRegularObj = true;
return cast<Defined>(sym);
};
// Define _gp for MIPS. st_value of _gp symbol will be updated by Writer
// so that it points to an absolute address which by default is relative
// to GOT. Default offset is 0x7ff0.
@ -213,7 +214,7 @@ void elf::addReservedSymbols() {
if (config->emachine == EM_PPC64)
gotOff = 0x8000;
s->resolve(Defined{/*file=*/nullptr, StringRef(), STB_GLOBAL, STV_HIDDEN,
s->resolve(Defined{ctx.internalFile, StringRef(), STB_GLOBAL, STV_HIDDEN,
STT_NOTYPE, gotOff, /*size=*/0, Out::elfHeader});
ElfSym::globalOffsetTable = cast<Defined>(s);
}
@ -280,7 +281,8 @@ static void demoteSymbolsAndComputeIsPreemptible() {
auto *s = dyn_cast<SharedSymbol>(sym);
if (sym->isLazy() || (s && !cast<SharedFile>(s->file)->isNeeded)) {
uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK);
Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type)
Undefined(ctx.internalFile, sym->getName(), binding, sym->stOther,
sym->type)
.overwrite(*sym);
sym->versionId = VER_NDX_GLOBAL;
}
@ -1922,7 +1924,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// https://sourceware.org/ml/binutils/2002-03/msg00360.html
if (mainPart->dynamic->parent) {
Symbol *s = symtab.addSymbol(Defined{
/*file=*/nullptr, "_DYNAMIC", STB_WEAK, STV_HIDDEN, STT_NOTYPE,
ctx.internalFile, "_DYNAMIC", STB_WEAK, STV_HIDDEN, STT_NOTYPE,
/*value=*/0, /*size=*/0, mainPart->dynamic.get()});
s->isUsedInRegularObj = true;
}
@ -1964,7 +1966,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// define _TLS_MODULE_BASE_ relative to the first TLS section.
Symbol *s = symtab.find("_TLS_MODULE_BASE_");
if (s && s->isUndefined()) {
s->resolve(Defined{/*file=*/nullptr, StringRef(), STB_GLOBAL,
s->resolve(Defined{ctx.internalFile, StringRef(), STB_GLOBAL,
STV_HIDDEN, STT_TLS, /*value=*/0, 0,
/*section=*/nullptr});
ElfSym::tlsModuleBase = cast<Defined>(s);
@ -2106,8 +2108,9 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// With the outputSections available check for GDPLT relocations
// and add __tls_get_addr symbol if needed.
if (config->emachine == EM_HEXAGON && hexagonNeedsTLSSymbol(outputSections)) {
Symbol *sym = symtab.addSymbol(Undefined{
nullptr, "__tls_get_addr", STB_GLOBAL, STV_DEFAULT, STT_NOTYPE});
Symbol *sym =
symtab.addSymbol(Undefined{ctx.internalFile, "__tls_get_addr",
STB_GLOBAL, STV_DEFAULT, STT_NOTYPE});
sym->isPreemptible = true;
partitions[0].dynSymTab->addSymbol(sym);
}

View File

@ -3,11 +3,12 @@
// RUN: echo '.global foo; foo:' | llvm-mc -filetype=obj -triple=x86_64-pc-linux - -o %t1.o
// RUN: echo '.global foo, bar; bar:' | llvm-mc -filetype=obj -triple=x86_64-pc-linux - -o %t2.o
// RUN: echo '.global zed; zed:' | llvm-mc -filetype=obj -triple=x86_64-pc-linux - -o %ta.o
// RUN: echo 'abs1 = 42;' > %t.lds
// RUN: rm -f %t.a
// RUN: llvm-ar rcs %t.a %ta.o
// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t3.o
// RUN: ld.lld -shared -o %t1.so %t1.o
// RUN: ld.lld -o /dev/null %t1.so %t2.o %t3.o %t.a --gc-sections --cref | FileCheck -strict-whitespace %s
// RUN: ld.lld -o /dev/null %t1.so %t2.o %t3.o %t.a %t.a %t.lds --defsym abs2=43 --gc-sections --cref | FileCheck -strict-whitespace %s
/// If -Map is specified, print to the map file.
// RUN: ld.lld -o /dev/null %t1.so %t2.o %t3.o %t.a --gc-sections -Map=%t.map --cref
@ -26,6 +27,10 @@
// CHECK-NEXT: baz {{.*}}3.o
// CHECK-NEXT: zed {{.*}}.a({{.*}}a.o)
// CHECK-NEXT: {{.*}}3.o
// CHECK-NEXT: abs1 {{.*}}.lds:1
// CHECK-NEXT: {{.*}}3.o
// CHECK-NEXT: abs2 --defsym{{$}}
// CHECK-NEXT: {{.*}}3.o
// CHECK-NOT: discarded
// CHECK2: VMA LMA Size Align Out In Symbol
@ -46,3 +51,7 @@ baz:
.section .text.a,"ax",@progbits
discarded:
.data
.quad abs1
.quad abs2

View File

@ -7,10 +7,11 @@
# RUN: echo "SECTIONS { bar = 1; }" > %t.script
# RUN: ld.lld --symbol-ordering-file %t.ord %t.o --script %t.script \
# RUN: -o %t.out 2>&1 | FileCheck %s
# CHECK: warning: <internal>: unable to order absolute symbol: bar
# CHECK: warning: {{.*}}.script:1: unable to order absolute symbol: bar
## Check we do not crash when trying to order --defsym symbol.
# RUN: echo "bar" > %t.ord
# RUN: ld.lld --symbol-ordering-file %t.ord %t.o -defsym=bar=1 \
# RUN: -o %t.out 2>&1 | FileCheck %s
# RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DEFSYM
# DEFSYM: warning: --defsym: unable to order absolute symbol: bar

View File

@ -8,7 +8,10 @@
## associated file or linker script line number).
# CHECK: error: {{.*}}:(.text+0x2): relocation R_X86_64_GOTPCRELX out of range: 2147483658 is not in [-2147483648, 2147483647]; references '__stop_data'
# CHECK-NEXT: >>> defined in <internal>
# CHECK-EMPTY:
# CHECK-NEXT: error: {{.*}}:(.text+0x9): relocation R_X86_64_REX_GOTPCRELX out of range: 2147483651 is not in [-2147483648, 2147483647]; references '__stop_data'
# CHECK-NEXT: >>> defined in <internal>
#--- a.s
movl __stop_data@GOTPCREL(%rip), %eax # out of range