From 22b7b84860d39da71964c9b329937f2ee1d875ba Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 12 Jul 2024 13:26:52 -0700 Subject: [PATCH] [lld][WebAssembly] Report undefined symbols in -shared/-pie builds (#75242) Previously we would ignore all undefined symbols when using `-shared` or `-pie`. All undefined symbols would be treated as imports regardless of whether those symbols were defined in any shared library. With this change we now track symbols in shared libraries and report undefined symbols in the main program by default. The old behavior is still available via the `--unresolved-symbols=import-dynamic` command line flag. The rationale for allowing this type of breaking change is that `-pie` and `-shared` are both still experimental and will warn as such, unless `--experimental-pic` is passed. As part of this change the linker now models shared library symbols via the new SharedFunctionSymbol and SharedData symbol types. I've also added a new `--no-shlib-sigcheck` option that bypasses the checking of function signatures in shared libraries. 
This is specifically required by emscripten in the case where the imports/exports of shared libraries have been modified via JS type legalization (this is only needed when targeting old JS engines where bigint is not yet available). See https://github.com/emscripten-core/emscripten/issues/18198 --- lld/test/wasm/Inputs/ret32.s | 1 - lld/test/wasm/dylink.s | 31 ++++++++ lld/test/wasm/emit-relocs.s | 2 +- lld/test/wasm/no-shlib-sigcheck.s | 39 +++++++++ lld/test/wasm/pie.s | 6 +- lld/test/wasm/shared-needed.s | 31 +++++--- lld/test/wasm/shared.s | 6 +- lld/test/wasm/shared64.s | 2 +- lld/test/wasm/signature-mismatch.s | 2 +- lld/test/wasm/tag-section.ll | 2 +- lld/test/wasm/undef-shared.s | 12 +++ lld/test/wasm/undefined-data.s | 2 +- lld/test/wasm/unresolved-symbols.s | 2 +- lld/wasm/Config.h | 1 + lld/wasm/Driver.cpp | 1 + lld/wasm/InputFiles.cpp | 48 +++++++++-- lld/wasm/InputFiles.h | 25 ++++-- lld/wasm/MarkLive.cpp | 16 ++-- lld/wasm/Options.td | 3 + lld/wasm/Relocations.cpp | 10 ++- lld/wasm/SymbolTable.cpp | 124 +++++++++++++++++++++++++---- lld/wasm/SymbolTable.h | 3 + lld/wasm/Symbols.cpp | 9 ++- lld/wasm/Symbols.h | 26 +++++- lld/wasm/SyntheticSections.cpp | 6 +- lld/wasm/Writer.cpp | 8 +- llvm/lib/Object/WasmObjectFile.cpp | 13 +-- 27 files changed, 356 insertions(+), 75 deletions(-) create mode 100644 lld/test/wasm/dylink.s create mode 100644 lld/test/wasm/no-shlib-sigcheck.s create mode 100644 lld/test/wasm/undef-shared.s diff --git a/lld/test/wasm/Inputs/ret32.s b/lld/test/wasm/Inputs/ret32.s index 5233455917e6..009f28c8cc9b 100644 --- a/lld/test/wasm/Inputs/ret32.s +++ b/lld/test/wasm/Inputs/ret32.s @@ -1,4 +1,3 @@ - .hidden ret32 .globl ret32 ret32: .functype ret32 (f32) -> (i32) diff --git a/lld/test/wasm/dylink.s b/lld/test/wasm/dylink.s new file mode 100644 index 000000000000..27e8c3ea7a7c --- /dev/null +++ b/lld/test/wasm/dylink.s @@ -0,0 +1,31 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten -o %t.o %s +# RUN: llvm-mc 
-filetype=obj -triple=wasm32-unknown-emscripten %p/Inputs/ret32.s -o %t.ret32.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten %p/Inputs/libsearch-dyn.s -o %t.dyn.o +# RUN: wasm-ld --experimental-pic -shared %t.ret32.o %t.dyn.o -o %t.lib.so +# RUN: not wasm-ld --experimental-pic -pie -o %t.wasm %t.o 2>&1 | FileCheck --check-prefix=ERROR %s +# RUN: wasm-ld --experimental-pic -pie -o %t.wasm %t.o %t.lib.so +# RUN: obj2yaml %t.wasm | FileCheck %s + +# ERROR: error: {{.*}}: undefined symbol: ret32 +# ERROR: error: {{.*}}: undefined symbol: _bar +.functype ret32 (f32) -> (i32) + +.globl _start +_start: + .functype _start () -> () + f32.const 0.0 + call ret32 + drop + i32.const _bar@GOT + drop + end_function + +# CHECK: Sections: +# CHECK-NEXT: - Type: CUSTOM +# CHECK-NEXT: Name: dylink.0 +# CHECK-NEXT: MemorySize: 0 +# CHECK-NEXT: MemoryAlignment: 0 +# CHECK-NEXT: TableSize: 0 +# CHECK-NEXT: TableAlignment: 0 +# CHECK-NEXT: Needed: +# CHECK-NEXT: - {{.*}}.lib.so diff --git a/lld/test/wasm/emit-relocs.s b/lld/test/wasm/emit-relocs.s index 91de6116164f..bd136ba810b5 100644 --- a/lld/test/wasm/emit-relocs.s +++ b/lld/test/wasm/emit-relocs.s @@ -54,7 +54,7 @@ foo: # CHECK-NEXT: - Index: 1 # CHECK-NEXT: Kind: FUNCTION # CHECK-NEXT: Name: ret32 -# CHECK-NEXT: Flags: [ VISIBILITY_HIDDEN ] +# CHECK-NEXT: Flags: [ ] # CHECK-NEXT: Function: 1 # CHECK-NEXT: - Index: 2 # CHECK-NEXT: Kind: DATA diff --git a/lld/test/wasm/no-shlib-sigcheck.s b/lld/test/wasm/no-shlib-sigcheck.s new file mode 100644 index 000000000000..13f2a2132ac7 --- /dev/null +++ b/lld/test/wasm/no-shlib-sigcheck.s @@ -0,0 +1,39 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten -o %t.o %s +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten %p/Inputs/ret32.s -o %t.ret32.o +# RUN: wasm-ld --experimental-pic -shared %t.ret32.o -o %t.lib.so + +## Fails with signature mismatch by default +# RUN: not wasm-ld --experimental-pic -pie -o %t.wasm %t.o %t.lib.so 2>&1 | FileCheck 
--check-prefix=ERROR %s +## Same again with shared library first. +# RUN: not wasm-ld --experimental-pic -pie -o %t.wasm %t.lib.so %t.o 2>&1 | FileCheck --check-prefix=ERROR %s + +## Succeeds with --no-shlib-sigcheck added +# RUN: wasm-ld --experimental-pic -pie -o %t.wasm %t.o %t.lib.so --no-shlib-sigcheck +# RUN: obj2yaml %t.wasm | FileCheck %s +## Same again with shared library first. +# RUN: wasm-ld --experimental-pic -pie -o %t.wasm %t.lib.so %t.o --no-shlib-sigcheck +# RUN: obj2yaml %t.wasm | FileCheck %s + +.functype ret32 (f32) -> (i64) + +.globl _start +_start: + .functype _start () -> () + f32.const 0.0 + call ret32 + drop + end_function + +# ERROR: wasm-ld: error: function signature mismatch: ret32 +# ERROR: >>> defined as (f32) -> i64 in {{.*}}.o + +# CHECK: - Type: TYPE +# CHECK-NEXT: Signatures: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: ParamTypes: +# CHECK-NEXT: - F32 +# CHECK-NEXT: ReturnTypes: +# CHECK-NEXT: - I64 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: ParamTypes: [] +# CHECK-NEXT: ReturnTypes: [] diff --git a/lld/test/wasm/pie.s b/lld/test/wasm/pie.s index 887377043e55..21eac7920731 100644 --- a/lld/test/wasm/pie.s +++ b/lld/test/wasm/pie.s @@ -1,6 +1,6 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten -o %t.o %s # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-emscripten %S/Inputs/internal_func.s -o %t.internal_func.o -# RUN: wasm-ld --no-gc-sections --experimental-pic -pie -o %t.wasm %t.o %t.internal_func.o +# RUN: wasm-ld --no-gc-sections --experimental-pic -pie --unresolved-symbols=import-dynamic -o %t.wasm %t.o %t.internal_func.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM @@ -150,7 +150,7 @@ _start: # instruction in the InitExpr. We also, therefore, do not need these globals # to be mutable. 
-# RUN: wasm-ld --no-gc-sections --experimental-pic -pie --extra-features=extended-const -o %t.extended.wasm %t.o %t.internal_func.o +# RUN: wasm-ld --no-gc-sections --experimental-pic -pie --unresolved-symbols=import-dynamic --extra-features=extended-const -o %t.extended.wasm %t.o %t.internal_func.o # RUN: obj2yaml %t.extended.wasm | FileCheck %s --check-prefix=EXTENDED-CONST # EXTENDED-CONST-NOT: __wasm_apply_global_relocs @@ -207,7 +207,7 @@ _start: # to be generated along with __wasm_start as the start # function. -# RUN: wasm-ld --no-gc-sections --shared-memory --experimental-pic -pie -o %t.shmem.wasm %t.o %t.internal_func.o +# RUN: wasm-ld --no-gc-sections --shared-memory --experimental-pic -pie --unresolved-symbols=import-dynamic -o %t.shmem.wasm %t.o %t.internal_func.o # RUN: obj2yaml %t.shmem.wasm | FileCheck %s --check-prefix=SHMEM # RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefix DISASSEM-SHMEM diff --git a/lld/test/wasm/shared-needed.s b/lld/test/wasm/shared-needed.s index 12c4597190a3..a9df361f2e8d 100644 --- a/lld/test/wasm/shared-needed.s +++ b/lld/test/wasm/shared-needed.s @@ -1,17 +1,28 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ret32.s -o %t.ret32.o -# RUN: wasm-ld -shared --experimental-pic -o %t1.so %t.o -# RUN: obj2yaml %t1.so | FileCheck %s -check-prefix=SO1 +# RUN: wasm-ld -shared --experimental-pic -o %t.ret32.so %t.ret32.o +# RUN: obj2yaml %t.ret32.so | FileCheck %s -check-prefix=SO1 + +# Without linking against the ret32.so shared object we expect an undefined +# symbol error + +# RUN: not wasm-ld -shared --experimental-pic -o %t.so %t.o 2>&1 | FileCheck %s --check-prefix=ERROR +# ERROR: undefined symbol: ret32 + +# RUN: wasm-ld -shared --experimental-pic -o %t.so %t.o %t.ret32.so +# RUN: obj2yaml %t.so | FileCheck %s -check-prefix=SO2 -# RUN: wasm-ld 
-shared --experimental-pic -o %t2.so %t1.so %t.ret32.o -# RUN: obj2yaml %t2.so | FileCheck %s -check-prefix=SO2 .globl foo .globl data +.functype ret32 (f32) -> (i32) + foo: - .functype foo () -> () + .functype foo (f32) -> (i32) + local.get 0 + call ret32 end_function .section .data,"",@ @@ -24,8 +35,8 @@ data: # SO1: Sections: # SO1-NEXT: - Type: CUSTOM # SO1-NEXT: Name: dylink.0 -# SO1-NEXT: MemorySize: 4 -# SO1-NEXT: MemoryAlignment: 2 +# SO1-NEXT: MemorySize: 0 +# SO1-NEXT: MemoryAlignment: 0 # SO1-NEXT: TableSize: 0 # SO1-NEXT: TableAlignment: 0 # SO1-NEXT: Needed: [] @@ -34,10 +45,10 @@ data: # SO2: Sections: # SO2-NEXT: - Type: CUSTOM # SO2-NEXT: Name: dylink.0 -# SO2-NEXT: MemorySize: 0 -# SO2-NEXT: MemoryAlignment: 0 +# SO2-NEXT: MemorySize: 4 +# SO2-NEXT: MemoryAlignment: 2 # SO2-NEXT: TableSize: 0 # SO2-NEXT: TableAlignment: 0 # SO2-NEXT: Needed: -# SO2-NEXT: - shared-needed.s.tmp1.so +# SO2-NEXT: - shared-needed.s.tmp.ret32.so # SO2-NEXT: - Type: TYPE diff --git a/lld/test/wasm/shared.s b/lld/test/wasm/shared.s index a26f00163fea..5b40d4ebee7a 100644 --- a/lld/test/wasm/shared.s +++ b/lld/test/wasm/shared.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --experimental-pic -shared -o %t.wasm %t.o +# RUN: wasm-ld --experimental-pic --unresolved-symbols=import-dynamic -shared -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DIS @@ -7,8 +7,8 @@ # Linker-synthesized globals .globaltype __stack_pointer, i32 -.globaltype __table_base, i32, immutable -.globaltype __memory_base, i32, immutable +.globaltype __table_base, i32, immutable +.globaltype __memory_base, i32, immutable .section .data.data,"",@ data: diff --git a/lld/test/wasm/shared64.s b/lld/test/wasm/shared64.s index 73f77436cabf..831116d4d7fe 100644 --- a/lld/test/wasm/shared64.s 
+++ b/lld/test/wasm/shared64.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown -o %t.o %s -# RUN: wasm-ld -mwasm64 --experimental-pic -shared -o %t.wasm %t.o +# RUN: wasm-ld -mwasm64 --experimental-pic --unresolved-symbols=import-dynamic -shared -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DIS diff --git a/lld/test/wasm/signature-mismatch.s b/lld/test/wasm/signature-mismatch.s index 17f805a80727..89915dbdb30c 100644 --- a/lld/test/wasm/signature-mismatch.s +++ b/lld/test/wasm/signature-mismatch.s @@ -93,7 +93,7 @@ ret32_address_main: # RELOC-NEXT: - Index: 1 # RELOC-NEXT: Kind: FUNCTION # RELOC-NEXT: Name: ret32 -# RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] +# RELOC-NEXT: Flags: [ ] # RELOC-NEXT: Function: 2 # RELOC-NEXT: - Index: 2 # RELOC-NEXT: Kind: DATA diff --git a/lld/test/wasm/tag-section.ll b/lld/test/wasm/tag-section.ll index 4decdb58f952..20823c72c651 100644 --- a/lld/test/wasm/tag-section.ll +++ b/lld/test/wasm/tag-section.ll @@ -11,7 +11,7 @@ ; RUN: llc -filetype=obj -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic %p/Inputs/tag-section1.ll -o %t1.o ; RUN: llc -filetype=obj -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic %p/Inputs/tag-section2.ll -o %t2.o ; RUN: llc -filetype=obj -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -relocation-model=pic %s -o %t.o -; RUN: wasm-ld --import-undefined --experimental-pic -pie -o %t.wasm %t.o %t1.o %t2.o +; RUN: wasm-ld --import-undefined --experimental-pic --unresolved-symbols=import-dynamic -pie -o %t.wasm %t.o %t1.o %t2.o ; RUN: obj2yaml %t.wasm | FileCheck %s --check-prefix=PIC target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" diff --git a/lld/test/wasm/undef-shared.s b/lld/test/wasm/undef-shared.s new file mode 
100644 index 000000000000..4c270880ef53 --- /dev/null +++ b/lld/test/wasm/undef-shared.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: not wasm-ld --experimental-pic %t.o -o /dev/null -shared 2>&1 | FileCheck %s + +# CHECK: error: {{.*}}: undefined symbol: hidden +.global hidden +.hidden hidden + +.global foo +.section .data,"",@ +foo: + .int32 hidden + .size foo,4 diff --git a/lld/test/wasm/undefined-data.s b/lld/test/wasm/undefined-data.s index d63b667c4ea3..5e2a41606612 100644 --- a/lld/test/wasm/undefined-data.s +++ b/lld/test/wasm/undefined-data.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s # RUN: not wasm-ld -o %t.wasm %t.o 2>&1 | FileCheck %s -check-prefix=UNDEF # RUN: wasm-ld --allow-undefined -o %t.wasm %t.o -# RUN: not wasm-ld --shared -o %t.wasm %t.o 2>&1 | FileCheck %s -check-prefix=SHARED +# RUN: not wasm-ld --experimental-pic -shared --unresolved-symbols=import-dynamic -o %t.wasm %t.o 2>&1 | FileCheck %s -check-prefix=SHARED .globl _start _start: diff --git a/lld/test/wasm/unresolved-symbols.s b/lld/test/wasm/unresolved-symbols.s index 7367e6fddf76..d83a63ab3c57 100644 --- a/lld/test/wasm/unresolved-symbols.s +++ b/lld/test/wasm/unresolved-symbols.s @@ -85,7 +85,7 @@ # RUN: llvm-readobj %t4.wasm > /dev/null 2>&1 ## import-dynamic should fail due to incompatible relocations. 
-# RUN: not wasm-ld %t/main.o -o %t5.wasm --unresolved-symbols=import-dynamic 2>&1 | FileCheck -check-prefix=ERRNOPIC %s +# RUN: not wasm-ld %t/main.o -o %t5.wasm --experimental-pic --unresolved-symbols=import-dynamic 2>&1 | FileCheck -check-prefix=ERRNOPIC %s # ERRNOPIC: relocation R_WASM_MEMORY_ADDR_SLEB cannot be used against symbol `undef_data`; recompile with -fPIC # ERRNOPIC: relocation R_WASM_TABLE_INDEX_SLEB cannot be used against symbol `undef_func`; recompile with -fPIC diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index d0ffa83d111e..915c53c43717 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -69,6 +69,7 @@ struct Configuration { bool relocatable; bool saveTemps; bool shared; + bool shlibSigCheck; bool stripAll; bool stripDebug; bool stackFirst; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index d099689911fc..b66b988005d5 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -532,6 +532,7 @@ static void readConfigs(opt::InputArgList &args) { config->saveTemps = args.hasArg(OPT_save_temps); config->searchPaths = args::getStrings(args, OPT_library_path); config->shared = args.hasArg(OPT_shared); + config->shlibSigCheck = !args.hasArg(OPT_no_shlib_sigcheck); config->stripAll = args.hasArg(OPT_strip_all); config->stripDebug = args.hasArg(OPT_strip_debug); config->stackFirst = args.hasArg(OPT_stack_first); diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index 473208a08a81..ae557740a18b 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -175,7 +175,7 @@ uint64_t ObjFile::calcNewValue(const WasmRelocation &reloc, uint64_t tombstone, case R_WASM_MEMORY_ADDR_TLS_SLEB: case R_WASM_MEMORY_ADDR_TLS_SLEB64: case R_WASM_MEMORY_ADDR_LOCREL_I32: { - if (isa(sym) || sym->isUndefWeak()) + if (isa(sym) || sym->isShared() || sym->isUndefWeak()) return 0; auto D = cast(sym); uint64_t value = D->getVA() + reloc.Addend; @@ -388,7 +388,8 @@ static bool shouldMerge(const WasmSegment &seg) { } void 
ObjFile::parseLazy() { - LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << "\n"); + LLVM_DEBUG(dbgs() << "ObjFile::parseLazy: " << toString(this) << " " + << wasmObj.get() << "\n"); for (const SymbolRef &sym : wasmObj->symbols()) { const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); if (!wasmSym.isDefined()) @@ -403,21 +404,55 @@ void ObjFile::parseLazy() { } ObjFile::ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy) - : InputFile(ObjectKind, m) { + : WasmFileBase(ObjectKind, m) { this->lazy = lazy; this->archiveName = std::string(archiveName); // If this isn't part of an archive, it's eagerly linked, so mark it live. if (archiveName.empty()) markLive(); +} +void SharedFile::parse() { + assert(wasmObj->isSharedObject()); + + for (const SymbolRef &sym : wasmObj->symbols()) { + const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); + if (wasmSym.isDefined()) { + StringRef name = wasmSym.Info.Name; + // Certain shared library exports are known to be DSO-local so we + // don't want to add them to the symbol table. + // TODO(sbc): Instead of hardcoding these here perhaps we could add + // this as extra metadata in the `dylink` section. + if (name == "__wasm_apply_data_relocs" || name == "__wasm_call_ctors" || + name.starts_with("__start_") || name.starts_with("__stop_")) + continue; + uint32_t flags = wasmSym.Info.Flags; + Symbol *s; + LLVM_DEBUG(dbgs() << "shared symbol: " << name << "\n"); + switch (wasmSym.Info.Kind) { + case WASM_SYMBOL_TYPE_FUNCTION: + s = symtab->addSharedFunction(name, flags, this, wasmSym.Signature); + break; + case WASM_SYMBOL_TYPE_DATA: + s = symtab->addSharedData(name, flags, this); + break; + default: + continue; + } + symbols.push_back(s); + } + } +} + +WasmFileBase::WasmFileBase(Kind k, MemoryBufferRef m) : InputFile(k, m) { + // Parse a memory buffer as a wasm file. 
+ LLVM_DEBUG(dbgs() << "Reading object: " << toString(this) << "\n"); std::unique_ptr bin = CHECK(createBinary(mb), toString(this)); auto *obj = dyn_cast(bin.get()); if (!obj) fatal(toString(this) + ": not a wasm file"); - if (!obj->isRelocatableObject()) - fatal(toString(this) + ": not a relocatable wasm file"); bin.release(); wasmObj.reset(obj); @@ -429,6 +464,9 @@ void ObjFile::parse(bool ignoreComdats) { // Parse a memory buffer as a wasm file. LLVM_DEBUG(dbgs() << "ObjFile::parse: " << toString(this) << "\n"); + if (!wasmObj->isRelocatableObject()) + fatal(toString(this) + ": not a relocatable wasm file"); + // Build up a map of function indices to table indices for use when // verifying the existing table index relocations uint32_t totalFunctions = diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h index a129be36515d..c3a667523ee0 100644 --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -87,8 +87,19 @@ private: bool live; }; +class WasmFileBase : public InputFile { +public: + explicit WasmFileBase(Kind k, MemoryBufferRef m); + + // Returns the underlying wasm file. + const WasmObjectFile *getWasmObj() const { return wasmObj.get(); } + +protected: + std::unique_ptr wasmObj; +}; + // .o file (wasm object file) -class ObjFile : public InputFile { +class ObjFile : public WasmFileBase { public: ObjFile(MemoryBufferRef m, StringRef archiveName, bool lazy = false); static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } @@ -96,9 +107,6 @@ public: void parse(bool ignoreComdats = false); void parseLazy(); - // Returns the underlying wasm file. 
- const WasmObjectFile *getWasmObj() const { return wasmObj.get(); } - uint32_t calcNewIndex(const WasmRelocation &reloc) const; uint64_t calcNewValue(const WasmRelocation &reloc, uint64_t tombstone, const InputChunk *chunk) const; @@ -139,14 +147,15 @@ private: bool isExcludedByComdat(const InputChunk *chunk) const; void addLegacyIndirectFunctionTableIfNeeded(uint32_t tableSymbolCount); - - std::unique_ptr wasmObj; }; // .so file. -class SharedFile : public InputFile { +class SharedFile : public WasmFileBase { public: - explicit SharedFile(MemoryBufferRef m) : InputFile(SharedKind, m) {} + explicit SharedFile(MemoryBufferRef m) : WasmFileBase(SharedKind, m) {} + + void parse(); + static bool classof(const InputFile *f) { return f->kind() == SharedKind; } }; diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 0f1c50854fdf..1b99f03747fb 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -63,13 +63,15 @@ void MarkLive::enqueue(Symbol *sym) { sym->markLive(); if (markImplicitDeps) { - // Mark ctor functions in the object that defines this symbol live. - // The ctor functions are all referenced by the synthetic callCtors - // function. However, this function does not contain relocations so we - // have to manually mark the ctors as live. - enqueueInitFunctions(cast(file)); - // Mark retained segments in the object that defines this symbol live. - enqueueRetainedSegments(cast(file)); + if (auto obj = dyn_cast(file)) { + // Mark as live the ctor functions in the object that defines this symbol. + // The ctor functions are all referenced by the synthetic callCtors + // function. However, this function does not contain relocations so we + // have to manually mark the ctors as live. + enqueueInitFunctions(obj); + // Mark retained segments in the object that defines this symbol live. 
+ enqueueRetainedSegments(obj); + } } if (InputChunk *chunk = sym->getChunk()) diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 7e954822ef64..bf8134dc33cc 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -236,6 +236,9 @@ def no_growable_memory: FF<"no-growable-memory">, def no_entry: FF<"no-entry">, HelpText<"Do not output any entry point">; +def no_shlib_sigcheck: FF<"no-shlib-sigcheck">, + HelpText<"Do not check signatures of functions defined in shared libraries.">; + def stack_first: FF<"stack-first">, HelpText<"Place stack at start of linear memory rather than after data">; diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 09861319e77d..6f33a4f28a9d 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -19,6 +19,8 @@ using namespace llvm::wasm; namespace lld::wasm { static bool requiresGOTAccess(const Symbol *sym) { + if (sym->isShared()) + return true; if (!ctx.isPic && config->unresolvedSymbols != UnresolvedPolicy::ImportDynamic) return false; @@ -163,13 +165,15 @@ void scanRelocations(InputChunk *chunk) { case R_WASM_MEMORY_ADDR_I32: case R_WASM_MEMORY_ADDR_I64: // These relocation types are only present in the data section and - // will be converted into code by `generateRelocationCode`. This code - // requires the symbols to have GOT entries. + // will be converted into code by `generateRelocationCode`. This + // code requires the symbols to have GOT entries. 
if (requiresGOTAccess(sym)) addGOTEntry(sym); break; } - } else if (sym->isUndefined() && !config->relocatable && !sym->isWeak()) { + } + + if (sym->isUndefined() && !config->relocatable && !sym->isWeak()) { // Report undefined symbols reportUndefined(file, sym); } diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp index 081f811cd139..a5d37a5eba6d 100644 --- a/lld/wasm/SymbolTable.cpp +++ b/lld/wasm/SymbolTable.cpp @@ -38,6 +38,9 @@ void SymbolTable::addFile(InputFile *file, StringRef symName) { // .so file if (auto *f = dyn_cast(file)) { + // Parse the shared library's symbol table so that its symbols can resolve + // undefined references (possibly unnecessary if those aren't reported). + f->parse(); ctx.sharedFiles.push_back(f); return; } @@ -309,6 +312,12 @@ static bool shouldReplace(const Symbol *existing, InputFile *newFile, return true; } + // Similarly with shared symbols + if (existing->isShared()) { + LLVM_DEBUG(dbgs() << "replacing existing shared symbol\n"); + return true; + } + // Neither symbol is week. They conflict. 
error("duplicate symbol: " + toString(*existing) + "\n>>> defined in " + toString(existing->getFile()) + "\n>>> defined in " + @@ -316,6 +325,95 @@ static bool shouldReplace(const Symbol *existing, InputFile *newFile, return true; } +static void reportFunctionSignatureMismatch(StringRef symName, + FunctionSymbol *sym, + const WasmSignature *signature, + InputFile *file, + bool isError = true) { + std::string msg = + ("function signature mismatch: " + symName + "\n>>> defined as " + + toString(*sym->signature) + " in " + toString(sym->getFile()) + + "\n>>> defined as " + toString(*signature) + " in " + toString(file)) + .str(); + if (isError) + error(msg); + else + warn(msg); +} + +static void reportFunctionSignatureMismatch(StringRef symName, + FunctionSymbol *a, + FunctionSymbol *b, + bool isError = true) { + reportFunctionSignatureMismatch(symName, a, b->signature, b->getFile(), + isError); +} + +Symbol *SymbolTable::addSharedFunction(StringRef name, uint32_t flags, + InputFile *file, + const WasmSignature *sig) { + LLVM_DEBUG(dbgs() << "addSharedFunction: " << name << " [" << toString(*sig) + << "]\n"); + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(name, file); + + auto replaceSym = [&](Symbol *sym) { + replaceSymbol(sym, name, flags, file, sig); + }; + + if (wasInserted) { + replaceSym(s); + return s; + } + + auto existingFunction = dyn_cast(s); + if (!existingFunction) { + reportTypeError(s, file, WASM_SYMBOL_TYPE_FUNCTION); + return s; + } + + // Shared symbols should never replace locally-defined ones + if (s->isDefined()) { + return s; + } + + LLVM_DEBUG(dbgs() << "resolving existing undefined symbol: " << s->getName() + << "\n"); + + bool checkSig = true; + if (auto ud = dyn_cast(existingFunction)) + checkSig = ud->isCalledDirectly; + + if (checkSig && !signatureMatches(existingFunction, sig)) { + if (config->shlibSigCheck) { + reportFunctionSignatureMismatch(name, existingFunction, sig, file); + } else { + // With 
--no-shlib-sigcheck we ignore the signature of the function as + // defined by the shared library and instead use the signature as + // expected by the program being linked. + sig = existingFunction->signature; + } + } + + replaceSym(s); + return s; +} + +Symbol *SymbolTable::addSharedData(StringRef name, uint32_t flags, + InputFile *file) { + LLVM_DEBUG(dbgs() << "addSharedData: " << name << "\n"); + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(name, file); + + if (wasInserted || s->isUndefined()) { + replaceSymbol(s, name, flags, file); + } + + return s; +} + Symbol *SymbolTable::addDefinedFunction(StringRef name, uint32_t flags, InputFile *file, InputFunction *function) { @@ -551,10 +649,18 @@ Symbol *SymbolTable::addUndefinedFunction(StringRef name, existingFunction->signature = sig; auto *existingUndefined = dyn_cast(existingFunction); if (isCalledDirectly && !signatureMatches(existingFunction, sig)) { + if (existingFunction->isShared()) { + // Special handling for when the existing function is a shared symbol + if (config->shlibSigCheck) { + reportFunctionSignatureMismatch(name, existingFunction, sig, file); + } else { + existingFunction->signature = sig; + } + } // If the existing undefined functions is not called directly then let // this one take precedence. Otherwise the existing function is either // directly called or defined, in which case we need a function variant. 
- if (existingUndefined && !existingUndefined->isCalledDirectly) + else if (existingUndefined && !existingUndefined->isCalledDirectly) replaceSym(); else if (getFunctionVariant(s, sig, file, &s)) replaceSym(); @@ -918,20 +1024,6 @@ DefinedFunction *SymbolTable::createUndefinedStub(const WasmSignature &sig) { return sym; } -static void reportFunctionSignatureMismatch(StringRef symName, - FunctionSymbol *a, - FunctionSymbol *b, bool isError) { - std::string msg = ("function signature mismatch: " + symName + - "\n>>> defined as " + toString(*a->signature) + " in " + - toString(a->getFile()) + "\n>>> defined as " + - toString(*b->signature) + " in " + toString(b->getFile())) - .str(); - if (isError) - error(msg); - else - warn(msg); -} - // Remove any variant symbols that were created due to function signature // mismatches. void SymbolTable::handleSymbolVariants() { @@ -965,7 +1057,7 @@ void SymbolTable::handleSymbolVariants() { if (!defined) { reportFunctionSignatureMismatch(symName, cast(variants[0]), - cast(variants[1]), true); + cast(variants[1])); return; } diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h index 42ebb8be8eb3..5d09d8b68571 100644 --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -50,6 +50,9 @@ public: void trace(StringRef name); + Symbol *addSharedFunction(StringRef name, uint32_t flags, InputFile *file, + const WasmSignature *sig); + Symbol *addSharedData(StringRef name, uint32_t flags, InputFile *file); Symbol *addDefinedFunction(StringRef name, uint32_t flags, InputFile *file, InputFunction *function); Symbol *addDefinedData(StringRef name, uint32_t flags, InputFile *file, diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 687728d00c85..f74699d0763f 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -68,6 +68,10 @@ std::string toString(wasm::Symbol::Kind kind) { return "SectionKind"; case wasm::Symbol::OutputSectionKind: return "OutputSectionKind"; + case wasm::Symbol::SharedFunctionKind: + return 
"SharedFunctionKind"; + case wasm::Symbol::SharedDataKind: + return "SharedDataKind"; } llvm_unreachable("invalid symbol kind"); } @@ -221,11 +225,12 @@ void Symbol::setHidden(bool isHidden) { } bool Symbol::isImported() const { - return isUndefined() && (importName.has_value() || forceImport); + return isShared() || + (isUndefined() && (importName.has_value() || forceImport)); } bool Symbol::isExported() const { - if (!isDefined() || isLocal()) + if (!isDefined() || isShared() || isLocal()) return false; // Shared libraries must export all weakly defined symbols diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 65a062b8321b..2ba575fddc87 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -60,6 +60,8 @@ public: UndefinedTableKind, UndefinedTagKind, LazyKind, + SharedFunctionKind, + SharedDataKind, }; Kind kind() const { return symbolKind; } @@ -74,6 +76,9 @@ public: } bool isLazy() const { return symbolKind == LazyKind; } + bool isShared() const { + return symbolKind == SharedFunctionKind || symbolKind == SharedDataKind; + } bool isLocal() const; bool isWeak() const; @@ -190,6 +195,7 @@ class FunctionSymbol : public Symbol { public: static bool classof(const Symbol *s) { return s->kind() == DefinedFunctionKind || + s->kind() == SharedFunctionKind || s->kind() == UndefinedFunctionKind; } @@ -285,7 +291,8 @@ public: class DataSymbol : public Symbol { public: static bool classof(const Symbol *s) { - return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind; + return s->kind() == DefinedDataKind || s->kind() == UndefinedDataKind || + s->kind() == SharedDataKind; } protected: @@ -323,6 +330,12 @@ protected: uint64_t size = 0; }; +class SharedData : public DataSymbol { +public: + SharedData(StringRef name, uint32_t flags, InputFile *f) + : DataSymbol(name, SharedDataKind, flags, f) {} +}; + class UndefinedData : public DataSymbol { public: UndefinedData(StringRef name, uint32_t flags, InputFile *file = nullptr) @@ -486,6 +499,16 @@ public: 
static bool classof(const Symbol *s) { return s->kind() == UndefinedTagKind; } }; +class SharedFunctionSymbol : public FunctionSymbol { +public: + SharedFunctionSymbol(StringRef name, uint32_t flags, InputFile *file, + const WasmSignature *sig) + : FunctionSymbol(name, SharedFunctionKind, flags, file, sig) {} + static bool classof(const Symbol *s) { + return s->kind() == SharedFunctionKind; + } +}; + // LazySymbol symbols represent symbols in object files between --start-lib and // --end-lib options. LLD also handles traditional archives as if all the files // in the archive are surrounded by --start-lib and --end-lib. @@ -630,6 +653,7 @@ union SymbolUnion { alignas(UndefinedGlobal) char i[sizeof(UndefinedGlobal)]; alignas(UndefinedTable) char j[sizeof(UndefinedTable)]; alignas(SectionSymbol) char k[sizeof(SectionSymbol)]; + alignas(SharedFunctionSymbol) char l[sizeof(SharedFunctionSymbol)]; }; // It is important to keep the size of SymbolUnion small for performance and diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index b359e0fdc856..f02f55519a25 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -139,7 +139,7 @@ void DylinkSection::writeBody() { uint32_t TypeSection::registerType(const WasmSignature &sig) { auto pair = typeIndices.insert(std::make_pair(sig, types.size())); if (pair.second) { - LLVM_DEBUG(llvm::dbgs() << "type " << toString(sig) << "\n"); + LLVM_DEBUG(llvm::dbgs() << "registerType " << toString(sig) << "\n"); types.push_back(&sig); } return pair.first->second; @@ -449,7 +449,7 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { writeU8(os, opcode_ptr_const, "CONST"); writeSleb128(os, f->getTableIndex(), "offset"); } else { - assert(isa(sym)); + assert(isa(sym) || isa(sym)); continue; } writeU8(os, opcode_ptr_add, "ADD"); @@ -519,7 +519,7 @@ void GlobalSection::writeBody() { else if (auto *f = dyn_cast(sym)) initExpr = intConst(f->isStub ? 
0 : f->getTableIndex(), is64); else { - assert(isa(sym)); + assert(isa(sym) || isa(sym)); initExpr = intConst(0, is64); } writeInitExpr(os, initExpr); diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 7a015764b77c..6a66a29d2498 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -736,6 +736,8 @@ static bool shouldImport(Symbol *sym) { if (config->shared && sym->isWeak() && !sym->isUndefined() && !sym->isHidden()) return true; + if (sym->isShared()) + return true; if (!sym->isUndefined()) return false; if (sym->isWeak() && !config->relocatable && !ctx.isPic) @@ -793,8 +795,11 @@ void Writer::calculateExports() { continue; if (!sym->isLive()) continue; + if (isa(sym) || sym->isShared()) + continue; StringRef name = sym->getName(); + LLVM_DEBUG(dbgs() << "Export: " << name << "\n"); WasmExport export_; if (auto *f = dyn_cast(sym)) { if (std::optional exportName = f->function->getExportName()) { @@ -822,7 +827,6 @@ void Writer::calculateExports() { export_ = {name, WASM_EXTERNAL_TABLE, t->getTableNumber()}; } - LLVM_DEBUG(dbgs() << "Export: " << name << "\n"); out.exportSec->exports.push_back(export_); out.exportSec->exportedSymbols.push_back(sym); } @@ -833,7 +837,7 @@ void Writer::populateSymtab() { return; for (Symbol *sym : symtab->symbols()) - if (sym->isUsedInRegularObj && sym->isLive()) + if (sym->isUsedInRegularObj && sym->isLive() && !sym->isShared()) out.linkingSec->addToSymtab(sym); for (ObjFile *file : ctx.objectFiles) { diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 23381955c60a..f244099d664d 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -509,11 +509,14 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { llvm::DenseSet SeenGlobals; llvm::DenseSet SeenSegments; - // If there is symbol info from the export section, this info will supersede - // it, but not info from a linking section - if (!HasLinkingSection) { + // If we have linking 
section (symbol table) or if we are parsing a DSO + // then we don't use the name section for symbol information. + bool PopulateSymbolTable = !HasLinkingSection && !HasDylinkSection; + + // If we are using the name section for symbol information then it will + // supersede any symbols created by the export section. + if (PopulateSymbolTable) Symbols.clear(); - } while (Ctx.Ptr < Ctx.End) { uint8_t Type = readUint8(Ctx); @@ -589,7 +592,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) { Index, 0, DataSegments[Index].Data.Content.size()}; } DebugNames.push_back(wasm::WasmDebugName{nameType, Index, Name}); - if (!HasLinkingSection) + if (PopulateSymbolTable) Symbols.emplace_back(Info, GlobalType, TableType, Signature); } break;