[DTLTO][LLD][ELF] Support bitcode members of thin archives (#149425)

This patch adds support for bitcode members of thin archives to DTLTO
(https://llvm.org/docs/DTLTO.html) in ELF LLD.

For DTLTO, bitcode identifiers must be valid paths to bitcode files on
disk. Clang does not support archive inputs for ThinLTO backend
compilations. This patch adjusts the identifier for bitcode members of
thin archives in DTLTO links so that it is the path to the member file
on disk, allowing such members to be supported in DTLTO.

This patch is sufficient to allow for self-hosting an LLVM build with
DTLTO when thin archives are used.

Note: Bitcode members of non-thin archives remain unsupported. This will
be addressed in a future change.

Testing:
- LLD lit test coverage has been added to check that the identifier is
adjusted appropriately.
- A cross-project lit test has been added to show that a DTLTO link can
succeed when linking bitcode members of thin archives.

For the design discussion of the DTLTO feature, see: #126654.
This commit is contained in:
bd1976bris 2025-08-01 09:38:46 +01:00 committed by GitHub
parent 965231ca0a
commit 673476d96b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 216 additions and 12 deletions

View File

@ -19,11 +19,12 @@ set(CROSS_PROJECT_TEST_DEPS
FileCheck
check-gdb-llvm-support
count
llvm-dwarfdump
llvm-ar
llvm-config
llvm-dwarfdump
llvm-objdump
split-file
not
split-file
)
if ("clang" IN_LIST LLVM_ENABLE_PROJECTS)

View File

@ -0,0 +1,97 @@
REQUIRES: ld.lld,llvm-ar
## Test that a DTLTO link succeeds and outputs the expected set of files
## correctly when thin archives are present.
RUN: rm -rf %t && split-file %s %t && cd %t
## Compile bitcode. -O2 is required for cross-module importing.
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c \
RUN: foo.c bar.c dog.c cat.c start.c
## Generate thin archives.
RUN: llvm-ar rcs foo.a foo.o --thin
## Create this bitcode thin archive in a subdirectory to test the expansion of
## the path to a bitcode file that is referenced using "..", e.g., in this case
## "../bar.o".
RUN: mkdir lib
RUN: llvm-ar rcs lib/bar.a bar.o --thin
## Create this bitcode thin archive with an absolute path entry containing "..".
RUN: llvm-ar rcs dog.a %t/lib/../dog.o --thin
## The bitcode member of cat.a will not be used in the link.
RUN: llvm-ar rcs cat.a cat.o --thin
RUN: llvm-ar rcs start.a start.o --thin
## Link from a different directory to ensure that thin archive member paths are
## resolved correctly relative to the archive locations.
RUN: mkdir %t/out && cd %t/out
RUN: %clang --target=x86_64-linux-gnu -flto=thin -fuse-ld=lld %t/foo.a %t/lib/bar.a ../start.a %t/cat.a \
RUN: -Wl,--whole-archive ../dog.a \
RUN: -fthinlto-distributor=%python \
RUN: -Xthinlto-distributor=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--save-temps -nostdlib -Werror
## Check that the required output files have been created.
RUN: ls | sort | FileCheck %s
## No files are expected before.
CHECK-NOT: {{.}}
## JSON jobs description.
CHECK: {{^}}a.[[PID:[a-zA-Z0-9_]+]].dist-file.json{{$}}
## Native output object files and individual summary index files.
CHECK: {{^}}bar.3.[[PID]].native.o{{$}}
CHECK: {{^}}bar.3.[[PID]].native.o.thinlto.bc{{$}}
CHECK: {{^}}dog.1.[[PID]].native.o{{$}}
CHECK: {{^}}dog.1.[[PID]].native.o.thinlto.bc{{$}}
CHECK: {{^}}foo.2.[[PID]].native.o{{$}}
CHECK: {{^}}foo.2.[[PID]].native.o.thinlto.bc{{$}}
CHECK: {{^}}start.4.[[PID]].native.o{{$}}
CHECK: {{^}}start.4.[[PID]].native.o.thinlto.bc{{$}}
## No files are expected after.
CHECK-NOT: {{.}}
## It is important that cross-module inlining occurs for this test to show that Clang can
## successfully load the bitcode file dependencies recorded in the summary indices.
## Explicitly check that the expected importing has occurred.
RUN: llvm-dis start.4.*.native.o.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=FOO,BAR,START
RUN: llvm-dis dog.1.*.native.o.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=FOO,BAR,DOG,START
RUN: llvm-dis foo.2.*.native.o.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=FOO,BAR,START
RUN: llvm-dis bar.3.*.native.o.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=FOO,BAR,START
FOO-DAG: foo.o
BAR-DAG: bar.o
DOG-DAG: dog.o
START-DAG: start.o
#--- foo.c
extern int bar(int), _start(int);
__attribute__((retain)) int foo(int x) { return x + bar(x) + _start(x); }
#--- bar.c
extern int foo(int), _start(int);
__attribute__((retain)) int bar(int x) { return x + foo(x) + _start(x); }
#--- dog.c
extern int foo(int), bar(int), _start(int);
__attribute__((retain)) int dog(int x) { return x + foo(x) + bar(x) + _start(x); }
#--- cat.c
__attribute__((retain)) void cat(int x) {}
#--- start.c
extern int foo(int), bar(int);
__attribute__((retain)) int _start(int x) { return x + foo(x) + bar(x); }

View File

@ -107,6 +107,8 @@ lldb_path = llvm_config.use_llvm_tool("lldb", search_env="LLDB")
if lldb_path is not None:
config.available_features.add("lldb")
if llvm_config.use_llvm_tool("llvm-ar"):
config.available_features.add("llvm-ar")
def configure_dexter_substitutions():
"""Configure substitutions for host platform and return list of dependencies"""

View File

@ -20,6 +20,7 @@
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/AArch64AttributeParser.h"
#include "llvm/Support/ARMAttributeParser.h"
@ -1811,6 +1812,39 @@ static uint8_t getOsAbi(const Triple &t) {
}
}
// For DTLTO, bitcode member names must be valid paths to files on disk.
// For thin archives, resolve `memberPath` relative to the archive's location.
// Returns true if adjusted; false otherwise. Non-thin archives are unsupported.
static bool dtltoAdjustMemberPathIfThinArchive(Ctx &ctx, StringRef archivePath,
std::string &memberPath) {
assert(!archivePath.empty());
if (ctx.arg.dtltoDistributor.empty())
return false;
// Read the archive header to determine if it's a thin archive.
auto bufferOrErr =
MemoryBuffer::getFileSlice(archivePath, sizeof(ThinArchiveMagic) - 1, 0);
if (std::error_code ec = bufferOrErr.getError()) {
ErrAlways(ctx) << "cannot open " << archivePath << ": " << ec.message();
return false;
}
if (!bufferOrErr->get()->getBuffer().starts_with(ThinArchiveMagic))
return false;
SmallString<128> resolvedPath;
if (path::is_relative(memberPath)) {
resolvedPath = path::parent_path(archivePath);
path::append(resolvedPath, memberPath);
} else
resolvedPath = memberPath;
path::remove_dots(resolvedPath, /*remove_dot_dot=*/true);
memberPath = resolvedPath.str();
return true;
}
BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy)
: InputFile(ctx, BitcodeKind, mb) {
@ -1821,17 +1855,22 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
if (ctx.arg.thinLTOIndexOnly)
path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier());
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two archives define two members with the same name, this
// causes a collision which result in only one of the objects being taken
// into consideration at LTO time (which very likely causes undefined
// symbols later in the link stage). So we append file offset to make
// filename unique.
StringSaver &ss = ctx.saver;
StringRef name = archiveName.empty()
? ss.save(path)
: ss.save(archiveName + "(" + path::filename(path) +
" at " + utostr(offsetInArchive) + ")");
StringRef name;
if (archiveName.empty() ||
dtltoAdjustMemberPathIfThinArchive(ctx, archiveName, path)) {
name = ss.save(path);
} else {
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two archives define two members with the same name, this
// causes a collision which result in only one of the objects being taken
// into consideration at LTO time (which very likely causes undefined
// symbols later in the link stage). So we append file offset to make
// filename unique.
name = ss.save(archiveName + "(" + path::filename(path) + " at " +
utostr(offsetInArchive) + ")");
}
MemoryBufferRef mbref(mb.getBuffer(), name);
obj = CHECK2(lto::InputFile::create(mbref), this);

View File

@ -0,0 +1,65 @@
REQUIRES: x86
## Test that a DTLTO link assigns Module IDs to thin archive members as expected.
RUN: rm -rf %t && split-file %s %t && cd %t
RUN: sed 's/@t1/@t2/g' t1.ll > t2.ll
RUN: sed 's/@t1/@t3/g' t1.ll > t3.ll
RUN: opt -thinlto-bc t1.ll -o t1.bc
RUN: opt -thinlto-bc t2.ll -o t2.bc
RUN: opt -thinlto-bc t3.ll -o t3.bc
RUN: llvm-ar rcs t1.a t1.bc --thin
## Create this bitcode thin archive in a subdirectory to test the expansion of
## the path to a bitcode file that is referenced using "..", e.g., in this case
## "../t2.bc".
RUN: mkdir lib
RUN: llvm-ar rcs lib/t2.a t2.bc --thin
## Create this bitcode thin archive with an absolute path entry containing "..".
RUN: llvm-ar rcs t3.a %t/lib/../t3.bc --thin
## Link from a different directory to ensure that thin archive member paths are
## resolved correctly relative to the archive locations.
RUN: mkdir %t/out && cd %t/out
## Build a response file to share common linking arguments.
## Note: validate.py does not perform any compilation. Instead, it validates the
## received JSON, pretty-prints the JSON and the supplied arguments, and then
## exits with an error. This allows FileCheck directives to verify the
## distributor inputs.
RUN: echo '%t/t1.a %t/lib/t2.a ../t3.a \
RUN: --thinlto-distributor="%python" \
RUN: --thinlto-distributor-arg="%llvm_src_root/utils/dtlto/validate.py"' > rsp
## Link thin archives using -u/--undefined.
RUN: not ld.lld @rsp -u t1 -u t2 -u t3 2>&1 | FileCheck %s
## Link thin archives using --whole-archive.
RUN: not ld.lld --whole-archive @rsp 2>&1 | FileCheck %s
## Check the module IDs in the JSON jobs description.
CHECK: "jobs": [
CHECK: "inputs": [
CHECK-NEXT: "{{([a-zA-Z]:)|/}}
CHECK-SAME: {{/|\\\\}}archive-thin.test.tmp{{/|\\\\}}t1.bc"
CHECK: "inputs": [
CHECK-NEXT: "{{([a-zA-Z]\:)|/}}
CHECK-SAME: {{/|\\\\}}archive-thin.test.tmp{{/|\\\\}}t2.bc"
CHECK: "inputs": [
CHECK-NEXT: "{{([a-zA-Z]:)|/}}
CHECK-SAME: {{/|\\\\}}archive-thin.test.tmp{{/|\\\\}}t3.bc"
## Ensure backend compilation fails as expected (due to validate.py dummy behavior).
CHECK: error: DTLTO backend compilation: cannot open native object file:
#--- t1.ll
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @t1() {
ret void
}