From e45ea95dbe236e233ad978067688789e7478541a Mon Sep 17 00:00:00 2001 From: Rose Hudson Date: Wed, 28 Jan 2026 17:45:58 +0000 Subject: [PATCH] [DTLTO] support distributing bitcode from FatLTO objects (#176928) We already have code to extract bitcode files from archives so they can be distributed. Extend this code to extract bitcode from FatLTO objects too, which otherwise cannot be used with DTLTO. --- .../dtlto/fat-lto-objects.test | 55 +++++++++++++++++++ lld/ELF/Driver.cpp | 6 +- lld/test/ELF/dtlto/timetrace.test | 4 +- llvm/include/llvm/LTO/LTO.h | 21 +++++-- llvm/lib/DTLTO/DTLTO.cpp | 30 +++++----- 5 files changed, 93 insertions(+), 23 deletions(-) create mode 100644 cross-project-tests/dtlto/fat-lto-objects.test diff --git a/cross-project-tests/dtlto/fat-lto-objects.test b/cross-project-tests/dtlto/fat-lto-objects.test new file mode 100644 index 000000000000..22e3eed43b4e --- /dev/null +++ b/cross-project-tests/dtlto/fat-lto-objects.test @@ -0,0 +1,55 @@ +REQUIRES: ld.lld,llvm-ar + +# Test that a DTLTO link succeeds and outputs the expected set of files +# correctly when FatLTO objects are present. +RUN: rm -rf %t && split-file %s %t && cd %t + +# Compile bitcode. -O2 is required for cross-module importing. +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -ffat-lto-objects -c \ +RUN: foo.c boo.c start.c + +# We want to test FatLTO objects when included in archives. +RUN: llvm-ar rcs foo.a foo.o +RUN: llvm-ar rcsT boo.a boo.o + +# Build with DTLTO. +RUN: %clang --target=x86_64-linux-gnu -flto=thin -ffat-lto-objects \ +RUN: -fuse-ld=lld -nostdlib foo.a boo.a start.o -Wl,--save-temps \ +RUN: -fthinlto-distributor=%python \ +RUN: -Xthinlto-distributor=%llvm_src_root/utils/dtlto/local.py + +# Check that the required output files have been created. 
+RUN: ls | FileCheck %s + +# thin archive member: <archive>(<member> at <offset>).<seq>.<hexpid>.<task>.<pid>.native.o +CHECK-DAG: {{^}}boo.a(boo.o at [[#BOO_OFFSET:]]).3.[[#%X,HEXPID:]].3.[[#PID:]].native.o{{$}} +# archive member: <archive>(<member> at <offset>).<seq>.<hexpid>.<task>.<pid>.native.o +CHECK-DAG: {{^}}foo.a(foo.o at [[#FOO_OFFSET:]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o{{$}} +# FatLTO object: <file>.<seq>.<hexpid>.<task>.<pid>.native.o. +CHECK-DAG: {{^}}start.o.1.[[#%X,HEXPID]].1.[[#PID]].native.o{{$}} + +# Check that all objects are named in all of the index files. +# We expect this to happen because each object references symbols from the +# others. +RUN: llvm-dis *.1.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=OBJECTS +RUN: llvm-dis *.2.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=OBJECTS +RUN: llvm-dis *.3.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=OBJECTS + +OBJECTS-DAG: foo.o +OBJECTS-DAG: boo.o +OBJECTS-DAG: start.o + +#--- foo.c +extern int boo(int), _start(int); +__attribute__((retain)) int foo(int x) { return x + boo(x) + _start(x); } + +#--- boo.c +extern int foo(int), _start(int); +__attribute__((retain)) int boo(int x) { return x + foo(x) + _start(x); } + +#--- start.c +extern int foo(int), boo(int); +__attribute__((retain)) int _start(int x) { return x + foo(x) + boo(x); } diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index c8c16bbde63a..d7bfa7357d4e 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -236,8 +236,10 @@ bool LinkerDriver::tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName, IRObjectFile::findBitcodeInMemBuffer(mb); if (errorToBool(fatLTOData.takeError())) return false; - files.push_back(std::make_unique(ctx, *fatLTOData, archiveName, - offsetInArchive, lazy)); + auto file = std::make_unique(ctx, *fatLTOData, archiveName, + offsetInArchive, lazy); + file->obj->fatLTOObject(true); + files.push_back(std::move(file)); return true; } diff --git a/lld/test/ELF/dtlto/timetrace.test b/lld/test/ELF/dtlto/timetrace.test index 639ad36f8019..4567b0a1d4b0 100644 --- 
a/lld/test/ELF/dtlto/timetrace.test +++ b/lld/test/ELF/dtlto/timetrace.test @@ -33,13 +33,13 @@ RUN: %python filter_order_and_pprint.py %t.json | FileCheck %s CHECK: "name": "Add input for DTLTO" CHECK: "name": "Add input for DTLTO" CHECK: "name": "Remove temporary inputs for DTLTO" -CHECK: "name": "Save input archive member for DTLTO" +CHECK: "name": "Serialize bitcode input for DTLTO" CHECK-SAME: "detail": "t1.a(t1.bc at [[#ARCHIVE_OFFSET:]]).1.[[PID:[A-F0-9]+]].o" CHECK: "name": "Total Add input for DTLTO" CHECK-SAME: "count": 2, CHECK: "name": "Total Remove temporary inputs for DTLTO" CHECK-SAME: "count": 1, -CHECK: "name": "Total Save input archive member for DTLTO" +CHECK: "name": "Total Serialize bitcode input for DTLTO" CHECK-SAME: "count": 1, #--- t1.ll diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 4011065ec37a..f992be9899e3 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -132,7 +132,12 @@ private: std::vector> ComdatTable; MemoryBufferRef MbRef; - bool IsMemberOfArchive = false; + bool IsFatLTOObject = false; + // For distributed compilation, each input must exist as an individual bitcode + // file on disk and be identified by its ModuleID. Archive members and FatLTO + // objects violate this. So, in these cases we flag that the bitcode must be + // written out to a new standalone file. + bool SerializeForDistribution = false; bool IsThinLTO = false; StringRef ArchivePath; StringRef MemberName; @@ -205,10 +210,16 @@ public: LLVM_ABI BitcodeModule &getPrimaryBitcodeModule(); // Returns the memory buffer reference for this input file. MemoryBufferRef getFileBuffer() const { return MbRef; } - // Returns true if this input file is a member of an archive. - bool isMemberOfArchive() const { return IsMemberOfArchive; } - // Mark this input file as a member of archive. 
- void memberOfArchive(bool MA) { IsMemberOfArchive = MA; } + // Returns true if this input should be serialized to disk for distribution. + // See the comment on SerializeForDistribution for details. + bool getSerializeForDistribution() const { return SerializeForDistribution; } + // Mark whether this input should be serialized to disk for distribution. + // See the comment on SerializeForDistribution for details. + void setSerializeForDistribution(bool SFD) { SerializeForDistribution = SFD; } + // Returns true if this bitcode came from a FatLTO object. + bool isFatLTOObject() const { return IsFatLTOObject; } + // Mark this bitcode as coming from a FatLTO object. + void fatLTOObject(bool FO) { IsFatLTOObject = FO; } // Returns true if bitcode is ThinLTO. bool isThinLTO() const { return IsThinLTO; } diff --git a/llvm/lib/DTLTO/DTLTO.cpp b/llvm/lib/DTLTO/DTLTO.cpp index 4d8f8ba0fc4a..4a1107e76e47 100644 --- a/llvm/lib/DTLTO/DTLTO.cpp +++ b/llvm/lib/DTLTO/DTLTO.cpp @@ -21,7 +21,6 @@ #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" @@ -29,7 +28,6 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" -#include #include using namespace llvm; @@ -135,25 +133,29 @@ lto::DTLTO::addInput(std::unique_ptr InputPtr) { StringRef ModuleId = Input->getName(); StringRef ArchivePath = Input->getArchivePath(); - // Only process archive members. - if (ArchivePath.empty()) + // In most cases, the module ID already points to an individual bitcode file + // on disk, so no further preparation for distribution is required. + if (ArchivePath.empty() && !Input->isFatLTOObject()) return Input; SmallString<64> NewModuleId; BitcodeModule &BM = Input->getPrimaryBitcodeModule(); - // Check if the archive is a thin archive. 
- Expected IsThin = isThinArchive(ArchivePath); - if (!IsThin) - return IsThin.takeError(); + // For a member of a thin archive that is not a FatLTO object, there is an + // existing file on disk that can be used, so we can avoid having to + // materialize. + Expected UseThinMember = + Input->isFatLTOObject() ? false : isThinArchive(ArchivePath); + if (!UseThinMember) + return UseThinMember.takeError(); - if (*IsThin) { + if (*UseThinMember) { // For thin archives, use the path to the actual file. NewModuleId = computeThinArchiveMemberPath(ArchivePath, Input->getMemberName()); } else { - // For regular archives, generate a unique name. - Input->memberOfArchive(true); + // For regular archives and FatLTO objects, generate a unique name. + Input->setSerializeForDistribution(true); // Create unique identifier using process ID and sequence number. std::string PID = utohexstr(sys::Process::getProcessId()); @@ -175,8 +177,8 @@ lto::DTLTO::addInput(std::unique_ptr InputPtr) { // previously terminated linker process and can be safely overwritten. Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) { StringRef ModuleId = Input->getName(); - if (Input->isMemberOfArchive()) { - TimeTraceScope TimeScope("Save input archive member for DTLTO", ModuleId); + if (Input->getSerializeForDistribution()) { + TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId); // Cleanup this file on abnormal process exit. if (!SaveTemps) llvm::sys::RemoveFileOnSignal(ModuleId); @@ -216,7 +218,7 @@ void lto::DTLTO::cleanup() { if (!SaveTemps) { TimeTraceScope TimeScope("Remove temporary inputs for DTLTO"); for (auto &Input : InputFiles) { - if (!Input->isMemberOfArchive()) + if (!Input->getSerializeForDistribution()) continue; std::error_code EC = sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);