[DTLTO] support distributing bitcode from FatLTO objects (#176928)

We already have code to extract bitcode files from archives so they can
be distributed. Extend this code to extract bitcode from FatLTO objects
too, which otherwise cannot be used with DTLTO.
This commit is contained in:
Rose Hudson 2026-01-28 17:45:58 +00:00 committed by GitHub
parent 3db365d06e
commit e45ea95dbe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 93 additions and 23 deletions

View File

@ -0,0 +1,55 @@
REQUIRES: ld.lld,llvm-ar
# Test that a DTLTO link succeeds and produces the expected set of output
# files when FatLTO objects are present.
RUN: rm -rf %t && split-file %s %t && cd %t
# Compile bitcode. -O2 is required for cross-module importing.
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -ffat-lto-objects -c \
RUN: foo.c boo.c start.c
# We want to test FatLTO objects when included in archives: foo.a is a regular
# archive and boo.a is a thin archive (the T modifier).
RUN: llvm-ar rcs foo.a foo.o
RUN: llvm-ar rcsT boo.a boo.o
# Build with DTLTO.
RUN: %clang --target=x86_64-linux-gnu -flto=thin -ffat-lto-objects \
RUN: -fuse-ld=lld -nostdlib foo.a boo.a start.o -Wl,--save-temps \
RUN: -fthinlto-distributor=%python \
RUN: -Xthinlto-distributor=%llvm_src_root/utils/dtlto/local.py
# Check that the required output files have been created.
RUN: ls | FileCheck %s
# thin archive member: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o
CHECK-DAG: {{^}}boo.a(boo.o at [[#BOO_OFFSET:]]).3.[[#%X,HEXPID:]].3.[[#PID:]].native.o{{$}}
# archive member: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o
CHECK-DAG: {{^}}foo.a(foo.o at [[#FOO_OFFSET:]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o{{$}}
# FatLTO object: <file>.<task>.<pid>.<task>.<pid>.native.o
CHECK-DAG: {{^}}start.o.1.[[#%X,HEXPID]].1.[[#PID]].native.o{{$}}
# Check that all objects are named in all of the index files.
# We expect this to happen because each object references symbols from the
# others.
RUN: llvm-dis *.1.*.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=OBJECTS
RUN: llvm-dis *.2.*.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=OBJECTS
RUN: llvm-dis *.3.*.thinlto.bc -o - | \
RUN: FileCheck %s --check-prefixes=OBJECTS
OBJECTS-DAG: foo.o
OBJECTS-DAG: boo.o
OBJECTS-DAG: start.o
#--- foo.c
// foo.c: foo() calls into the other two inputs (boo.c and start.c), so this
// object references symbols defined in both of them.
extern int boo(int);
extern int _start(int);

__attribute__((retain)) int foo(int x) {
  int total = x;
  total += boo(x);
  total += _start(x);
  return total;
}
#--- boo.c
// boo.c: boo() calls into the other two inputs (foo.c and start.c), so this
// object references symbols defined in both of them.
extern int foo(int);
extern int _start(int);

__attribute__((retain)) int boo(int x) {
  int total = x;
  total += foo(x);
  total += _start(x);
  return total;
}
#--- start.c
// start.c: _start() calls into the other two inputs (foo.c and boo.c), so this
// object references symbols defined in both of them.
extern int foo(int);
extern int boo(int);

__attribute__((retain)) int _start(int x) {
  int total = x;
  total += foo(x);
  total += boo(x);
  return total;
}

View File

@ -236,8 +236,10 @@ bool LinkerDriver::tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName,
IRObjectFile::findBitcodeInMemBuffer(mb);
if (errorToBool(fatLTOData.takeError()))
return false;
files.push_back(std::make_unique<BitcodeFile>(ctx, *fatLTOData, archiveName,
offsetInArchive, lazy));
auto file = std::make_unique<BitcodeFile>(ctx, *fatLTOData, archiveName,
offsetInArchive, lazy);
file->obj->fatLTOObject(true);
files.push_back(std::move(file));
return true;
}

View File

@ -33,13 +33,13 @@ RUN: %python filter_order_and_pprint.py %t.json | FileCheck %s
CHECK: "name": "Add input for DTLTO"
CHECK: "name": "Add input for DTLTO"
CHECK: "name": "Remove temporary inputs for DTLTO"
CHECK: "name": "Save input archive member for DTLTO"
CHECK: "name": "Serialize bitcode input for DTLTO"
CHECK-SAME: "detail": "t1.a(t1.bc at [[#ARCHIVE_OFFSET:]]).1.[[PID:[A-F0-9]+]].o"
CHECK: "name": "Total Add input for DTLTO"
CHECK-SAME: "count": 2,
CHECK: "name": "Total Remove temporary inputs for DTLTO"
CHECK-SAME: "count": 1,
CHECK: "name": "Total Save input archive member for DTLTO"
CHECK: "name": "Total Serialize bitcode input for DTLTO"
CHECK-SAME: "count": 1,
#--- t1.ll

View File

@ -132,7 +132,12 @@ private:
std::vector<std::pair<StringRef, Comdat::SelectionKind>> ComdatTable;
MemoryBufferRef MbRef;
bool IsMemberOfArchive = false;
bool IsFatLTOObject = false;
// For distributed compilation, each input must exist as an individual bitcode
// file on disk and be identified by its ModuleID. Archive members and FatLTO
// objects violate this. So, in these cases we flag that the bitcode must be
// written out to a new standalone file.
bool SerializeForDistribution = false;
bool IsThinLTO = false;
StringRef ArchivePath;
StringRef MemberName;
@ -205,10 +210,16 @@ public:
LLVM_ABI BitcodeModule &getPrimaryBitcodeModule();
// Returns the memory buffer reference for this input file.
MemoryBufferRef getFileBuffer() const { return MbRef; }
// Returns true if this input file is a member of an archive.
bool isMemberOfArchive() const { return IsMemberOfArchive; }
// Mark this input file as a member of archive.
void memberOfArchive(bool MA) { IsMemberOfArchive = MA; }
// Returns true if this input should be serialized to disk for distribution.
// See the comment on SerializeForDistribution for details.
bool getSerializeForDistribution() const { return SerializeForDistribution; }
// Mark whether this input should be serialized to disk for distribution.
// See the comment on SerializeForDistribution for details.
void setSerializeForDistribution(bool SFD) { SerializeForDistribution = SFD; }
// Returns true if this bitcode came from a FatLTO object.
bool isFatLTOObject() const { return IsFatLTOObject; }
// Mark this bitcode as coming from a FatLTO object.
void fatLTOObject(bool FO) { IsFatLTOObject = FO; }
// Returns true if bitcode is ThinLTO.
bool isThinLTO() const { return IsThinLTO; }

View File

@ -21,7 +21,6 @@
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
@ -29,7 +28,6 @@
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <iostream>
#include <string>
using namespace llvm;
@ -135,25 +133,29 @@ lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
StringRef ModuleId = Input->getName();
StringRef ArchivePath = Input->getArchivePath();
// Only process archive members.
if (ArchivePath.empty())
// In most cases, the module ID already points to an individual bitcode file
// on disk, so no further preparation for distribution is required.
if (ArchivePath.empty() && !Input->isFatLTOObject())
return Input;
SmallString<64> NewModuleId;
BitcodeModule &BM = Input->getPrimaryBitcodeModule();
// Check if the archive is a thin archive.
Expected<bool> IsThin = isThinArchive(ArchivePath);
if (!IsThin)
return IsThin.takeError();
// For a member of a thin archive that is not a FatLTO object, there is an
// existing file on disk that can be used, so we can avoid having to
// serialize a new copy of the bitcode.
Expected<bool> UseThinMember =
Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
if (!UseThinMember)
return UseThinMember.takeError();
if (*IsThin) {
if (*UseThinMember) {
// For thin archives, use the path to the actual file.
NewModuleId =
computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
} else {
// For regular archives, generate a unique name.
Input->memberOfArchive(true);
// For regular archives and FatLTO objects, generate a unique name.
Input->setSerializeForDistribution(true);
// Create unique identifier using process ID and sequence number.
std::string PID = utohexstr(sys::Process::getProcessId());
@ -175,8 +177,8 @@ lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
// previously terminated linker process and can be safely overwritten.
Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
StringRef ModuleId = Input->getName();
if (Input->isMemberOfArchive()) {
TimeTraceScope TimeScope("Save input archive member for DTLTO", ModuleId);
if (Input->getSerializeForDistribution()) {
TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
// Cleanup this file on abnormal process exit.
if (!SaveTemps)
llvm::sys::RemoveFileOnSignal(ModuleId);
@ -216,7 +218,7 @@ void lto::DTLTO::cleanup() {
if (!SaveTemps) {
TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
for (auto &Input : InputFiles) {
if (!Input->isMemberOfArchive())
if (!Input->getSerializeForDistribution())
continue;
std::error_code EC =
sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);