[DTLTO][ELF][COFF] Add archive support for DTLTO. (#157043)

This patch implements support for handling archive members in DTLTO.
 
Unlike ThinLTO, where archive members are passed as in-memory buffers,
DTLTO requires archive members to be materialized as individual files on
the filesystem.
This is necessary because DTLTO invokes clang externally, which expects
file-based inputs.
To support this, this implementation identifies archive members among
the input files,
saves them to the filesystem, and updates their module_id to match their
file paths.
This commit is contained in:
Konstantin Belochapka 2025-12-31 00:40:30 -08:00 committed by GitHub
parent cc49ab77d8
commit b66557d8f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 542 additions and 62 deletions

View File

@ -0,0 +1,80 @@
REQUIRES: x86-registered-target,ld.lld,llvm-ar
# Test that a DTLTO link succeeds and outputs the expected set of files
# correctly when archives are present.
RUN: rm -rf %t && split-file %s %t && cd %t
# Compile sources into bitcode. -O2 is required for cross-module importing.
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c boo.c moo.c loo.c voo.c main.c
RUN: llvm-ar rcs archive.a foo.o boo.o moo.o
RUN: llvm-ar rcsT archive.thin.a loo.o voo.o
# Build with DTLTO.
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin \
RUN: -fuse-ld=lld -nostdlib -e main \
RUN: main.o archive.a archive.thin.a -o main.elf \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--save-temps
# Check that the required output files have been created.
RUN: ls | FileCheck %s --check-prefix=OUTPUTS
# JSON jobs description.
OUTPUTS-DAG: {{^}}main.[[PID:[0-9]+]].dist-file.json
# Main source.
OUTPUTS-DAG: {{^}}main.{{[0-9]+}}.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}main.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
# Regular archive members.
# Filename composition: <archive>(<member> at <offset>).<seq>.<hex pid>.<task>.<pid>.native.o[.thinlto.bc].
OUTPUTS-DAG: {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}archive.a(boo.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
OUTPUTS-DAG: {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}archive.a(foo.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
OUTPUTS-DAG: {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}archive.a(moo.o at {{[0-9]+}}).4.[[HEXPID]].4.[[PID]].native.o.thinlto.bc{{$}}
# Thin archive members.
OUTPUTS-DAG: {{^}}voo.{{[0-9]+}}.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}voo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
OUTPUTS-DAG: {{^}}loo.{{[0-9]+}}.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}loo.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
# Executable file.
OUTPUTS-DAG: {{^}}main.elf{{$}}
#--- foo.c
volatile int foo_int;
__attribute__((retain)) int foo(int x) { return x + foo_int; }
#--- boo.c
extern int foo(int x);
__attribute__((retain)) int boo(int x) { return foo(x); }
#--- moo.c
__attribute__((retain)) int moo() { return 3; }
#--- loo.c
extern int moo(int x);
__attribute__((retain)) int loo(int x) { return moo(x); }
#--- voo.c
extern int foo(int x);
extern int loo(int x);
__attribute__((retain)) int voo(int x) { return foo(x) + loo(x + 1) + 7; }
#--- main.c
extern int boo(int x);
extern int moo();
extern int voo(int x);
__attribute__((retain)) int main(int argc, char** argv) {
return boo(argc) + moo() + voo(argc + 3);
}

View File

@ -0,0 +1,35 @@
REQUIRES: x86-registered-target,ld.lld,llvm-ar
# Test that DTLTO works with a mixture of FullLTO and ThinLTO bitcode archive members
# where there is more than one LTO partition.
RUN: rm -rf %t && split-file %s %t && cd %t
RUN: %clang --target=x86_64-linux-gnu -flto -c one.c two.c
RUN: %clang --target=x86_64-linux-gnu -flto=thin -c three.c
RUN: llvm-ar rc archive.a one.o two.o three.o
# Build with DTLTO.
RUN: %clang --target=x86_64-linux-gnu -Werror -flto -fuse-ld=lld -nostdlib \
RUN: -Wl,--whole-archive archive.a \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--save-temps,--lto-partitions=2
# Show that the FullLTO modules have been prepared for distribution. This is
# not optimal but has no functional impact.
RUN: FileCheck %s --input-file=a.out.resolution.txt
CHECK: archive.a(one.o at {{.*}}).1.[[PID:[a-zA-Z0-9_]+]].o
CHECK: archive.a(two.o at {{.*}}).2.[[PID]].o
CHECK: archive.a(three.o at {{.*}}).3.[[PID]].o
#--- one.c
__attribute__((retain)) void one() {}
#--- two.c
__attribute__((retain)) void two() {}
#--- three.c
__attribute__((retain)) void three() {}

View File

@ -0,0 +1,55 @@
REQUIRES: x86-registered-target,ld.lld,llvm-ar
# Test that a DTLTO link succeeds when there are two archive member files with
# the same filename path component.
# Split this file into several sources.
RUN: rm -rf %t && split-file %s %t && cd %t
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c start.c
# Create first archive.
RUN: mkdir archive1 && cd archive1
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
RUN: llvm-ar rc archive.a t3.o t1.o
RUN: cd ..
# Create second archive.
RUN: mkdir archive2 && cd archive2
RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c ../t1.c ../t3.c
RUN: llvm-ar rc archive.a t3.o t1.o
RUN: cd ..
RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld \
RUN: -nostdlib -Wl,--undefined=t1,--undefined=t3 \
RUN: start.o archive1/archive.a archive2/archive.a -o main.elf \
RUN: -Wl,--save-temps \
RUN: -Wl,--thinlto-distributor=%python \
RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
RUN: -Wl,--thinlto-remote-compiler=%clang
# Check that the required output files have been created.
RUN: ls | FileCheck %s --check-prefix=OUTPUTS
# JSON jobs description.
OUTPUTS-DAG: {{^}}main.[[PID:[0-9]+]].dist-file.json
# Sources.
OUTPUTS-DAG: {{^}}start.{{[0-9]+}}.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}start.{{[0-9]+}}.[[PID]].native.o.thinlto.bc{{$}}
# Archive members.
# Filename composition: <archive>(<member> at <offset>).<seq>.<hex pid>.<task>.<pid>.native.o[.thinlto.bc].
OUTPUTS-DAG: {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID:[a-fA-F0-9]+]].2.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}archive.a(t3.o at {{[0-9]+}}).2.[[HEXPID]].2.[[PID]].native.o.thinlto.bc{{$}}
OUTPUTS-DAG: {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o{{$}}
OUTPUTS-DAG: {{^}}archive.a(t1.o at {{[0-9]+}}).3.[[HEXPID]].3.[[PID]].native.o.thinlto.bc{{$}}
#--- t1.c
__attribute__((retain)) void t1() { }
#--- start.c
__attribute__((retain)) void _start() { }
#--- t3.c
__attribute__((retain)) void t3() { }

View File

@ -1380,6 +1380,7 @@ BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
utostr(offsetInArchive)));
std::unique_ptr<lto::InputFile> obj = check(lto::InputFile::create(mbref));
obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
return make<BitcodeFile>(ctx.getSymtab(getMachineType(obj.get())), mb, obj,
lazy);
}

View File

@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/DTLTO/DTLTO.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
@ -133,8 +134,12 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
llvm::heavyweight_hardware_concurrency(ctx.config.thinLTOJobs));
}
ltoObj = std::make_unique<lto::LTO>(createConfig(), backend,
ctx.config.ltoPartitions);
if (ctx.config.dtltoDistributor.empty())
ltoObj = std::make_unique<lto::LTO>(createConfig(), backend,
ctx.config.ltoPartitions);
else
ltoObj = std::make_unique<lto::DTLTO>(createConfig(), backend,
ctx.config.ltoPartitions);
}
BitcodeCompiler::~BitcodeCompiler() = default;

View File

@ -20,7 +20,6 @@
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/AArch64AttributeParser.h"
#include "llvm/Support/ARMAttributeParser.h"
@ -1813,39 +1812,6 @@ static uint8_t getOsAbi(const Triple &t) {
}
}
// For DTLTO, bitcode member names must be valid paths to files on disk.
// For thin archives, resolve `memberPath` relative to the archive's location.
// Returns true if adjusted; false otherwise. Non-thin archives are unsupported.
static bool dtltoAdjustMemberPathIfThinArchive(Ctx &ctx, StringRef archivePath,
std::string &memberPath) {
assert(!archivePath.empty());
if (ctx.arg.dtltoDistributor.empty())
return false;
// Read the archive header to determine if it's a thin archive.
auto bufferOrErr =
MemoryBuffer::getFileSlice(archivePath, sizeof(ThinArchiveMagic) - 1, 0);
if (std::error_code ec = bufferOrErr.getError()) {
ErrAlways(ctx) << "cannot open " << archivePath << ": " << ec.message();
return false;
}
if (!bufferOrErr->get()->getBuffer().starts_with(ThinArchiveMagic))
return false;
SmallString<128> resolvedPath;
if (path::is_relative(memberPath)) {
resolvedPath = path::parent_path(archivePath);
path::append(resolvedPath, memberPath);
} else
resolvedPath = memberPath;
path::remove_dots(resolvedPath, /*remove_dot_dot=*/true);
memberPath = resolvedPath.str();
return true;
}
BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy)
: InputFile(ctx, BitcodeKind, mb) {
@ -1856,25 +1822,22 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
if (ctx.arg.thinLTOIndexOnly)
path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier());
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two archives define two members with the same name, this
// causes a collision which result in only one of the objects being taken
// into consideration at LTO time (which very likely causes undefined
// symbols later in the link stage). So we append file offset to make
// filename unique.
StringSaver &ss = ctx.saver;
StringRef name;
if (archiveName.empty() ||
dtltoAdjustMemberPathIfThinArchive(ctx, archiveName, path)) {
name = ss.save(path);
} else {
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two archives define two members with the same name, this
// causes a collision which result in only one of the objects being taken
// into consideration at LTO time (which very likely causes undefined
// symbols later in the link stage). So we append file offset to make
// filename unique.
name = ss.save(archiveName + "(" + path::filename(path) + " at " +
utostr(offsetInArchive) + ")");
}
StringRef name = archiveName.empty()
? ss.save(path)
: ss.save(archiveName + "(" + path::filename(path) +
" at " + utostr(offsetInArchive) + ")");
MemoryBufferRef mbref(mb.getBuffer(), name);
obj = CHECK2(lto::InputFile::create(mbref), this);
obj->setArchivePathAndName(archiveName, mb.getBufferIdentifier());
Triple t(obj->getTargetTriple());
ekind = getBitcodeELFKind(t);

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/DTLTO/DTLTO.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Caching.h"
@ -195,14 +196,18 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
ctx.arg.thinLTOEmitImportsFiles);
}
constexpr llvm::lto::LTO::LTOKind ltoModes[3] =
{llvm::lto::LTO::LTOKind::LTOK_UnifiedThin,
llvm::lto::LTO::LTOKind::LTOK_UnifiedRegular,
llvm::lto::LTO::LTOKind::LTOK_Default};
ltoObj = std::make_unique<lto::LTO>(createConfig(ctx), backend,
ctx.arg.ltoPartitions,
ltoModes[ctx.arg.ltoKind]);
constexpr llvm::lto::LTO::LTOKind ltoModes[3] = {
llvm::lto::LTO::LTOKind::LTOK_UnifiedThin,
llvm::lto::LTO::LTOKind::LTOK_UnifiedRegular,
llvm::lto::LTO::LTOKind::LTOK_Default};
if (ctx.arg.dtltoDistributor.empty())
ltoObj = std::make_unique<lto::LTO>(createConfig(ctx), backend,
ctx.arg.ltoPartitions,
ltoModes[ctx.arg.ltoKind]);
else
ltoObj = std::make_unique<lto::DTLTO>(createConfig(ctx), backend,
ctx.arg.ltoPartitions,
ltoModes[ctx.arg.ltoKind]);
// Initialize usedStartStop.
if (ctx.bitcodeFiles.empty())
return;

View File

@ -18,6 +18,7 @@
#include "lld/Common/Strings.h"
#include "lld/Common/TargetOptionsCommandFlags.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/DTLTO/DTLTO.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/Caching.h"

View File

@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/DTLTO/DTLTO.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/LTO/Config.h"
#include "llvm/LTO/LTO.h"

View File

@ -137,6 +137,11 @@ struct ParserCallbacks {
StringRef getModuleIdentifier() const { return ModuleIdentifier; }
// Assign a new module identifier to this bitcode module.
// NOTE(review): the argument is a StringRef; presumably only the view is
// stored, so the caller must keep the underlying storage alive for as long as
// this module is used — confirm against the declaration of ModuleIdentifier.
void setModuleIdentifier(llvm::StringRef ModuleId) {
ModuleIdentifier = ModuleId;
}
/// Read the bitcode module and prepare for lazy deserialization of function
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
/// If IsImporting is true, this module is being parsed for ThinLTO

View File

@ -0,0 +1,61 @@
//===- DTLTO.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
#ifndef LLVM_DTLTO_H
#define LLVM_DTLTO_H
#include "llvm/LTO/LTO.h"
#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace lto {
// LTO driver specialization used for Distributed ThinLTO. It records every
// input file it is given, rewrites the module IDs of archive members so that
// they name files on disk, and writes regular archive members out to those
// files before the link runs.
class DTLTO : public LTO {
public:
// Inherit constructors from LTO base class.
using LTO::LTO;
// Deletes the archive member files that were written to disk.
~DTLTO() { removeTempFiles(); }
private:
// Bump allocator that backs Saver. It is declared before Saver because Saver
// is initialized with a reference to it.
BumpPtrAllocator PtrAlloc;
// Owns the storage for the updated module IDs.
StringSaver Saver{PtrAlloc};
// Removes the files created for regular archive members.
LLVM_ABI void removeTempFiles();
// Determines if a file at the given path is a thin archive file. The answer
// is cached in ArchiveFiles.
Expected<bool> isThinArchive(const StringRef ArchivePath);
// Write the archive member content to a file named after the module ID.
// Does nothing for inputs that are not marked as regular archive members.
Error saveInputArchiveMember(lto::InputFile *Input);
// Iterates through all ThinLTO input files and saves their content
// to files if they are regular archive members.
Error saveInputArchiveMembers();
// Array of input bitcode files for LTO, in the order they were added.
std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
// A cache to avoid repeatedly reading the same archive file. Maps an archive
// path to whether that archive is a thin archive.
StringMap<bool> ArchiveFiles;
public:
// Adds the input file to the LTO object's list of input files.
// For archive members, generates a new module ID which is a path to a real
// file on a filesystem.
LLVM_ABI virtual Expected<std::shared_ptr<lto::InputFile>>
addInput(std::unique_ptr<lto::InputFile> InputPtr) override;
// Entry point for DTLTO archives support. Writes regular archive members to
// disk; returns the first error encountered while doing so.
LLVM_ABI virtual llvm::Error handleArchiveInputs() override;
};
} // namespace lto
} // namespace llvm
#endif // LLVM_DTLTO_H

View File

@ -130,6 +130,12 @@ private:
std::vector<StringRef> DependentLibraries;
std::vector<std::pair<StringRef, Comdat::SelectionKind>> ComdatTable;
MemoryBufferRef MbRef;
bool IsMemberOfArchive = false;
bool IsThinLTO = false;
StringRef ArchivePath;
StringRef MemberName;
public:
LLVM_ABI ~InputFile();
@ -188,6 +194,23 @@ public:
// Returns the only BitcodeModule from InputFile.
LLVM_ABI BitcodeModule &getSingleBitcodeModule();
// Returns the memory buffer reference for this input file.
MemoryBufferRef getFileBuffer() const { return MbRef; }
// Returns true if this input file is a member of an archive.
bool isMemberOfArchive() const { return IsMemberOfArchive; }
// Mark this input file as a member of archive.
void memberOfArchive(bool MA) { IsMemberOfArchive = MA; }
// Returns true if bitcode is ThinLTO.
bool isThinLTO() const { return IsThinLTO; }
// Store an archive path and a member name.
// Both values are kept as StringRef views rather than copies, so the caller
// must keep the referenced strings alive for as long as this InputFile may
// be queried through getArchivePath() / getMemberName().
void setArchivePathAndName(StringRef Path, StringRef Name) {
ArchivePath = Path;
MemberName = Name;
}
StringRef getArchivePath() const { return ArchivePath; }
StringRef getMemberName() const { return MemberName; }
private:
ArrayRef<Symbol> module_symbols(unsigned I) const {
@ -392,7 +415,7 @@ public:
LLVM_ABI LTO(Config Conf, ThinBackend Backend = {},
unsigned ParallelCodeGenParallelismLevel = 1,
LTOKind LTOMode = LTOK_Default);
LLVM_ABI ~LTO();
LLVM_ABI virtual ~LTO();
/// Add an input file to the LTO link, using the provided symbol resolutions.
/// The symbol resolutions must appear in the enumeration order given by
@ -591,6 +614,14 @@ private:
// Diagnostic optimization remarks file
LLVMRemarkFileHandle DiagnosticOutputFile;
public:
// Hook invoked by LTO::add() to take ownership of an input file. The default
// implementation only converts the unique_ptr into a shared_ptr; subclasses
// may override it to record or adjust the input before it is linked.
virtual Expected<std::shared_ptr<lto::InputFile>>
addInput(std::unique_ptr<lto::InputFile> InputPtr) {
return std::shared_ptr<lto::InputFile>(InputPtr.release());
}
// Hook invoked at the start of LTO::run(). The default implementation does
// nothing and reports success; subclasses may override it to prepare archive
// member inputs.
virtual llvm::Error handleArchiveInputs() { return llvm::Error::success(); }
};
/// The resolution for a symbol. The linker must provide a SymbolResolution for

View File

@ -22,6 +22,7 @@ add_subdirectory(Frontend)
add_subdirectory(Transforms)
add_subdirectory(Linker)
add_subdirectory(Analysis)
add_subdirectory(DTLTO)
add_subdirectory(LTO)
add_subdirectory(MC)
add_subdirectory(MCA)

View File

@ -0,0 +1,7 @@
add_llvm_component_library(LLVMDTLTO
DTLTO.cpp
LINK_COMPONENTS
Core
Support
)

212
llvm/lib/DTLTO/DTLTO.cpp Normal file
View File

@ -0,0 +1,212 @@
//===- DTLTO.cpp - Distributed ThinLTO implementation --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements support functions for Distributed ThinLTO, focusing on
// archive file handling.
//
//===----------------------------------------------------------------------===//
#include "llvm/DTLTO/DTLTO.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include <iostream>
#include <string>
using namespace llvm;
namespace {
// Writes the content of a memory buffer into a file.
//
// \param FileBuffer the bytes to write.
// \param FilePath   the destination path; the file is created or truncated.
// \returns Error::success() on success, or a string error if the file could
//          not be created or the data could not be written.
llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
  // A StringRef is not guaranteed to be NUL-terminated, so take an owning
  // copy before handing the path to printf-style formatting.
  const std::string Path = FilePath.str();

  std::error_code EC;
  raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::OF_None);
  if (EC) {
    return createStringError(inconvertibleErrorCode(),
                             "Failed to create file %s: %s", Path.c_str(),
                             EC.message().c_str());
  }

  OS.write(FileBuffer.data(), FileBuffer.size());
  // Flush and close now so that deferred write errors are observed here
  // rather than in the destructor.
  OS.close();
  if (OS.has_error()) {
    // The stream destructor calls report_fatal_error() if an error is still
    // pending; clear it so the failure is reported through the returned Error.
    OS.clear_error();
    return createStringError(inconvertibleErrorCode(),
                             "Failed writing to file %s", Path.c_str());
  }
  return Error::success();
}
// Resolve the on-disk location of a thin archive member.
//
// Thin archives store member names as file paths, usually relative to the
// directory that holds the archive. A relative member name is therefore
// joined onto the archive's parent directory; any other name is used as is.
// In both cases "." and ".." components are removed from the result.
SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
                                             const StringRef MemberName) {
  assert(!ArchivePath.empty() && "An archive file path must be non empty.");

  // Start from the member name itself and only rebuild the path when the
  // name needs to be anchored at the archive's directory.
  SmallString<64> Resolved(MemberName);
  if (sys::path::is_relative(MemberName)) {
    Resolved = sys::path::parent_path(ArchivePath);
    sys::path::append(Resolved, MemberName);
  }

  sys::path::remove_dots(Resolved, /*remove_dot_dot=*/true);
  return Resolved;
}
} // namespace
// Determines if a file at the given path is a thin archive file.
//
// This function uses a cache to avoid repeatedly reading the same file.
// It reads only the header portion (magic bytes) of the file to identify
// the archive type.
//
// \param ArchivePath path of the archive to inspect.
// \returns true for a thin archive, false for a regular archive, or an error
//          if the file cannot be read or does not start with an archive
//          magic.
Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
  // Return cached result if available.
  auto Cached = ArchiveFiles.find(ArchivePath);
  if (Cached != ArchiveFiles.end())
    return Cached->second;

  // A StringRef is not guaranteed to be NUL-terminated, so use an owning copy
  // for printf-style diagnostics.
  const std::string Path = ArchivePath.str();

  // sizeof() of the magic array counts its terminating NUL, which is not part
  // of the on-disk signature.
  constexpr size_t MagicSize = sizeof(object::ThinArchiveMagic) - 1;

  uint64_t FileSize = 0;
  if (std::error_code EC = sys::fs::file_size(ArchivePath, FileSize))
    return createStringError(inconvertibleErrorCode(),
                             "Failed to get file size from archive %s: %s",
                             Path.c_str(), EC.message().c_str());
  if (FileSize < MagicSize)
    return createStringError(inconvertibleErrorCode(),
                             "Archive file size is too small %s",
                             Path.c_str());

  // Read only the first few bytes containing the magic signature.
  ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
      MemoryBuffer::getFileSlice(ArchivePath, MagicSize, 0);
  if (std::error_code EC = MemBufferOrError.getError())
    return createStringError(inconvertibleErrorCode(),
                             "Failed to read from archive %s: %s",
                             Path.c_str(), EC.message().c_str());

  StringRef MemBuf = (*MemBufferOrError)->getBuffer();
  if (file_magic::archive != identify_magic(MemBuf))
    return createStringError(inconvertibleErrorCode(),
                             "Unknown format for archive %s", Path.c_str());

  const bool IsThin = MemBuf.starts_with(object::ThinArchiveMagic);

  // Cache the result
  ArchiveFiles[ArchivePath] = IsThin;
  return IsThin;
}
// Removes any temporary regular archive member files that were created during
// processing.
void lto::DTLTO::removeTempFiles() {
for (auto &Input : InputFiles) {
if (Input->isMemberOfArchive())
sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);
}
}
// Registers an input file with this DTLTO object and, for archive members,
// replaces the module identifier with one that names a file on disk:
//   - inputs that do not come from an archive are recorded unchanged;
//   - thin archive members get the resolved path of the member file;
//   - regular archive members are flagged as such and get a unique name of
//     the form "<old id filename>.<sequence>.<hex pid>.o".
// Returns the recorded input, or the error raised while probing the archive.
Expected<std::shared_ptr<lto::InputFile>>
lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
  // Take ownership and remember the file.
  InputFiles.emplace_back(InputPtr.release());
  std::shared_ptr<lto::InputFile> &File = InputFiles.back();

  // Nothing more to do for inputs that are not archive members.
  const StringRef Archive = File->getArchivePath();
  if (Archive.empty())
    return File;

  BitcodeModule &Module = File->getSingleBitcodeModule();

  Expected<bool> ThinOrErr = isThinArchive(Archive);
  if (!ThinOrErr)
    return ThinOrErr.takeError();

  SmallString<64> Id;
  if (!*ThinOrErr) {
    // Regular archive: the member has no file of its own, so invent a name
    // from the process ID and the member's position in the input list.
    File->memberOfArchive(true);
    const std::string HexPid = utohexstr(sys::Process::getProcessId());
    const std::string Index = std::to_string(InputFiles.size());
    Id = {sys::path::filename(File->getName()), ".", Index, ".", HexPid, ".o"};
  } else {
    // Thin archive: the member already exists as a file; point at it.
    Id = computeThinArchiveMemberPath(Archive, File->getMemberName());
  }

  // Persist the new identifier and install it on the module.
  Module.setModuleIdentifier(Saver.save(Id.str()));
  return File;
}
// Dumps the bitcode of a regular archive member into a file whose path is the
// input's module ID. Inputs that are not flagged as archive members are
// skipped. An existing file of the same name is overwritten; it is most
// likely a leftover from an earlier linker process that was terminated.
Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
  if (!Input->isMemberOfArchive())
    return Error::success();
  return saveBuffer(Input->getFileBuffer().getBuffer(), Input->getName());
}
// Walks the recorded inputs and, for each one that is ThinLTO bitcode, writes
// it to its own file when it is a regular archive member. Stops at and
// returns the first failure.
Error lto::DTLTO::saveInputArchiveMembers() {
  for (const std::shared_ptr<lto::InputFile> &File : InputFiles)
    if (File->isThinLTO())
      if (Error Err = saveInputArchiveMember(File.get()))
        return Err;
  return Error::success();
}
// Entry point for DTLTO archives support.
//
// Writes regular archive members to separate files where needed. Must be
// called after all inputs are added but before optimization begins.
llvm::Error lto::DTLTO::handleArchiveInputs() {
  return saveInputArchiveMembers();
}

View File

@ -25,6 +25,7 @@ add_llvm_component_library(LLVMLTO
CodeGen
CodeGenTypes
Core
DTLTO
Extensions
IPO
InstCombine

View File

@ -25,6 +25,7 @@
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/DTLTO/DTLTO.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Intrinsics.h"
@ -578,6 +579,8 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
File->ComdatTable = FOrErr->TheReader.getComdatTable();
File->MbRef =
Object; // Save a memory buffer reference to an input file object.
for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
size_t Begin = File->Symbols.size();
@ -738,13 +741,19 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
assert(ResI == Res.end());
}
Error LTO::add(std::unique_ptr<InputFile> Input,
Error LTO::add(std::unique_ptr<InputFile> InputPtr,
ArrayRef<SymbolResolution> Res) {
llvm::TimeTraceScope timeScope("LTO add input", Input->getName());
llvm::TimeTraceScope timeScope("LTO add input", InputPtr->getName());
assert(!CalledGetMaxTasks);
Expected<std::shared_ptr<InputFile>> InputOrErr =
addInput(std::move(InputPtr));
if (!InputOrErr)
return InputOrErr.takeError();
InputFile *Input = (*InputOrErr).get();
if (Conf.ResolutionFile)
writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
writeToResolutionFile(*Conf.ResolutionFile, Input, Res);
if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
Triple InputTriple(Input->getTargetTriple());
@ -793,6 +802,10 @@ LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
LTOMode = LTOK_UnifiedThin;
bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
// If any of the modules inside of a input bitcode file was compiled with
// ThinLTO, we assume that the whole input file also was compiled with
// ThinLTO.
Input.IsThinLTO = IsThinLTO;
auto ModSyms = Input.module_symbols(ModI);
addModuleToGlobalRes(ModSyms, Res,
@ -1203,6 +1216,9 @@ Error LTO::checkPartiallySplit() {
}
Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
if (Error EC = handleArchiveInputs())
return EC;
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;