This change fixes two issues when processing multi-module bitcode files in DTLTO: 1. The DTLTO archive handling code incorrectly uses getSingleBitcodeModule(), which asserts when the bitcode file contains more than one module. 2. The temporary file containing the contents of an input archive member was not emitted for multi-module bitcode files. This was due to incorrect logic for recording whether a bitcode input contains any ThinLTO modules. In a typical multi-module bitcode file, the first module is a ThinLTO module while a subsequent auxiliary module is non-ThinLTO. When modules are processed in order, the auxiliary module causes the entire bitcode file to be classified as non-ThinLTO, and the archive-member emission logic then incorrectly skips it. In addition, this patch adds a test that verifies that multi-module bitcode files can be successfully linked with DTLTO. The test reproduces both issues as they existed prior to this change. SIE Tracker: TOOLCHAIN-21008
213 lines
7.5 KiB
C++
213 lines
7.5 KiB
C++
//===- Dtlto.cpp - Distributed ThinLTO implementation --------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// \file
|
|
// This file implements support functions for Distributed ThinLTO, focusing on
|
|
// archive file handling.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/DTLTO/DTLTO.h"
|
|
|
|
#include "llvm/ADT/SmallString.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/BinaryFormat/Magic.h"
|
|
#include "llvm/LTO/LTO.h"
|
|
#include "llvm/Object/Archive.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/ManagedStatic.h"
|
|
#include "llvm/Support/MemoryBufferRef.h"
|
|
#include "llvm/Support/Path.h"
|
|
#include "llvm/Support/Process.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <iostream>
|
|
#include <string>
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
// Writes the content of a memory buffer into a file.
|
|
llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
|
|
std::error_code EC;
|
|
raw_fd_ostream OS(FilePath.str(), EC, sys::fs::OpenFlags::OF_None);
|
|
if (EC) {
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Failed to create file %s: %s", FilePath.data(),
|
|
EC.message().c_str());
|
|
}
|
|
OS.write(FileBuffer.data(), FileBuffer.size());
|
|
if (OS.has_error()) {
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Failed writing to file %s", FilePath.data());
|
|
}
|
|
return Error::success();
|
|
}
|
|
|
|
// Compute the file path for a thin archive member.
|
|
//
|
|
// For thin archives, an archive member name is typically a file path relative
|
|
// to the archive file's directory. This function resolves that path.
|
|
SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
|
|
const StringRef MemberName) {
|
|
assert(!ArchivePath.empty() && "An archive file path must be non empty.");
|
|
SmallString<64> MemberPath;
|
|
if (sys::path::is_relative(MemberName)) {
|
|
MemberPath = sys::path::parent_path(ArchivePath);
|
|
sys::path::append(MemberPath, MemberName);
|
|
} else
|
|
MemberPath = MemberName;
|
|
sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true);
|
|
return MemberPath;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Determines if a file at the given path is a thin archive file.
|
|
//
|
|
// This function uses a cache to avoid repeatedly reading the same file.
|
|
// It reads only the header portion (magic bytes) of the file to identify
|
|
// the archive type.
|
|
Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
|
|
// Return cached result if available.
|
|
auto Cached = ArchiveFiles.find(ArchivePath);
|
|
if (Cached != ArchiveFiles.end())
|
|
return Cached->second;
|
|
|
|
uint64_t FileSize = -1;
|
|
bool IsThin = false;
|
|
std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
|
|
if (EC)
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Failed to get file size from archive %s: %s",
|
|
ArchivePath.data(), EC.message().c_str());
|
|
if (FileSize < sizeof(object::ThinArchiveMagic))
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Archive file size is too small %s",
|
|
ArchivePath.data());
|
|
|
|
// Read only the first few bytes containing the magic signature.
|
|
ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
|
|
MemoryBuffer::getFileSlice(ArchivePath, sizeof(object::ThinArchiveMagic),
|
|
0);
|
|
|
|
if ((EC = MemBufferOrError.getError()))
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Failed to read from archive %s: %s",
|
|
ArchivePath.data(), EC.message().c_str());
|
|
|
|
StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
|
|
if (file_magic::archive != identify_magic(MemBuf))
|
|
return createStringError(inconvertibleErrorCode(),
|
|
"Unknown format for archive %s",
|
|
ArchivePath.data());
|
|
|
|
IsThin = MemBuf.starts_with(object::ThinArchiveMagic);
|
|
|
|
// Cache the result
|
|
ArchiveFiles[ArchivePath] = IsThin;
|
|
return IsThin;
|
|
}
|
|
|
|
// Removes any temporary regular archive member files that were created during
|
|
// processing.
|
|
void lto::DTLTO::removeTempFiles() {
|
|
for (auto &Input : InputFiles) {
|
|
if (Input->isMemberOfArchive())
|
|
sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);
|
|
}
|
|
}
|
|
|
|
// This function performs the following tasks:
|
|
// 1. Adds the input file to the LTO object's list of input files.
|
|
// 2. For thin archive members, generates a new module ID which is a path to a
|
|
// thin archive member file.
|
|
// 3. For regular archive members, generates a new unique module ID.
|
|
// 4. Updates the bitcode module's identifier.
|
|
Expected<std::shared_ptr<lto::InputFile>>
|
|
lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
|
|
|
|
// Add the input file to the LTO object.
|
|
InputFiles.emplace_back(InputPtr.release());
|
|
std::shared_ptr<lto::InputFile> &Input = InputFiles.back();
|
|
|
|
StringRef ModuleId = Input->getName();
|
|
StringRef ArchivePath = Input->getArchivePath();
|
|
|
|
// Only process archive members.
|
|
if (ArchivePath.empty())
|
|
return Input;
|
|
|
|
SmallString<64> NewModuleId;
|
|
BitcodeModule &BM = Input->getPrimaryBitcodeModule();
|
|
|
|
// Check if the archive is a thin archive.
|
|
Expected<bool> IsThin = isThinArchive(ArchivePath);
|
|
if (!IsThin)
|
|
return IsThin.takeError();
|
|
|
|
if (*IsThin) {
|
|
// For thin archives, use the path to the actual file.
|
|
NewModuleId =
|
|
computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
|
|
} else {
|
|
// For regular archives, generate a unique name.
|
|
Input->memberOfArchive(true);
|
|
|
|
// Create unique identifier using process ID and sequence number.
|
|
std::string PID = utohexstr(sys::Process::getProcessId());
|
|
std::string Seq = std::to_string(InputFiles.size());
|
|
|
|
NewModuleId = {sys::path::filename(ModuleId), ".", Seq, ".", PID, ".o"};
|
|
}
|
|
|
|
// Update the module identifier and save it.
|
|
BM.setModuleIdentifier(Saver.save(NewModuleId.str()));
|
|
|
|
return Input;
|
|
}
|
|
|
|
// Write the archive member content to a file named after the module ID.
|
|
// If a file with that name already exists, it's likely a leftover from a
|
|
// previously terminated linker process and can be safely overwritten.
|
|
Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
|
|
StringRef ModuleId = Input->getName();
|
|
if (Input->isMemberOfArchive()) {
|
|
MemoryBufferRef MemoryBufferRef = Input->getFileBuffer();
|
|
if (Error EC = saveBuffer(MemoryBufferRef.getBuffer(), ModuleId))
|
|
return EC;
|
|
}
|
|
return Error::success();
|
|
}
|
|
|
|
// Iterates through all ThinLTO-enabled input files and saves their content
|
|
// to separate files if they are regular archive members.
|
|
Error lto::DTLTO::saveInputArchiveMembers() {
|
|
for (auto &Input : InputFiles) {
|
|
if (!Input->isThinLTO())
|
|
continue;
|
|
if (Error EC = saveInputArchiveMember(Input.get()))
|
|
return EC;
|
|
}
|
|
return Error::success();
|
|
}
|
|
|
|
// Entry point for DTLTO archives support.
|
|
//
|
|
// Sets up the temporary file remover and processes archive members.
|
|
// Must be called after all inputs are added but before optimization begins.
|
|
llvm::Error lto::DTLTO::handleArchiveInputs() {
|
|
|
|
// Process and save archive members to separate files if needed.
|
|
if (Error EC = saveInputArchiveMembers())
|
|
return EC;
|
|
return Error::success();
|
|
}
|