[LLVM] Introduce 'llvm-offload-wrapper' tool (#153504)

Summary:
This is a standalone tool that does the wrapper stage of the
`clang-linker-wrapper`. We want this to be an external tool because
currently there's no easy way to split apart what the
clang-linker-wrapper is doing under the hood. With this tool, users can
manually extract files with `clang-offload-packager`, feed them through
`clang --target=<triple>` and then use this tool to generate a `.bc`
file they can give to the linker. The goal here is to make reproducing
the linker wrapper steps easier.
This commit is contained in:
Joseph Huber 2025-08-19 11:05:48 -05:00 committed by GitHub
parent a0eb9958eb
commit 4c9b7ff04c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 207 additions and 0 deletions

View File

@ -60,6 +60,10 @@ only for the linker wrapper will be forwarded to the wrapped linker job.
--v Display the version number and exit
-- The separator for the wrapped linker arguments
The linker wrapper will generate the appropriate runtime calls to register the
generated device binary with the offloading runtime. To perform this step
manually, use the ``llvm-offload-wrapper`` utility.
Relocatable Linking
===================

View File

@ -118,6 +118,7 @@ set(LLVM_TEST_DEPENDS
llvm-objdump
llvm-opt-fuzzer
llvm-opt-report
llvm-offload-wrapper
llvm-otool
llvm-pdbutil
llvm-profdata

View File

@ -0,0 +1,52 @@
; RUN: llvm-offload-wrapper --triple=x86-64 -kind=hip %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=HIP
; HIP: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
; HIP-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
; HIP-NEXT: @.fatbin_image = internal constant {{.*}}, section ".hip_fatbin"
; HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
; HIP-NEXT: @.hip.binary_handle = internal global ptr null
; HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
; HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
; HIP-NEXT: entry:
; HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
; HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
; HIP-NEXT: call void @.hip.globals_reg(ptr %0)
; HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
; HIP-NEXT: ret void
; HIP-NEXT: }
; HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
; HIP-NEXT: entry:
; HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
; HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
; HIP-NEXT: ret void
; HIP-NEXT: }
; RUN: llvm-offload-wrapper --triple=x86-64 -kind=cuda %s -o %t.bc
; RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=CUDA
; CUDA: @__start_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OA"
; CUDA-NEXT: @__stop_llvm_offload_entries = external hidden constant [0 x %struct.__tgt_offload_entry], section "llvm_offload_entries$OZ"
; CUDA-NEXT: @.fatbin_image = internal constant {{.*}}, section ".nv_fatbin"
; CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
; CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
; CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
; CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
; CUDA-NEXT: entry:
; CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
; CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
; CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
; CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
; CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
; CUDA-NEXT: ret void
; CUDA-NEXT: }
; CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
; CUDA-NEXT: entry:
; CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
; CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
; CUDA-NEXT: ret void
; CUDA-NEXT: }

View File

@ -0,0 +1,15 @@
# LLVM component libraries the llvm-offload-wrapper executable links against.
set(LLVM_LINK_COMPONENTS
BitWriter
Object
Option
FrontendOffloading
Support
TargetParser
)
# Build the tool from its single source file.
# NOTE(review): the intrinsics_gen dependency presumably ensures generated IR
# headers exist before this target compiles - confirm against the LLVM build.
add_llvm_tool(llvm-offload-wrapper
llvm-offload-wrapper.cpp
DEPENDS
intrinsics_gen
)

View File

@ -0,0 +1,135 @@
//===- llvm-offload-wrapper: Create runtime registration code for devices -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Provides a utility for generating runtime registration code for device code.
// We take a binary image (CUDA fatbinary, HIP offload bundle, LLVM binary) and
// create a new IR module that calls the respective runtime to load it on the
// device.
//
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Frontend/Offloading/OffloadWrapper.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/TargetParser/Host.h"
using namespace llvm;
// Command-line interface of the tool. All user-facing options are grouped in a
// single category so that unrelated LLVM options can be hidden from `-help`.

/// Hidden `-h` flag; main() prints the help message and exits when it is set.
static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
/// Category that every option of this tool is registered under.
static cl::OptionCategory
OffloadWrapeprCategory("llvm-offload-wrapper options");
/// Required `-kind=<openmp|cuda|hip>` option selecting which offloading
/// runtime the registration code is generated for.
static cl::opt<object::OffloadKind> Kind(
"kind", cl::desc("Wrap for offload kind:"), cl::cat(OffloadWrapeprCategory),
cl::Required,
cl::values(clEnumValN(object::OFK_OpenMP, "openmp", "Wrap OpenMP binaries"),
clEnumValN(object::OFK_Cuda, "cuda", "Wrap CUDA binaries"),
clEnumValN(object::OFK_HIP, "hip", "Wrap HIP binaries")));
/// `-o <file>`: path the generated bitcode module is written to.
static cl::opt<std::string> OutputFile("o", cl::desc("Write output to <file>."),
cl::value_desc("file"),
cl::cat(OffloadWrapeprCategory));
/// Positional input files (at least one) holding the device images to wrap.
static cl::list<std::string> InputFiles(cl::Positional,
cl::desc("Wrap input from <file>"),
cl::value_desc("file"), cl::OneOrMore,
cl::cat(OffloadWrapeprCategory));
/// `--triple=<triple>`: target triple for the wrapper module. Defaults to the
/// value returned by sys::getDefaultTargetTriple().
static cl::opt<std::string>
TheTriple("triple", cl::desc("Target triple for the wrapper module"),
cl::init(sys::getDefaultTargetTriple()),
cl::cat(OffloadWrapeprCategory));
/// Build a module containing the runtime registration code for the given
/// device images and write it as bitcode to the file named by `-o`.
///
/// \param BuffersToWrap raw contents of the device images to embed. CUDA and
///        HIP accept exactly one image (a fatbinary / offload bundle); OpenMP
///        accepts any number.
/// \returns Error::success() on success; otherwise an error describing an
///          unsupported offload kind, a wrapping failure reported by the
///          offloading library, or a failure to open / write the output file.
static Error wrapImages(ArrayRef<ArrayRef<char>> BuffersToWrap) {
  if (BuffersToWrap.size() > 1 &&
      (Kind == llvm::object::OFK_Cuda || Kind == llvm::object::OFK_HIP))
    return createStringError(
        "CUDA / HIP offloading uses a single fatbinary or offload bundle");

  LLVMContext Context;
  Module M("offload.wrapper.module", Context);
  // Use the triple requested through `--triple` (which defaults to the host
  // triple) so that the option actually affects the emitted module.
  M.setTargetTriple(Triple(Triple::normalize(TheTriple)));

  switch (Kind) {
  case llvm::object::OFK_OpenMP:
    if (Error Err = offloading::wrapOpenMPBinaries(
            M, BuffersToWrap, offloading::getOffloadEntryArray(M),
            /*Suffix=*/"", /*Relocatable=*/false))
      return Err;
    break;
  case llvm::object::OFK_Cuda:
    if (Error Err = offloading::wrapCudaBinary(
            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M),
            /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
      return Err;
    break;
  case llvm::object::OFK_HIP:
    if (Error Err = offloading::wrapHIPBinary(
            M, BuffersToWrap.front(), offloading::getOffloadEntryArray(M)))
      return Err;
    break;
  default:
    return createStringError(getOffloadKindName(Kind) +
                             " wrapping is not supported");
  }

  int FD = -1;
  if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD))
    return createFileError(OutputFile, EC);

  llvm::raw_fd_ostream OS(FD, /*shouldClose=*/true);
  WriteBitcodeToFile(M, OS);

  // Flush and close explicitly so that write failures are returned to the
  // caller instead of being left pending on the stream when it is destroyed.
  OS.close();
  if (OS.has_error()) {
    std::error_code EC = OS.error();
    OS.clear_error();
    return createFileError(OutputFile, EC);
  }
  return Error::success();
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
cl::HideUnrelatedOptions(OffloadWrapeprCategory);
cl::ParseCommandLineOptions(
argc, argv,
"Generate runtime registration code for a device binary image\n");
if (Help) {
cl::PrintHelpMessage();
return EXIT_SUCCESS;
}
auto ReportError = [argv](Error E) {
logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
exit(EXIT_FAILURE);
};
SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
SmallVector<ArrayRef<char>> BuffersToWrap;
for (StringRef Input : InputFiles) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFileOrSTDIN(Input);
if (std::error_code EC = BufferOrErr.getError())
ReportError(createFileError(Input, EC));
std::unique_ptr<MemoryBuffer> &Buffer =
Buffers.emplace_back(std::move(*BufferOrErr));
BuffersToWrap.emplace_back(
ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
}
if (Error Err = wrapImages(BuffersToWrap))
ReportError(std::move(Err));
return EXIT_SUCCESS;
}