[HIP] Support managed variables using the new driver (#123437)

Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.

In the future, a more extensible entry format would be nice, but that
can be done later.
This commit is contained in:
Joseph Huber 2025-01-22 09:13:14 -06:00 committed by GitHub
parent a7a8694c5a
commit 70a16b90ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 112 additions and 58 deletions

View File

@ -1221,12 +1221,34 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast<int32_t>(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
(I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
: llvm::offloading::OffloadGlobalEntry) |
Flags,
/*Data=*/0, Section);
// TODO: Update the offloading entries struct to avoid this indirection.
if (I.Flags.isManaged()) {
assert(I.Var->getName().ends_with(".managed") &&
"HIP managed variables not transformed");
// Create a struct to contain the two variables.
auto *ManagedVar = M.getNamedGlobal(
I.Var->getName().drop_back(StringRef(".managed").size()));
llvm::Constant *StructData[] = {ManagedVar, I.Var};
llvm::Constant *Initializer = llvm::ConstantStruct::get(
llvm::offloading::getManagedTy(M), StructData);
auto *Struct = new llvm::GlobalVariable(
M, llvm::offloading::getManagedTy(M),
/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, Initializer,
I.Var->getName(), /*InsertBefore=*/nullptr,
llvm::GlobalVariable::NotThreadLocal,
M.getDataLayout().getDefaultGlobalsAddressSpace());
llvm::offloading::emitOffloadingEntry(
M, Struct, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalManagedEntry | Flags,
/*Data=*/static_cast<uint32_t>(I.Var->getAlignment()), Section);
} else {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
llvm::offloading::OffloadGlobalEntry | Flags,
/*Data=*/0, Section);
}
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,

View File

@ -1,4 +1,4 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
@ -14,50 +14,68 @@
#include "Inputs/cuda.h"
#define __managed__ __attribute__((managed))
//.
// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
//.
// HIP: @managed.managed = global i32 0, align 4
// HIP: @managed = externally_initialized global ptr null
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
// HIP: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
//.
// CUDA-COFF: @managed = dso_local global i32 undef, align 4
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed, ptr @.offloading.entry_name.3, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.4, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.5, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
//.
// HIP-COFF: @managed.managed = dso_local global i32 0, align 4
// HIP-COFF: @managed = dso_local externally_initialized global ptr null
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @managed.managed.3 = private constant %struct.__managed_var { ptr @managed, ptr @managed.managed }
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { ptr @managed.managed.3, ptr @.offloading.entry_name.4, i64 4, i32 1, i32 4 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.5, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.6 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.6, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
@ -91,6 +109,7 @@ __global__ void foo() {}
__device__ int var = 1;
const __device__ int constant = 1;
extern __device__ int external;
__device__ __managed__ int managed = 0;
// CUDA-LABEL: @_Z21__device_stub__kernelv(
// CUDA-NEXT: entry:
@ -137,28 +156,3 @@ template <typename T, int dim = 1, int mode = 0>
struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
texture<void> tex;
//.
// CUDA: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// CUDA: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// CUDA: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// CUDA: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// CUDA: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.
// HIP: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// HIP: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// HIP: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// HIP: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// HIP: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.
// CUDA-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// CUDA-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// CUDA-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// CUDA-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// CUDA-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.
// HIP-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// HIP-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// HIP-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// HIP-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// HIP-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.

View File

@ -87,7 +87,7 @@
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end
// CUDA: while.entry:
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %12, %if.end ]
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %13, %if.end ]
// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
@ -125,7 +125,11 @@
// CUDA-NEXT: br label %if.end
// CUDA: sw.managed:
// CUDA-NEXT: br label %if.end
// CUDA-NEXT: %managed.addr = load ptr, ptr %addr, align 8
// CUDA-NEXT: %12 = getelementptr inbounds ptr, ptr %addr, i64 1
// CUDA-NEXT: %managed.addr2 = load ptr, ptr %12, align 8
// CUDA-NEXT: call void @__cudaRegisterManagedVar(ptr %0, ptr %managed.addr, ptr %managed.addr2, ptr %name, i64 %size, i32 %textype)
// CUDA-NEXT: br label %if.end
// CUDA: sw.surface:
// CUDA-NEXT: br label %if.end
@ -134,9 +138,9 @@
// CUDA-NEXT: br label %if.end
// CUDA: if.end:
// CUDA-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// CUDA-NEXT: %13 = icmp eq ptr %12, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %13, label %while.end, label %while.entry
// CUDA-NEXT: %13 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// CUDA-NEXT: %14 = icmp eq ptr %13, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %14, label %while.end, label %while.entry
// CUDA: while.end:
// CUDA-NEXT: ret void
@ -187,7 +191,7 @@
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end
// HIP: while.entry:
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %12, %if.end ]
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %13, %if.end ]
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
@ -225,7 +229,11 @@
// HIP-NEXT: br label %if.end
// HIP: sw.managed:
// HIP-NEXT: br label %if.end
// HIP-NEXT: %managed.addr = load ptr, ptr %addr, align 8
// HIP-NEXT: %12 = getelementptr inbounds ptr, ptr %addr, i64 1
// HIP-NEXT: %managed.addr2 = load ptr, ptr %12, align 8
// HIP-NEXT: call void @__hipRegisterManagedVar(ptr %0, ptr %managed.addr, ptr %managed.addr2, ptr %name, i64 %size, i32 %textype)
// HIP-NEXT: br label %if.end
// HIP: sw.surface:
// HIP-NEXT: call void @__hipRegisterSurface(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %textype, i32 %extern)
@ -236,9 +244,9 @@
// HIP-NEXT: br label %if.end
// HIP: if.end:
// HIP-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %13 = icmp eq ptr %12, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %13, label %while.end, label %while.entry
// HIP-NEXT: %13 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %14 = icmp eq ptr %13, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %14, label %while.end, label %while.entry
// HIP: while.end:
// HIP-NEXT: ret void

View File

@ -55,6 +55,10 @@ enum OffloadEntryKindFlag : uint32_t {
/// globals that will be registered with the offloading runtime.
StructType *getEntryTy(Module &M);
/// Returns the struct type we store the two pointers for CUDA / HIP managed
/// variables in. Necessary until we widen the offload entry struct.
StructType *getManagedTy(Module &M);
/// Create an offloading section struct used to register this global at
/// runtime.
///

View File

@ -353,6 +353,16 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
FunctionCallee RegVar = M.getOrInsertFunction(
IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
// Get the __cudaRegisterSurface function declaration.
FunctionType *RegManagedVarTy =
FunctionType::get(Type::getVoidTy(C),
{Int8PtrPtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy,
getSizeTTy(M), Type::getInt32Ty(C)},
/*isVarArg=*/false);
FunctionCallee RegManagedVar = M.getOrInsertFunction(
IsHIP ? "__hipRegisterManagedVar" : "__cudaRegisterManagedVar",
RegManagedVarTy);
// Get the __cudaRegisterSurface function declaration.
FunctionType *RegSurfaceTy =
FunctionType::get(Type::getVoidTy(C),
@ -466,6 +476,12 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
// Create managed variable registration code.
Builder.SetInsertPoint(SwManagedBB);
auto *ManagedVar = Builder.CreateLoad(Int8PtrTy, Addr, "managed.addr");
auto *ManagedAddr = Builder.CreateInBoundsGEP(
Int8PtrTy, Addr, {ConstantInt::get(Builder.getInt64Ty(), 1)});
auto *Managed = Builder.CreateLoad(Int8PtrTy, ManagedAddr, "managed.addr");
Builder.CreateCall(RegManagedVar, {RegGlobalsFn->arg_begin(), ManagedVar,
Managed, Name, Size, Data});
Builder.CreateBr(IfEndBB);
Switch->addCase(Builder.getInt32(llvm::offloading::OffloadGlobalManagedEntry),
SwManagedBB);

View File

@ -33,6 +33,16 @@ StructType *offloading::getEntryTy(Module &M) {
return EntryTy;
}
StructType *offloading::getManagedTy(Module &M) {
LLVMContext &C = M.getContext();
StructType *StructTy = StructType::getTypeByName(C, "struct.__managed_var");
if (!StructTy)
StructTy = llvm::StructType::create("struct.__managed_var",
PointerType::getUnqual(M.getContext()),
PointerType::getUnqual(M.getContext()));
return StructTy;
}
// TODO: Rework this interface to be more generic.
std::pair<Constant *, GlobalVariable *>
offloading::getOffloadingEntryInitializer(Module &M, Constant *Addr,