llvm-project/clang/test/CodeGenCUDA/host-used-device-var.cu
Yaxun (Sam) Liu 33a6ce1837
[HIP] Allow partial linking for -fgpu-rdc (#81700)
`-fgpu-rdc` mode allows device functions call device functions in
different TU. However, currently all device objects have to be linked
together since only one fat binary is supported. This is time consuming
for AMDGPU backend since it only supports LTO.

There are use cases that objects can be divided into groups in which
device functions are self-contained but host functions are not. It is
desirable to link/optimize/codegen the device code and generate a fatbin
for each group, whereas partially link the host code with `ld -r` or
generate a static library by using the `--emit-static-lib` option of
clang. This avoids linking all device code together, therefore decreases
the linking time for `-fgpu-rdc`.

Previously, clang emits an external symbol `__hip_fatbin` for all
objects for `-fgpu-rdc`. With this patch, clang emits an unique external
symbol `__hip_fatbin_{cuid}` for the fat binary for each object. When a
group of objects are linked together to generate a fatbin, the symbols
are merged by alias and point to the same fat binary. Each group has its
own fat binary. One executable or shared library can have multiple fat
binaries. Device linking is done for undefined fab binary symbols only
to avoid repeated linking. `__hip_gpubin_handle` is also uniquefied and
merged to avoid repeated registering. Symbol `__hip_cuid_{cuid}` is
introduced to facilitate debugging and tooling.

Fixes: https://github.com/llvm/llvm-project/issues/77018
2024-02-22 13:51:31 -05:00

215 lines
7.1 KiB
Plaintext

// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
// RUN: -cuid=123 | FileCheck -check-prefix=DEV %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
// RUN: -std=c++17 -O3 -emit-llvm -o - -cuid=123 | FileCheck -check-prefix=HOST %s
// Negative tests.
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
// RUN: -std=c++17 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
// RUN: | FileCheck -check-prefix=DEV-NEG %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x hip %s \
// RUN: -std=c++17 -O3 -emit-llvm -o - | FileCheck -check-prefix=HOST-NEG %s
#include "Inputs/cuda.h"
// DEV-DAG: @v1
__device__ int v1;
// DEV-DAG: @v2
__constant__ int v2;
// Check device variables used by neither host nor device functioins are not kept.
// DEV-NEG-NOT: @_ZL2v3
static __device__ int v3;
// Check device variables used by host functions are kept.
// DEV-DAG: @u1
__device__ int u1;
// DEV-DAG: @u2
__constant__ int u2;
// Check host-used static device var is in llvm.compiler.used.
// DEV-DAG: @_ZL2u3
static __device__ int u3;
// Check device-used static device var is emitted but is not in llvm.compiler.used.
// DEV-DAG: @_ZL2u4
static __device__ int u4;
// Check device variables with used attribute are always kept.
// DEV-DAG: @u5
__device__ __attribute__((used)) int u5;
// Test external device variable ODR-used by host code is not emitted or registered.
// DEV-NEG-NOT: @ext_var
extern __device__ int ext_var;
// DEV-DAG: @inline_var = linkonce_odr addrspace(1) externally_initialized global i32 0
__device__ inline int inline_var;
template<typename T>
using func_t = T (*) (T, T);
template <typename T>
__device__ T add_func (T x, T y)
{
return x + y;
}
// DEV-DAG: @_Z10p_add_funcIiE = linkonce_odr addrspace(1) externally_initialized global ptr @_Z8add_funcIiET_S0_S0_
template <typename T>
__device__ func_t<T> p_add_func = add_func<T>;
// Check non-constant constexpr variables ODR-used by host code only is not emitted.
// DEV-NEG-NOT: constexpr_var1a
// DEV-NEG-NOT: constexpr_var1b
constexpr int constexpr_var1a = 1;
inline constexpr int constexpr_var1b = 1;
// Check constant constexpr variables ODR-used by host code only.
// Device-side constexpr variables accessed by host code should be externalized and kept.
// DEV-DAG: @_ZL15constexpr_var2a = addrspace(4) externally_initialized constant i32 2
// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2
__constant__ constexpr int constexpr_var2a = 2;
inline __constant__ constexpr int constexpr_var2b = 2;
void use(func_t<int> p);
__host__ __device__ void use(const int *p);
// Check static device variable in host function.
// DEV-DAG: @_ZZ4fun1vE11static_var1 = addrspace(1) externally_initialized global i32 3
void fun1() {
static __device__ int static_var1 = 3;
use(&u1);
use(&u2);
use(&u3);
use(&ext_var);
use(&inline_var);
use(p_add_func<int>);
use(&constexpr_var1a);
use(&constexpr_var1b);
use(&constexpr_var2a);
use(&constexpr_var2b);
use(&static_var1);
}
// Check static variable in host device function.
// DEV-DAG: @_ZZ4fun2vE11static_var2 = internal addrspace(1) global i32 4
// DEV-DAG: @_ZZ4fun2vE11static_var3 = addrspace(1) global i32 4
__host__ __device__ void fun2() {
static int static_var2 = 4;
static __device__ int static_var3 = 4;
use(&static_var2);
use(&static_var3);
}
__global__ void kern1(int **x) {
*x = &u4;
fun2();
}
// Check static variables of lambda functions.
// Lambda functions are implicit host device functions.
// Default static variables in lambda functions should be treated
// as host variables on host side, therefore should not be forced
// to be emitted on device.
// DEV-DAG: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2 = addrspace(1) externally_initialized global i32 5
// DEV-NEG-NOT: @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
namespace TestStaticVarInLambda {
class A {
public:
A(char *);
};
void fun() {
(void) [](char *c) {
static A var1(c);
static __device__ int var2 = 5;
(void) var1;
(void) var2;
};
}
}
// Check implicit constant variable ODR-used by host code is not emitted.
// AST contains instantiation of al<ar>, which triggers AST instantiation
// of x::al<ar>::am, which triggers AST instatiation of x::ap<ar>,
// which triggers AST instantiation of aw<ar>::c, which has type
// ar. ar has base class x which has member ah. x::ah is initialized
// with function pointer pointing to ar:as, which returns an object
// of type ou. The constexpr aw<ar>::c is an implicit constant variable
// which is ODR-used by host function x::ap<ar>. An incorrect implementation
// will force aw<ar>::c to be emitted on device side, which will trigger
// emit of x::as and further more ctor of ou and variable o.
// The ODR-use of aw<ar>::c in x::ap<ar> should be treated as a host variable
// instead of device variable.
// DEV-NEG-NOT: _ZN16TestConstexprVar1oE
namespace TestConstexprVar {
char o;
class ou {
public:
ou(char) { __builtin_strlen(&o); }
};
template < typename ao > struct aw { static constexpr ao c; };
class x {
protected:
typedef ou (*y)(const x *);
constexpr x(y ag) : ah(ag) {}
template < bool * > struct ak;
template < typename > struct al {
static bool am;
static ak< &am > an;
};
template < typename ao > static x ap() { (void)aw< ao >::c; return x(nullptr); }
y ah;
};
template < typename ao > bool x::al< ao >::am(&ap< ao >);
class ar : x {
public:
constexpr ar() : x(as) {}
static ou as(const x *) { return 0; }
al< ar > av;
};
}
// Check the exact list of variables to ensure @_ZL2u4 is not among them.
// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE
// DEV-SAME: {{^[^@]*}} @_ZL15constexpr_var2a
// DEV-SAME: {{^[^@]*}} @_ZL2u3
// DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1
// DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
// DEV-SAME: {{^[^@]*}} @__hip_cuid_{{[0-9a-f]+}}
// DEV-SAME: {{^[^@]*}} @constexpr_var2b
// DEV-SAME: {{^[^@]*}} @inline_var
// DEV-SAME: {{^[^@]*}} @u1
// DEV-SAME: {{^[^@]*}} @u2
// DEV-SAME: {{^[^@]*}} @u5
// DEV-SAME: {{^[^@]*$}}
// HOST-DAG: hipRegisterVar{{.*}}@u1
// HOST-DAG: hipRegisterVar{{.*}}@u2
// HOST-DAG: hipRegisterVar{{.*}}@_ZL2u3
// HOST-DAG: hipRegisterVar{{.*}}@constexpr_var2b
// HOST-DAG: hipRegisterVar{{.*}}@u5
// HOST-DAG: hipRegisterVar{{.*}}@inline_var
// HOST-DAG: hipRegisterVar{{.*}}@_Z10p_add_funcIiE
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun1vE11static_var1
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var2
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZ4fun2vE11static_var3
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var1
// HOST-NEG-NOT: hipRegisterVar{{.*}}@ext_var
// HOST-NEG-NOT: hipRegisterVar{{.*}}@_ZL2u4
// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1a
// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var1b
// HOST-NEG-NOT: hipRegisterVar{{.*}}@constexpr_var2a