
`-fgpu-rdc` mode allows device functions to call device functions in different TUs. However, currently all device objects have to be linked together, since only one fat binary is supported. This is time-consuming for the AMDGPU backend, since it only supports LTO. There are use cases in which objects can be divided into groups whose device functions are self-contained but whose host functions are not. It is desirable to link/optimize/codegen the device code and generate a fatbin for each group, while partially linking the host code with `ld -r` or generating a static library by using the `--emit-static-lib` option of clang. This avoids linking all device code together and therefore decreases the linking time for `-fgpu-rdc`. Previously, clang emitted an external symbol `__hip_fatbin` for all objects for `-fgpu-rdc`. With this patch, clang emits a unique external symbol `__hip_fatbin_{cuid}` for the fat binary of each object. When a group of objects is linked together to generate a fatbin, the symbols are merged by alias and point to the same fat binary. Each group has its own fat binary. One executable or shared library can have multiple fat binaries. Device linking is done for undefined fat binary symbols only, to avoid repeated linking. `__hip_gpubin_handle` is also uniquified and merged to avoid repeated registration. The symbol `__hip_cuid_{cuid}` is introduced to facilitate debugging and tooling. Fixes: https://github.com/llvm/llvm-project/issues/77018
26 lines
1.1 KiB
C
26 lines
1.1 KiB
C
/* Minimal declarations for HIP support. Testing purposes only. */

/*
 * HIP qualifier keywords.
 *
 * Each keyword expands to the compiler attribute of the same name. The
 * double-underscore spellings are the ones HIP source code uses, so they
 * are kept as-is even though such identifiers are normally reserved.
 */
#define __constant__ __attribute__((constant))
#define __device__   __attribute__((device))
#define __global__   __attribute__((global))
#define __host__     __attribute__((host))
#define __shared__   __attribute__((shared))
#define __managed__  __attribute__((managed))

struct dim3 {
|
|
unsigned x, y, z;
|
|
__host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
|
|
};
|
|
|
|
/*
 * Stream handle: a pointer to struct hipStream, which is only
 * forward-declared by this typedef.
 */
typedef struct hipStream *hipStream_t;

/* Error code type. This stub enum declares no enumerators. */
typedef enum hipError {} hipError_t;

/*
 * Declaration of hipConfigureCall (no body is given here).
 *
 * Takes the grid and block sizes as dim3 values and returns int.
 * sharedSize and stream may be omitted; both default to 0.
 * NOTE(review): presumably the launch-configuration call the compiler emits
 * for kernel launches in the older launch scheme -- confirm against the
 * HIP runtime.
 */
int hipConfigureCall(dim3 gridSize, dim3 blockSize,
                     unsigned long long sharedSize = 0,
                     hipStream_t stream = 0);

/*
 * Declaration of __hipPushCallConfiguration with C linkage (no body is
 * given here).
 *
 * Takes the grid and block sizes as dim3 values and returns hipError_t.
 * sharedSize and stream may be omitted; both default to 0.
 */
extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
                                                 unsigned long long sharedSize = 0,
                                                 hipStream_t stream = 0);

/*
 * Declaration of hipLaunchKernel with C linkage (no body is given here).
 *
 * Returns hipError_t. Unlike the two configuration declarations in this
 * header, no parameter has a default, so all six arguments are required:
 * the function pointer, grid and block dimensions, the argument array, the
 * shared-memory size and the stream.
 */
extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
                                      dim3 blockDim, void **args,
                                      unsigned long long sharedMem,
                                      hipStream_t stream);