At the moment, AMDGCN-flavoured SPIR-V uses the SPIR-V ABI with some tweaks revolving around passing aggregates as direct. This is problematic in multiple ways: (1) it leads to divergence from code compiled for a concrete target, which makes it difficult to debug; (2) it incurs a run-time cost when dealing with larger aggregates; (3) it incurs a compile-time cost when dealing with larger aggregates. This patch switches AMDGCN-flavoured SPIR-V over to implementing the AMDGPU ABI (except for dealing with variadic functions, which will be added in the future). One additional complication (and the primary motivation behind the current less-than-ideal state of affairs) stems from `byref`, which AMDGPU uses, not being expressible in SPIR-V. We deal with this by CodeGen-ing for `byref`, lowering it to the `FuncParamAttr ByVal` in SPIR-V, and restoring it when doing reverse translation from AMDGCN-flavoured SPIR-V.
47 lines
2.3 KiB
Plaintext
47 lines
2.3 KiB
Plaintext
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
// RUN: %clang_cc1 -x hip -triple spirv64-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCNSPIRV %s
// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
#include "Inputs/cuda.h"

// Aggregate used as the by-value kernel argument throughout this test: an
// array of 32 ints followed by a float pointer. The check lines in this file
// expect it to be passed indirectly with 8-byte alignment.
struct A {
  int a[32];  // array member
  float *p;   // pointer member
};

// Free-function kernel that takes the aggregate A by value. The patterns
// below expect the argument to be lowered to a `byref` pointer for the amdgcn
// triple (addrspace(4)) and the spirv64-amd-amdhsa triple (addrspace(2)), and
// to a `byval` pointer named %x for the nvptx64 triple.
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x)
__global__ void kernel(A x) {
}

// Holder for kernels declared as static member functions, so that the same
// argument lowering is verified for class-scope kernels.
class Kernel {
public:
  // Non-template static member kernel taking A by value; the expected
  // lowering mirrors the free-function kernel (byref for the two AMD triples,
  // byval for nvptx64).
  // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
  // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
  // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x)
  static __global__ void memberKernel(A x){}

  // Template static member kernel; the patterns for its A specialization
  // are placed next to the reference to it inside test().
  template<typename T> static __global__ void templateMemberKernel(T x) {}
};

// Free-function template kernel taking its argument by value. The patterns
// for the A specialization are placed next to the reference to it inside
// test().
template <typename T>
__global__ void templateKernel(T x) {}

// Declaration only (no definition is visible in this file); test() passes
// kernel addresses to it so that the template kernels are referenced.
void launch(void*);

// References the A specializations of both template kernels by passing their
// addresses to launch(); the patterns below expect a definition of each
// specialization in the emitted IR, with the same argument lowering as the
// non-template kernels.
void test() {
  Kernel K;
  // Free-function template kernel specialized on A.
  // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
  // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
  // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x)
  launch((void*)templateKernel<A>);

  // Static member template kernel specialized on A.
  // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
  // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}})
  // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x)
  launch((void*)Kernel::templateMemberKernel<A>);
}