[HIP] fix host-used external kernel (#83870)
In -fgpu-rdc mode, when an external kernel is used by a host function with weak_odr linkage (e.g. an explicitly instantiated template function), the kernel should not be marked as a host-used external kernel, since the host function may be dropped by the linker. Marking the external kernel as host-used would force a reference to the external kernel, which the user may not have defined in any other TU. Fixes: https://github.com/llvm/llvm-project/issues/83771
This commit is contained in:
parent
29afd64615
commit
b46f980454
@ -895,7 +895,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
|
||||
if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
|
||||
// For -fgpu-rdc, keep track of external kernels used by host functions.
|
||||
if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
|
||||
Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
|
||||
Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
|
||||
(!Caller || (!Caller->getDescribedFunctionTemplate() &&
|
||||
getASTContext().GetGVALinkageForFunction(Caller) ==
|
||||
GVA_StrongExternal)))
|
||||
getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
|
||||
// externalize the static device side variable ODR-used by host code.
|
||||
if (!Var->hasExternalStorage())
|
||||
SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
|
||||
else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
|
||||
else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
|
||||
(!FD || (!FD->getDescribedFunctionTemplate() &&
|
||||
SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
|
||||
GVA_StrongExternal)))
|
||||
SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
|
||||
}
|
||||
}
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
|
||||
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
|
||||
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
|
||||
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
|
||||
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
|
||||
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
|
||||
// NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
|
||||
@ -44,6 +45,10 @@ __global__ void kernel3();
|
||||
// kernel4 is marked as used even though it is not called.
|
||||
__global__ void kernel4();
|
||||
|
||||
// kernel5 is not marked as used since it is called by host function
|
||||
// with weak_odr linkage, which may be dropped by linker.
|
||||
__global__ void kernel5();
|
||||
|
||||
extern __device__ int var1;
|
||||
|
||||
__device__ int var2;
|
||||
@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
|
||||
};
|
||||
lambda();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void template_caller() {
|
||||
kernel5<<<1, 1>>>();
|
||||
var1 = 1;
|
||||
}
|
||||
|
||||
template void template_caller<int>();
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user