
nvcc allows using std::malloc and std::free in device code. When std::malloc or std::free is passed as a function argument to a function template with template argument deduction, no diagnostic is issued, e.g.:

    __global__ void kern() {
      void *p = std::malloc(1);
      std::free(p);
    }
    int main() {
      std::shared_ptr<float> a;
      a = std::shared_ptr<float>(
        (float*)std::malloc(sizeof(float) * 100),
        std::free
      );
      return 0;
    }

However, the same code fails to compile with clang (https://godbolt.org/z/1roGvo6YY). The reason is that clang has no logic for choosing a function argument from an overloaded set of candidates based on host/device attributes during template argument deduction. Currently, clang does have logic to choose a candidate based on the constraints of the candidates. This patch extends that logic to account for the CUDA host/device-based preference.

Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D154300
28 lines
505 B
Plaintext
28 lines
505 B
Plaintext
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
|
|
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
|
|
|
|
// expected-no-diagnostics
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// Overload set under test: `foo` is declared once without a CUDA target
// attribute and once as __device__. Passing the bare name `foo` as an
// argument therefore requires one of the two candidates to be selected.
void foo();
|
|
__device__ void foo();
|
|
|
|
// Function template declared without a CUDA target attribute. Its only
// parameter has the template parameter type F, so a call that passes a
// function name relies on template argument deduction to determine F.
// Only a declaration is visible here; the test is syntax-only.
template<class F>
|
|
void host_temp(F f);
|
|
|
|
// __device__ function template. Its only parameter has the template
// parameter type F, so a call that passes a function name relies on template
// argument deduction to determine F.
// Only a declaration is visible here; the test is syntax-only.
template<class F>
|
|
__device__ void device_temp(F f);
|
|
|
|
// Caller without a CUDA target attribute: passes the overloaded name `foo`
// to host_temp, leaving F to be deduced from an overload set that contains
// both an unattributed and a __device__ declaration. The test requires this
// call to be accepted without any diagnostic.
void host_caller() {
|
|
// Per the change description, the candidate is presumably picked using the
// CUDA host/device preference relative to this caller.
host_temp(foo);
|
|
}
|
|
|
|
// __global__ (kernel) caller: passes the overloaded name `foo` to the
// __device__ template device_temp, leaving F to be deduced from an overload
// set that contains both an unattributed and a __device__ declaration. The
// test requires this call to be accepted without any diagnostic.
__global__ void kernel_caller() {
|
|
// Per the change description, the candidate is presumably picked using the
// CUDA host/device preference relative to this caller.
device_temp(foo);
|
|
}
|
|
|
|
// __device__ caller: passes the overloaded name `foo` to the __device__
// template device_temp, leaving F to be deduced from an overload set that
// contains both an unattributed and a __device__ declaration. The test
// requires this call to be accepted without any diagnostic.
__device__ void device_caller() {
|
|
// Per the change description, the candidate is presumably picked using the
// CUDA host/device preference relative to this caller.
device_temp(foo);
|
|
}
|