
The `ptx_kernel` calling convention is a more idiomatic and standard way of specifying an NVPTX kernel than metadata, which is not supposed to change the meaning of the program. Further, checking the calling convention is significantly faster than traversing the metadata, which improves compile time. This change updates the clang and mlir frontends, as well as the NVPTXCtorDtorLowering pass, to emit kernels using the calling convention. In addition, it updates all NVPTX unit tests to use the calling convention as well.
33 lines
1014 B
Plaintext
33 lines
1014 B
Plaintext
// Make sure that __global__ functions are emitted with the ptx_kernel calling
// convention and are added to @llvm.used to prevent their elimination.
|
|
// REQUIRES: nvptx-registered-target
|
|
//
|
|
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// Device-side helper with C linkage, so its IR symbol is the unmangled name
// that the directive below looks for.
// CHECK-LABEL: define{{.*}} void @device_function
extern "C" __device__ void device_function() {}
|
|
|
|
// Kernel with C linkage. Its definition is expected to carry the ptx_kernel
// calling convention, and its body is expected to call the device helper.
// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function
extern "C" __global__ void global_function() {
  // CHECK: call void @device_function
  device_function();
}
|
|
|
|
// Make sure host-instantiated kernels are preserved on device side.
// The kernel template has an empty body; the directive below looks for the
// definition of its <int> instantiation with the ptx_kernel calling convention.
template <typename T>
__global__ void templated_kernel(T param) {}
// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_(
|
|
|
|
namespace {
// Kernel with internal linkage (it lives in an anonymous namespace). Like the
// other kernels in this test, its definition is required to use the
// ptx_kernel calling convention; the {{.*}} still absorbs any linkage keyword
// that precedes the calling convention.
__global__ void anonymous_ns_kernel() {}
// CHECK-DAG: define{{.*}} ptx_kernel void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
} // namespace
|
|
|
|
// Host-side function that launches both kernels defined above.
void host_function() {
  // Passing the int literal 0 selects the templated_kernel<int> instantiation.
  templated_kernel<<<0, 0>>>(0);
  anonymous_ns_kernel<<<0, 0>>>();
}
|