llvm-project/clang/test/CodeGenCUDA/offloading-entries.cu
Joseph Huber b370be37cc [CUDA] Allow the new driver to compile CUDA in non-RDC mode
The new driver primarily allows us to support RDC-mode compilations with
proper linking. This is not needed for non-RDC mode compilation, but we
still would like the new driver to be able to handle this mode so we can
transition away from the old driver in the future. This patch adds the
necessary code to support creating a fatbinary for CUDA code generation
as well as removing old assumptions and errors about RDC-mode with the
new driver.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D129655
2022-07-13 21:49:15 -04:00

58 lines
3.2 KiB
Plaintext

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".omp_offloading.entry.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \
// RUN: --check-prefix=HIP %s
#include "Inputs/cuda.h"
//.
// CUDA: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
// CUDA: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00"
// CUDA: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00"
// CUDA: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
//.
// HIP: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
// HIP: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00"
// HIP: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z3barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00"
// HIP: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
// CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
// CUDA-NEXT: br label [[SETUP_END:%.*]]
// CUDA: setup.end:
// CUDA-NEXT: ret void
//
// HIP-LABEL: @_Z18__device_stub__foov(
// HIP-NEXT: entry:
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov)
// HIP-NEXT: br label [[SETUP_END:%.*]]
// HIP: setup.end:
// HIP-NEXT: ret void
//
__global__ void foo() {}
// CUDA-LABEL: @_Z18__device_stub__barv(
// CUDA-NEXT: entry:
// CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__barv)
// CUDA-NEXT: br label [[SETUP_END:%.*]]
// CUDA: setup.end:
// CUDA-NEXT: ret void
//
// HIP-LABEL: @_Z18__device_stub__barv(
// HIP-NEXT: entry:
// HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3barv)
// HIP-NEXT: br label [[SETUP_END:%.*]]
// HIP: setup.end:
// HIP-NEXT: ret void
//
__global__ void bar() {}
__device__ int x = 1;