llvm-project/clang/test/Driver/openmp-offload-gpu.c
Joseph Huber 9f89769cd7 [Clang] Add offload kind to embedded offload object
This patch adds the offload kind to the embedded section name in
preparation for offloading to different kinda like CUDA or HIP.

Depends on D120288

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D120271
2022-03-14 20:08:27 -04:00

349 lines
22 KiB
C

///
/// Perform several driver tests for OpenMP offloading
///
// REQUIRES: clang-driver
// REQUIRES: x86-registered-target
// REQUIRES: powerpc-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: amdgpu-registered-target
// UNSUPPORTED: aix
/// ###########################################################################
/// Check -Xopenmp-target uses one of the archs provided when several archs are used.
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 -Xopenmp-target -march=sm_60 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s
// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60"
// CHK-FOPENMP-TARGET-ARCHS: nvlink{{.*}}" "-arch" "sm_60"
/// ###########################################################################
/// Check -Xopenmp-target -march=sm_35 works as expected when two triples are present.
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp \
// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_35 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s
// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_35"
// CHK-FOPENMP-TARGET-COMPILATION: nvlink{{.*}}" "-arch" "sm_35"
/// ###########################################################################
/// Check cubin file generation and usage by nvlink
// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -save-temps %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
// CHK-CUBIN-NVLINK: clang{{.*}}" {{.*}}"-fopenmp-is-device" {{.*}}"-o" "[[PTX:.*\.s]]"
// CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
// CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
/// ###########################################################################
/// Check unbundlink of assembly file, cubin file generation and usage by nvlink
// RUN: touch %t.s
// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -no-canonical-prefixes -save-temps %t.s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK %s
/// Use DAG to ensure that assembly file has been unbundled.
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX:.*\.s]]"
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=s" {{.*}}"-outputs={{.*}}[[PTX]]
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK-DAG-SAME: "-unbundle"
// CHK-UNBUNDLING-PTXAS-CUBIN-NVLINK: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
/// ###########################################################################
/// Check cubin file generation and bundling
// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -no-canonical-prefixes -save-temps %s -c 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-CUBIN-BUNDLING %s
// CHK-PTXAS-CUBIN-BUNDLING: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
// CHK-PTXAS-CUBIN-BUNDLING-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
// CHK-PTXAS-CUBIN-BUNDLING: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-inputs={{.*}}[[CUBIN]]
/// ###########################################################################
/// Check cubin file unbundling and usage by nvlink
// RUN: touch %t.o
// RUN: %clang -### -target powerpc64le-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -no-canonical-prefixes -save-temps %t.o %S/Inputs/in.so 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUBIN-UNBUNDLING-NVLINK %s
/// Use DAG to ensure that cubin file has been unbundled.
// CHK-CUBIN-UNBUNDLING-NVLINK-NOT: clang-offload-bundler{{.*}}" "-type=o"{{.*}}in.so
// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: nvlink{{.*}}" {{.*}}"[[CUBIN:.*\.cubin]]"
// CHK-CUBIN-UNBUNDLING-NVLINK-DAG: clang-offload-bundler{{.*}}" "-type=o" {{.*}}"-outputs={{.*}}[[CUBIN]]
// CHK-CUBIN-UNBUNDLING-NVLINK-DAG-SAME: "-unbundle"
// CHK-CUBIN-UNBUNDLING-NVLINK-NOT: clang-offload-bundler{{.*}}" "-type=o"{{.*}}in.so
/// ###########################################################################
/// Check cubin file generation and usage by nvlink
// RUN: touch %t1.o
// RUN: touch %t2.o
// RUN: %clang -### -no-canonical-prefixes -target powerpc64le-unknown-linux-gnu -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
/// Check cubin file generation and usage by nvlink when toolchain has BindArchAction
// RUN: %clang -### -no-canonical-prefixes -target x86_64-apple-darwin17.0.0 -fopenmp=libomp \
// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %t1.o %t2.o 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-TWOCUBIN %s
// CHK-TWOCUBIN: nvlink{{.*}}openmp-offload-{{.*}}.cubin" "{{.*}}openmp-offload-{{.*}}.cubin"
/// ###########################################################################
/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s
// CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c"
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \
// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s
// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c"
/// ###########################################################################
/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \
// RUN: -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s
// CHK-PTXAS-RELO: ptxas{{.*}}" "-c"
/// ###########################################################################
/// Check that error is not thrown by toolchain when no cuda lib flag is used.
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
// RUN: -nocudalib -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-FLAG-NOLIBDEVICE %s
// CHK-FLAG-NOLIBDEVICE-NOT: error:{{.*}}sm_60
/// ###########################################################################
/// Check that error is not thrown by toolchain when no cuda lib device is found when using -S.
/// Check that the flag is passed when -fopenmp-relocatable-target is used.
// RUN: %clang -### -S -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NOLIBDEVICE %s
// CHK-NOLIBDEVICE-NOT: error:{{.*}}sm_60
/// ###########################################################################
/// Check that the runtime bitcode library is part of the compile line.
/// Create a bogus bitcode library and specify it with libomptarget-nvptx-bc-path
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB %s
/// Specify the directory containing the bitcode lib, check clang picks the right one
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps \
// RUN: -no-canonical-prefixes %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s
/// Create a bogus bitcode library and find it with LIBRARY_PATH
// RUN: env LIBRARY_PATH=%S/Inputs/libomptarget/subdir %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps \
// RUN: -no-canonical-prefixes %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s
// CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
// CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx-sm_35.bc
// CHK-ENV-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}subdir{{/|\\\\}}libomptarget-nvptx-sm_35.bc
// CHK-BCLIB-NOT: {{error:|warning:}}
/// ###########################################################################
/// Check that the warning is thrown when the libomptarget bitcode library is not found.
/// Libomptarget requires sm_35 or newer so an sm_35 bitcode library should never exist.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB-WARN %s
// CHK-BCLIB-WARN: no library 'libomptarget-nvptx-sm_35.bc' found in the default clang lib directory or in LIBRARY_PATH; use '--libomptarget-nvptx-bc-path' to specify nvptx bitcode library
/// ###########################################################################
/// Check that the error is thrown when the libomptarget bitcode library does not exist.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
// RUN: --libomptarget-nvptx-bc-path=not-exist.bc \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-BCLIB-ERROR %s
// CHK-BCLIB-ERROR: bitcode library 'not-exist.bc' does not exist
/// ###########################################################################
/// Check that the error is thrown when CUDA 9.1 or lower version is used.
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \
// RUN: -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s
// CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected
/// Check that debug info is emitted in dwarf-2
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \
// RUN: | FileCheck -check-prefix=NO_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=NO_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \
// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s
// DEBUG_DIRECTIVES-NOT: warning: debug
// NO_DEBUG-NOT: warning: debug
// NO_DEBUG: "-fopenmp-is-device"
// NO_DEBUG-NOT: "-debug-info-kind=
// NO_DEBUG: ptxas
// DEBUG_DIRECTIVES: "-triple" "nvptx64-nvidia-cuda"
// DEBUG_DIRECTIVES-SAME: "-debug-info-kind=line-directives-only"
// DEBUG_DIRECTIVES-SAME: "-fopenmp-is-device"
// DEBUG_DIRECTIVES: ptxas
// DEBUG_DIRECTIVES: "-lineinfo"
// NO_DEBUG-NOT: "-g"
// NO_DEBUG: nvlink
// NO_DEBUG-NOT: "-g"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \
// RUN: | FileCheck -check-prefix=HAS_DEBUG %s
// HAS_DEBUG-NOT: warning: debug
// HAS_DEBUG: "-triple" "nvptx64-nvidia-cuda"
// HAS_DEBUG-SAME: "-debug-info-kind={{constructor|line-tables-only}}"
// HAS_DEBUG-SAME: "-dwarf-version=2"
// HAS_DEBUG-SAME: "-fopenmp-is-device"
// HAS_DEBUG: ptxas
// HAS_DEBUG-SAME: "-g"
// HAS_DEBUG-SAME: "--dont-merge-basicblocks"
// HAS_DEBUG-SAME: "--return-at-end"
// HAS_DEBUG: nvlink
// HAS_DEBUG-SAME: "-g"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_MODE %s
// CUDA_MODE: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
// CUDA_MODE-SAME: "-fopenmp-cuda-mode"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \
// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s
// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime -fopenmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=FULL_RUNTIME %s
// FULL_RUNTIME: clang{{.*}}"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}"
// FULL_RUNTIME-SAME: "-fopenmp-cuda-force-full-runtime"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-force-full-runtime -fno-openmp-cuda-force-full-runtime 2>&1 \
// RUN: | FileCheck -check-prefix=NO_FULL_RUNTIME %s
// NO_FULL_RUNTIME-NOT: "-{{fno-|f}}openmp-cuda-force-full-runtime"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \
// RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s
// CUDA_RED_RECS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
// CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048"
// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s
// OPENMP_NVPTX_WRAPPERS: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"
// OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers"
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -save-temps -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s
// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"]
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_35 \
// RUN: -save-temps -no-canonical-prefixes %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=TRIPLE %s
// TRIPLE: "-triple" "nvptx64-nvidia-cuda"
// TRIPLE: "-target-cpu" "sm_35"
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
// RUN: -fopenmp-new-driver -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=NEW_DRIVER %s
// NEW_DRIVER: "[[HOST_TRIPLE:.+]]" - "clang", inputs: ["[[HOST_INPUT:.+]]"], output: "[[HOST_BC:.+]]"
// NEW_DRIVER: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[DEVICE_INPUT:.+]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.+]]"
// NEW_DRIVER: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_OBJ:.+]]"
// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvida-cuda -march=sm_70 \
// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-new-nvptx-test.bc \
// RUN: -fopenmp-new-driver -no-canonical-prefixes -nogpulib %s -o openmp-offload-gpu 2>&1 \
// RUN: | FileCheck -check-prefix=NEW_DRIVER_EMBEDDING %s
// NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],openmp.nvptx64-nvidia-cuda.sm_70