With this patch, OpenMP on AMDGCN will use the math functions provided by ROCm ocml library. Linking device code to the ocml will be done in the next patch. Reviewed By: JonChesterfield, jdoerfert, scchan Differential Revision: https://reviews.llvm.org/D104904
43 lines
5.6 KiB
C++
43 lines
5.6 KiB
C++
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD_BOOL_RETURN
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=AMD_BOOL_RETURN
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN
|
|
// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN
|
|
// expected-no-diagnostics
|
|
|
|
#include <cmath>
|
|
|
|
double math(float f, double d) {
|
|
double r = 0;
|
|
// INT_RETURN: call i32 @__nv_isnanf(float
|
|
// AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float
|
|
// BOOL_RETURN: call i32 @__nv_isnanf(float
|
|
// AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float
|
|
r += std::isnan(f);
|
|
// INT_RETURN: call i32 @__nv_isnand(double
|
|
// AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double
|
|
// BOOL_RETURN: call i32 @__nv_isnand(double
|
|
// AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double
|
|
r += std::isnan(d);
|
|
return r;
|
|
}
|
|
|
|
long double foo(float f, double d, long double ld) {
|
|
double r = ld;
|
|
r += math(f, d);
|
|
#pragma omp target map(r)
|
|
{ r += math(f, d); }
|
|
return r;
|
|
}
|