llvm-project/clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp
Joseph Huber 237adfca4e
[OpenMP] Rework handling of global ctor/dtors in OpenMP (#71739)
Summary:
This patch reworks how we handle global constructors in OpenMP.
Previously, we emitted individual kernels that were all registered and
called individually. In order to provide more generic support, this
patch moves all handling of this to the target backend and the runtime
plugin. This has the benefit of supporting the GNU extensions for
constructors and destructors, removing a class of failures related to
shared library destruction order, and allows targets other than OpenMP
to use the same support without needing to change the frontend.

This is primarily done by calling kernels that the backend emits to
iterate a list of ctor / dtor functions. For x64, this is automatic and
we get it for free with the standard `dlopen` handling. For AMDGPU, we
emit `amdgcn.device.init` and `amdgcn.device.fini` functions which
handle everything automatically and simply need to be called. For NVPTX,
a patch https://github.com/llvm/llvm-project/pull/71549 provides the
kernels to call, but the runtime needs to set up the array manually by
pulling out all the known constructor / destructor functions.

One concession that this patch requires is the change that for GPU
targets in OpenMP offloading we will use `llvm.global_dtors` instead of
using `atexit`. This is because `atexit` is a separate runtime function
that does not mesh well with the handling we're trying to do here. This
should be equivalent in all cases except for cases where we would need
to destruct manually such as:

```
struct S { ~S() { foo(); } };
void foo() {
  static S s;
}
```

However this is broken in many other ways on the GPU, so it is not
regressing any support, simply increasing the scope of what we can
handle.

This changes the handling of ctors / dtors. This patch now outputs an
informational message regarding the deprecation if the old format is used.
This will be completely removed in a later release.

Depends on: https://github.com/llvm/llvm-project/pull/71549
2023-11-10 14:53:53 -06:00

305 lines
19 KiB
C++

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-globals --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" --global-value-regex "\![0-9]+"
// RUN: %clang_cc1 -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -x c++ -fopenmp -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
#ifdef __cplusplus
#include <cmath>
#else
#include <math.h>
#endif
#pragma omp begin declare target
const float constexpr_fabsf_f32 = fabsf(-2.0f);
const float constexpr_fabs_f32 = fabs(-2.0f);
const float constexpr_sinf_f32 = sinf(-2.0f);
const float constexpr_sin_f32 = sin(-2.0f);
const float constexpr_cosf_f32 = cosf(-2.0f);
const float constexpr_cos_f32 = cos(-2.0f);
const float constexpr_fmaf_f32 = fmaf(2.0f, 4.0f, 1.0f);
const float constexpr_fma_f32 = fma(2.0f, 4.0f, 1.0f);
const float constexpr_min_f32 = min(2.0f, -4.0f);
const float constexpr_max_f32 = max(2.0f, -4.0f);
const float constexpr_fmin_f32 = fmin(2.0f, -4.0f);
const float constexpr_fmax_f32 = fmax(2.0f, -4.0f);
const float constexpr_fminf_f32 = fminf(2.0f, -4.0f);
const float constexpr_fmaxf_f32 = fmaxf(2.0f, -4.0f);
#pragma omp end declare target
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call noundef float @llvm.fabs.f32(float [[TMP0]])
// CHECK-NEXT: store float [[TMP1]], ptr addrspacecast (ptr addrspace(1) @_ZL19constexpr_fabsf_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.1
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call noundef float @llvm.fabs.f32(float [[TMP1]])
// CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_fabs_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.2
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[CALL_I:%.*]] = call noundef float @__ocml_sin_f32(float noundef [[TMP0]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: store float [[CALL_I]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_sinf_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.3
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[CALL_I_I:%.*]] = call noundef float @__ocml_sin_f32(float noundef [[TMP1]]) #[[ATTR3]]
// CHECK-NEXT: store float [[CALL_I_I]], ptr addrspacecast (ptr addrspace(1) @_ZL17constexpr_sin_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.4
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[CALL_I:%.*]] = call noundef float @__ocml_cos_f32(float noundef [[TMP0]]) #[[ATTR3]]
// CHECK-NEXT: store float [[CALL_I]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_cosf_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.5
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: store float -2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[CALL_I_I:%.*]] = call noundef float @__ocml_cos_f32(float noundef [[TMP1]]) #[[ATTR3]]
// CHECK-NEXT: store float [[CALL_I_I]], ptr addrspacecast (ptr addrspace(1) @_ZL17constexpr_cos_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.6
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float 4.000000e+00, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float 1.000000e+00, ptr [[__Z_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call noundef float @llvm.fma.f32(float [[TMP0]], float [[TMP1]], float [[TMP2]])
// CHECK-NEXT: store float [[TMP3]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_fmaf_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.7
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Z_ADDR_I_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Z_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: [[__Z_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float 4.000000e+00, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float 1.000000e+00, ptr [[__Z_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[RETVAL_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I_I]] to ptr
// CHECK-NEXT: [[__Z_ADDR_ASCAST_I_I:%.*]] = addrspacecast ptr addrspace(5) [[__Z_ADDR_I_I]] to ptr
// CHECK-NEXT: store float [[TMP0]], ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: store float [[TMP1]], ptr [[__Y_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: store float [[TMP2]], ptr [[__Z_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[__Z_ADDR_ASCAST_I_I]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = call noundef float @llvm.fma.f32(float [[TMP3]], float [[TMP4]], float [[TMP5]])
// CHECK-NEXT: store float [[TMP6]], ptr addrspacecast (ptr addrspace(1) @_ZL17constexpr_fma_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.8
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float -4.000000e+00, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call noundef float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]])
// CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL17constexpr_min_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.9
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float -4.000000e+00, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call noundef float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]])
// CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL17constexpr_max_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.10
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CALL:%.*]] = call noundef float @_Z4fminff(float noundef 2.000000e+00, float noundef -4.000000e+00) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: store float [[CALL]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_fmin_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.11
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CALL:%.*]] = call noundef float @_Z4fmaxff(float noundef 2.000000e+00, float noundef -4.000000e+00) #[[ATTR4]]
// CHECK-NEXT: store float [[CALL]], ptr addrspacecast (ptr addrspace(1) @_ZL18constexpr_fmax_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.12
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float -4.000000e+00, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call noundef float @llvm.minnum.f32(float [[TMP0]], float [[TMP1]])
// CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL19constexpr_fminf_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.13
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__X_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[__Y_ADDR_I:%.*]] = alloca float, align 4, addrspace(5)
// CHECK-NEXT: [[RETVAL_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL_I]] to ptr
// CHECK-NEXT: [[__X_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__X_ADDR_I]] to ptr
// CHECK-NEXT: [[__Y_ADDR_ASCAST_I:%.*]] = addrspacecast ptr addrspace(5) [[__Y_ADDR_I]] to ptr
// CHECK-NEXT: store float 2.000000e+00, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: store float -4.000000e+00, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__X_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__Y_ADDR_ASCAST_I]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call noundef float @llvm.maxnum.f32(float [[TMP0]], float [[TMP1]])
// CHECK-NEXT: store float [[TMP2]], ptr addrspacecast (ptr addrspace(1) @_ZL19constexpr_fmaxf_f32 to ptr), align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_amdgcn_openmp_device_math_constexpr.cpp
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @__cxx_global_var_init()
// CHECK-NEXT: call void @__cxx_global_var_init.1()
// CHECK-NEXT: call void @__cxx_global_var_init.2()
// CHECK-NEXT: call void @__cxx_global_var_init.3()
// CHECK-NEXT: call void @__cxx_global_var_init.4()
// CHECK-NEXT: call void @__cxx_global_var_init.5()
// CHECK-NEXT: call void @__cxx_global_var_init.6()
// CHECK-NEXT: call void @__cxx_global_var_init.7()
// CHECK-NEXT: call void @__cxx_global_var_init.8()
// CHECK-NEXT: call void @__cxx_global_var_init.9()
// CHECK-NEXT: call void @__cxx_global_var_init.10()
// CHECK-NEXT: call void @__cxx_global_var_init.11()
// CHECK-NEXT: call void @__cxx_global_var_init.12()
// CHECK-NEXT: call void @__cxx_global_var_init.13()
// CHECK-NEXT: ret void
//