
// This test covers the replacement of the amdgpu-unsafe-fp-atomics function
// attribute with per-instruction metadata (!amdgpu.no.fine.grained.memory,
// !amdgpu.ignore.denormal.mode). Finer-grained controls that also cover the
// integer cases are expected to follow.
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck -check-prefix=DEFAULT %s
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -munsafe-fp-atomics -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
#pragma omp declare target

// Global float/double pairs used by the atomic-capture tests below:
// *x is the atomically updated location, *v receives the captured result.
float fv, fx;
double dv, dx;
// DEFAULT-LABEL: define hidden void @_Z15atomic_fadd_f32v(
// DEFAULT-SAME: ) #[[ATTR0:[0-9]+]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4
// DEFAULT-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]]
// DEFAULT-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// DEFAULT-NEXT: ret void
//
// UNSAFE-FP-ATOMICS-LABEL: define hidden void @_Z15atomic_fadd_f32v(
// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0:[0-9]+]] {
// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]]
// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load float, ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @fx to ptr), float [[TMP0]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]], !amdgpu.ignore.denormal.mode [[META5]]
// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[TMP0]]
// UNSAFE-FP-ATOMICS-NEXT: store float [[ADD]], ptr addrspacecast (ptr addrspace(1) @fv to ptr), align 4
// UNSAFE-FP-ATOMICS-NEXT: ret void
//
void atomic_fadd_f32() {
// Atomic capture on float globals: updates @fx with an atomicrmw fadd and
// stores the updated value into @fv (see the CHECK lines above). Under
// -munsafe-fp-atomics the atomicrmw carries both
// !amdgpu.no.fine.grained.memory and !amdgpu.ignore.denormal.mode.
#pragma omp atomic capture
  fv = fx = fx + fv;
}

// DEFAULT-LABEL: define hidden void @_Z15atomic_fadd_f64v(
// DEFAULT-SAME: ) #[[ATTR0]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
// DEFAULT-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// DEFAULT-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8
// DEFAULT-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
// DEFAULT-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// DEFAULT-NEXT: ret void
//
// UNSAFE-FP-ATOMICS-LABEL: define hidden void @_Z15atomic_fadd_f64v(
// UNSAFE-FP-ATOMICS-SAME: ) #[[ATTR0]] {
// UNSAFE-FP-ATOMICS-NEXT: [[ENTRY:.*:]]
// UNSAFE-FP-ATOMICS-NEXT: [[TMP0:%.*]] = load double, ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// UNSAFE-FP-ATOMICS-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @dx to ptr), double [[TMP0]] monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
// UNSAFE-FP-ATOMICS-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP0]]
// UNSAFE-FP-ATOMICS-NEXT: store double [[ADD]], ptr addrspacecast (ptr addrspace(1) @dv to ptr), align 8
// UNSAFE-FP-ATOMICS-NEXT: ret void
//
void atomic_fadd_f64() {
// Atomic capture on double globals: updates @dx with an atomicrmw fadd and
// stores the updated value into @dv (see the CHECK lines above). Under
// -munsafe-fp-atomics the atomicrmw carries only
// !amdgpu.no.fine.grained.memory — no denormal-mode metadata for f64.
#pragma omp atomic capture
  dv = dx = dx + dv;
}

#pragma omp end declare target
//.
// UNSAFE-FP-ATOMICS: [[META5]] = !{}
//.