James Y Knight 8043d5a964 NFC: update clang tests to check ordering and alignment for atomicrmw/cmpxchg.
The ability to specify alignment was recently added, and it's an
important property which we should ensure is set as expected by
Clang. (Especially before making further changes to Clang's code in
this area.) But, because it's on the end of the lines, the existing
tests all ignore it.

Therefore, update all the tests to also verify the expected alignment
for atomicrmw and cmpxchg. While I was in there, I also updated uses
of 'load atomic' and 'store atomic', and added the memory ordering,
where that was missing.
2021-02-11 17:35:09 -05:00

292 lines
12 KiB
Common Lisp

// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
// Also test serialization of atomic operations here, to avoid duplicating the test.
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s
#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED
typedef __INTPTR_TYPE__ intptr_t;
typedef int int8 __attribute__((ext_vector_type(8)));
typedef enum memory_order {
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;
typedef enum memory_scope {
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;
atomic_int j;
void fi1(atomic_int *i) {
// CHECK-LABEL: @fi1
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);
// CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4
x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}
void fi2(atomic_int *i) {
// CHECK-LABEL: @fi2
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}
void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
// CHECK-LABEL: @test_addr
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);
// CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
__opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
}
void fi3(atomic_int *i, atomic_uint *ui) {
// CHECK-LABEL: @fi3
// CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);
// CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);
// CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);
// CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);
// CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}
bool fi4(atomic_int *i) {
// CHECK-LABEL: @fi4(
// CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4
// CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
// CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
// CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
// CHECK: store i32 [[OLD]]
int cmp = 0;
return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}
void fi5(atomic_int *i, int scope) {
// CHECK-LABEL: @fi5
// CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
// CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
// CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
// CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
// CHECK-NEXT: ]
// CHECK: [[opencl_workgroup]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
// CHECK: br label %[[continue:.*]]
// CHECK: [[opencl_device]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
// CHECK: br label %[[continue]]
// CHECK: [[opencl_allsvmdevices]]:
// CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
// CHECK: br label %[[continue]]
// CHECK: [[opencl_subgroup]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
// CHECK: br label %[[continue]]
// CHECK: [[continue]]:
int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}
void fi6(atomic_int *i, int order, int scope) {
// CHECK-LABEL: @fi6
// CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
// CHECK-NEXT: i32 1, label %[[acquire:.*]]
// CHECK-NEXT: i32 2, label %[[acquire:.*]]
// CHECK-NEXT: i32 5, label %[[seqcst:.*]]
// CHECK-NEXT: ]
// CHECK: [[monotonic]]:
// CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
// CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
// CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
// CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[acquire]]:
// CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
// CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
// CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
// CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[seqcst]]:
// CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
// CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
// CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
// CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[MON_WG]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
// CHECK: [[MON_DEV]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4
// CHECK: [[MON_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} monotonic, align 4
// CHECK: [[MON_SUB]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
// CHECK: [[ACQ_WG]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire, align 4
// CHECK: [[ACQ_DEV]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire, align 4
// CHECK: [[ACQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} acquire, align 4
// CHECK: [[ACQ_SUB]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire, align 4
// CHECK: [[SEQ_WG]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
// CHECK: [[SEQ_DEV]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
// CHECK: [[SEQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
// CHECK: [[SEQ_SUB]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
int x = __opencl_atomic_load(i, order, scope);
}
float ff1(global atomic_float *d) {
// CHECK-LABEL: @ff1
// CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic, align 4
return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
void ff2(atomic_float *d) {
// CHECK-LABEL: @ff2
// CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4
__opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
float ff3(atomic_float *d) {
// CHECK-LABEL: @ff3
// CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst, align 4
return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}
// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
// CHECK-NOT: atomic
// CHECK: store
__opencl_atomic_init(&j, 42);
// CHECK-NOT: atomic
// CHECK: }
}
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
// CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
// CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4
__opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}
// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
// CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
// CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 2, label %[[ACQUIRE]]
// CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]
// CHECK: [[MONOTONIC]]
// CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
// CHECK-NEXT: ]
// CHECK: [[ACQUIRE]]
// CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
// CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: ]
// CHECK: [[RELEASE]]
// CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
// CHECK-NEXT: ]
// CHECK: [[ACQREL]]
// CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
// CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: ]
// CHECK: [[SEQCST]]
// CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
// CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
// CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
// CHECK-NEXT: ]
// CHECK: [[MONOTONIC_MONOTONIC]]
// CHECK: cmpxchg {{.*}} monotonic monotonic, align 4
// CHECK: br
// CHECK: [[ACQUIRE_MONOTONIC]]
// CHECK: cmpxchg {{.*}} acquire monotonic, align 4
// CHECK: br
// CHECK: [[ACQUIRE_ACQUIRE]]
// CHECK: cmpxchg {{.*}} acquire acquire, align 4
// CHECK: br
// CHECK: [[ACQREL_MONOTONIC]]
// CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4
// CHECK: br
// CHECK: [[ACQREL_ACQUIRE]]
// CHECK: cmpxchg {{.*}} acq_rel acquire, align 4
// CHECK: br
// CHECK: [[SEQCST_MONOTONIC]]
// CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4
// CHECK: br
// CHECK: [[SEQCST_ACQUIRE]]
// CHECK: cmpxchg {{.*}} seq_cst acquire, align 4
// CHECK: br
// CHECK: [[SEQCST_SEQCST]]
// CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4
// CHECK: br
}
int test_volatile(volatile atomic_int *i) {
// CHECK-LABEL: @test_volatile
// CHECK: %[[i_addr:.*]] = alloca i32
// CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
// CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
// CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
// CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst, align 4
// CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
// CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
// CHECK-NEXT: ret i32 %[[retval]]
return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}
#endif