This just added unnecessary work to the IR, since they are only used for load and store, which just causes some IR noise. Tests updated by UTC script to remove the extra lines.
44 lines
1.9 KiB
Common Lisp
44 lines
1.9 KiB
Common Lisp
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
|
|
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx942 \
|
|
// RUN: %s -emit-llvm -o - -disable-llvm-passes | FileCheck %s
|
|
|
|
// REQUIRES: amdgpu-registered-target
|
|
|
|
// `Ptr.getElementType()` in `CheckAtomicAlignment` returns
|
|
// %struct.__half2 = type { %union.anon }
|
|
// Check we do not crash when handling that.
|
|
|
|
// Two-element vector aliases (Clang ext_vector_type) used by the test below:
// half2 is the value/return type, short2 is the declared pointee type of the
// test function's pointer parameter.
typedef half __attribute__((ext_vector_type(2))) half2;
|
|
typedef short __attribute__((ext_vector_type(2))) short2;
|
|
|
|
// Struct whose only member is an anonymous union. The union overlays an
// anonymous struct of two scalar halves (x, y) with a single half2 vector
// (data), so the struct's element type is a union rather than a vector.
// The test casts a pointer to this struct type before the atomic builtin call.
struct __half2 {
|
|
union {
|
|
struct {
|
|
half x;
|
|
half y;
|
|
};
|
|
// Vector view sharing storage with the x/y pair above.
half2 data;
|
|
};
|
|
};
|
|
|
|
// CHECK-LABEL: define dso_local <2 x half> @test_flat_add_2f16(
|
|
// CHECK-SAME: ptr noundef [[ADDR:%.*]], <2 x half> noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
|
|
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
|
|
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ADDR_ADDR]] to ptr
|
|
// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VAL_ADDR]] to ptr
|
|
// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: store <2 x half> [[VAL]], ptr [[VAL_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR_ASCAST]], align 8
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[VAL_ADDR_ASCAST]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr [[TMP0]], <2 x half> [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]]
|
|
// CHECK-NEXT: ret <2 x half> [[TMP2]]
|
|
//
|
|
// Test entry point: takes a short2 pointer and a half2 value, casts the
// pointer to struct __half2* and passes both to
// __builtin_amdgcn_flat_atomic_fadd_v2f16, returning the builtin's result.
// The autogenerated FileCheck assertions above expect an atomicrmw fadd on
// <2 x half> for this call.
half2 test_flat_add_2f16(short2 *addr, half2 val) {
|
|
// The explicit cast to the union-wrapping struct pointer is the case under
// test; the pointee is not a plain vector type.
return __builtin_amdgcn_flat_atomic_fadd_v2f16((struct __half2*)addr, val);
|
|
}
|
|
//.
|
|
// CHECK: [[META4]] = !{}
|
|
//.
|