
Since e39f6c1844fab59c638d8059a6cf139adb42279a, opt will infer the correct datalayout when given a triple. Avoid explicitly specifying the datalayout in tests that depend on the AMDGPU target being present, so that the string cannot fall out of sync with the TargetInfo value. Only tests with `REQUIRES: amdgpu-registered-target` or a local lit.cfg were updated, to ensure that tests for non-target-specific passes that happen to use the AMDGPU layout still pass when building with a limited set of targets. Reviewed By: shiltian, arsenm. Pull Request: https://github.com/llvm/llvm-project/pull/137921
189 lines
12 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; Run the pass both via the legacy flag and the new pass manager pipeline;
; both must produce identical lowered output.
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s

; No explicit datalayout: opt infers the AMDGPU layout from the triple.
target triple = "amdgcn--"
; Plain, metadata-annotated, volatile, and atomic loads through a buffer fat
; pointer (addrspace(7)) must lower to raw.ptr.buffer.load intrinsics, with
; volatility encoded in the aux operand and atomic orderings as fences.
define void @loads(ptr addrspace(8) %buf) {
; CHECK-LABEL: define void @loads
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !nontemporal [[META0:![0-9]+]]
; CHECK-NEXT: [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
; CHECK-NEXT: [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
; CHECK-NEXT: [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648), !nontemporal [[META0]]
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: [[ATOMIC_MONOTONIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: [[ATOMIC_ACQUIRE:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  %scalar = load float, ptr addrspace(7) %p, align 4
  %vec2 = load <2 x float>, ptr addrspace(7) %p, align 8
  %vec4 = load <4 x float>, ptr addrspace(7) %p, align 16

  %nontemporal = load float, ptr addrspace(7) %p, !nontemporal !0
  %invariant = load float, ptr addrspace(7) %p, !invariant.load !1
  %nontemporal.invariant = load float, ptr addrspace(7) %p, !nontemporal !0, !invariant.load !1

  %volatile = load volatile float, ptr addrspace(7) %p
  %volatile.nontemporal = load volatile float, ptr addrspace(7) %p, !nontemporal !0

  %atomic = load atomic volatile float, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  %atomic.monotonic = load atomic float, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  %atomic.acquire = load atomic float, ptr addrspace(7) %p acquire, align 4

  ret void
}
|
|
|
|
; Plain, nontemporal, volatile, and atomic stores must lower to
; raw.ptr.buffer.store intrinsics, mirroring the load cases above.
define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
; CHECK-LABEL: define void @stores
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !nontemporal [[META0]]
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648), !nontemporal [[META0]]
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence release
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  store float %f, ptr addrspace(7) %p, align 4
  store <4 x float> %f4, ptr addrspace(7) %p, align 16

  store float %f, ptr addrspace(7) %p, !nontemporal !0

  store volatile float %f, ptr addrspace(7) %p
  store volatile float %f, ptr addrspace(7) %p, !nontemporal !0

  store atomic volatile float %f, ptr addrspace(7) %p syncscope("wavefront") seq_cst, align 4
  store atomic float %f, ptr addrspace(7) %p syncscope("wavefront") monotonic, align 4
  store atomic float %f, ptr addrspace(7) %p release, align 4

  ret void
}
|
|
|
|
; Every supported atomicrmw operation must lower to the matching
; raw.ptr.buffer.atomic.* intrinsic, bracketed by release/acquire fences
; for the seq_cst ordering used here.
define void @atomicrmw(ptr addrspace(8) %buf, float %f, i32 %i) {
; CHECK-LABEL: define void @atomicrmw
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[XCHG:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[ADD:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[SUB:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[AND:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[XOR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[FADD:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[FMAX:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[FMIN:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 [[I]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: ret void
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr float, ptr addrspace(7) %base, i32 4

  ; Fence insertion is tested by loads and stores
  %xchg = atomicrmw xchg ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %add = atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %sub = atomicrmw sub ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %and = atomicrmw and ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %or = atomicrmw or ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %xor = atomicrmw xor ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %min = atomicrmw min ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %max = atomicrmw max ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umin = atomicrmw umin ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4
  %umax = atomicrmw umax ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  %fadd = atomicrmw fadd ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmax = atomicrmw fmax ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4
  %fmin = atomicrmw fmin ptr addrspace(7) %p, float %f syncscope("wavefront") seq_cst, align 4

  ; Check a no-return atomic
  atomicrmw add ptr addrspace(7) %p, i32 %i syncscope("wavefront") seq_cst, align 4

  ret void
}
|
|
|
|
; A volatile cmpxchg lowers to the cmpswap intrinsic (volatile flagged in the
; aux operand); the {value, success} pair is rebuilt from the returned value.
define {i32, i1} @cmpxchg(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[RET]], [[WANTED]]
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { i32, i1 } [[TMP1]], i1 [[TMP2]], 1
; CHECK-NEXT: ret { i32, i1 } [[TMP3]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg volatile ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}
|
|
|
|
; A weak cmpxchg is allowed to skip materializing the success bit: only the
; value element of the result pair is populated.
define {i32, i1} @cmpxchg_weak(ptr addrspace(8) %buf, i32 %wanted, i32 %new) {
; CHECK-LABEL: define { i32, i1 } @cmpxchg_weak
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[WANTED:%.*]], i32 [[NEW:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: fence syncscope("wavefront") release
; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 [[NEW]], i32 [[WANTED]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
; CHECK-NEXT: fence syncscope("wavefront") acquire
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, i1 } poison, i32 [[RET]], 0
; CHECK-NEXT: ret { i32, i1 } [[TMP1]]
;
  %base = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
  %p = getelementptr i32, ptr addrspace(7) %base, i32 4

  %ret = cmpxchg weak ptr addrspace(7) %p, i32 %wanted, i32 %new syncscope("wavefront") acq_rel monotonic, align 4
  ret {i32, i1} %ret
}
|
|
|
|
; !0: !nontemporal payload; !1: empty node used as !invariant.load marker.
!0 = ! { i32 1 }
!1 = ! { }
|