Implements the base of the MemoryLegalizer for a roughly correct GFX1250 memory model. Documentation will come later, and some remaining changes still have to be added, but this is the backbone of the model.
498 lines
22 KiB
LLVM
498 lines
22 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s
|
|
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
|
|
|
|
define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !0)
|
|
ret i32 %0
|
|
}
|
|
|
|
define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !0)
|
|
ret <2 x i32> %0
|
|
}
|
|
|
|
define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_acquire:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !0)
|
|
ret i32 %0
|
|
}
|
|
|
|
define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_acquire:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !0)
|
|
ret <2 x i32> %0
|
|
}
|
|
|
|
define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_acquire:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_release:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_release:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_release:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define i32 @test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !0)
|
|
ret i32 %0
|
|
}
|
|
|
|
define <2 x i32> @test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !0)
|
|
ret <2 x i32> %0
|
|
}
|
|
|
|
define <4 x i32> @test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !0)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define void @test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !0)
|
|
ret void
|
|
}
|
|
|
|
define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 0, metadata !1)
|
|
ret i32 %0
|
|
}
|
|
|
|
define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 0, metadata !1)
|
|
ret <2 x i32> %0
|
|
}
|
|
|
|
define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 0, metadata !1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed(ptr noundef %addr, i32 noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 0, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed(ptr noundef %addr, <2 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 0, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed(ptr noundef %addr, <4 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_relaxed:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 0, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_acquire:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 2, metadata !1)
|
|
ret i32 %0
|
|
}
|
|
|
|
define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_acquire:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 2, metadata !1)
|
|
ret <2 x i32> %0
|
|
}
|
|
|
|
define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_acquire:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 2, metadata !1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release(ptr noundef %addr, i32 noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_release:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 3, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release(ptr noundef %addr, <2 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_release:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 3, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release(ptr noundef %addr, <4 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_release:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 3, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define i32 @test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_32x4B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_load_b32 v0, v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call i32 @llvm.amdgcn.cooperative.atomic.load.32x4B.p0(ptr %addr, i32 5, metadata !1)
|
|
ret i32 %0
|
|
}
|
|
|
|
define <2 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_16x8B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_load_b64 v[0:1], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <2 x i32> @llvm.amdgcn.cooperative.atomic.load.16x8B.p0(ptr %addr, i32 5, metadata !1)
|
|
ret <2 x i32> %0
|
|
}
|
|
|
|
define <4 x i32> @test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst(ptr noundef readonly %addr) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_load_8x16B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_load_b128 v[0:3], v[0:1] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
%0 = tail call <4 x i32> @llvm.amdgcn.cooperative.atomic.load.8x16B.p0(ptr %addr, i32 5, metadata !1)
|
|
ret <4 x i32> %0
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst(ptr noundef %addr, i32 noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_32x4B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.32x4B.p0(ptr %addr, i32 %val, i32 5, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst(ptr noundef %addr, <2 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_16x8B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b64 v[0:1], v[2:3] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.16x8B.p0(ptr %addr, <2 x i32> %val, i32 5, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
define void @test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst(ptr noundef %addr, <4 x i32> noundef %val) {
|
|
; GFX1250-LABEL: test_one_as_flat_amdgcn_cooperative_atomic_store_8x16B_seq_cst:
|
|
; GFX1250: ; %bb.0: ; %entry
|
|
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_wait_storecnt 0x0
|
|
; GFX1250-NEXT: flat_store_b128 v[0:1], v[2:5] scope:SCOPE_DEV
|
|
; GFX1250-NEXT: s_wait_dscnt 0x0
|
|
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
|
entry:
|
|
tail call void @llvm.amdgcn.cooperative.atomic.store.8x16B.p0(ptr %addr, <4 x i32> %val, i32 5, metadata !1)
|
|
ret void
|
|
}
|
|
|
|
!0 = !{ !"workgroup" }
|
|
!1 = !{ !"workgroup-one-as" }
|
|
|
|
;; GFX1250: {{.*}}
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; GFX1250-GISEL: {{.*}}
|
|
; GFX1250-SDAG: {{.*}}
|