[AMDGPU][gfx1250] Add memory legalizer tests (NFC) (#154725)
This commit is contained in:
parent
f1aee598e7
commit
4ab5efd48d
@ -12,6 +12,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @workgroup_acquire_fence() {
|
||||
; GFX6-LABEL: workgroup_acquire_fence:
|
||||
@ -78,6 +79,10 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -145,6 +150,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -217,6 +226,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -289,6 +302,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -359,6 +376,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -426,6 +447,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -498,6 +523,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -570,6 +599,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -662,6 +695,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -744,6 +784,14 @@ define amdgpu_kernel void @agent_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -842,6 +890,15 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -940,6 +997,15 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1032,6 +1098,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1114,6 +1187,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1212,6 +1293,15 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1310,6 +1400,15 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1404,6 +1503,13 @@ define amdgpu_kernel void @system_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1490,6 +1596,15 @@ define amdgpu_kernel void @system_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence release, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1594,6 +1709,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1698,6 +1823,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1792,6 +1927,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1878,6 +2020,15 @@ define amdgpu_kernel void @system_one_as_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -1982,6 +2133,16 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
@ -2086,6 +2247,16 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
|
||||
ret void
|
||||
|
@ -12,6 +12,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @workgroup_acquire_fence() {
|
||||
; GFX6-LABEL: workgroup_acquire_fence:
|
||||
@ -76,6 +77,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -142,6 +148,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -208,6 +218,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -274,6 +288,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -331,6 +349,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -388,6 +410,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -445,6 +471,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -502,6 +532,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -570,6 +604,11 @@ define amdgpu_kernel void @agent_acquire_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -636,6 +675,10 @@ define amdgpu_kernel void @agent_release_fence() {
|
||||
; GFX12-CU-LABEL: agent_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -702,6 +745,10 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: agent_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -768,6 +815,10 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: agent_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -825,6 +876,10 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: agent_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -882,6 +937,10 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: agent_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -939,6 +998,10 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -996,6 +1059,10 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1064,6 +1131,11 @@ define amdgpu_kernel void @system_acquire_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1130,6 +1202,10 @@ define amdgpu_kernel void @system_release_fence() {
|
||||
; GFX12-CU-LABEL: system_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1196,6 +1272,10 @@ define amdgpu_kernel void @system_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: system_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1262,6 +1342,10 @@ define amdgpu_kernel void @system_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: system_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1319,6 +1403,10 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: system_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1376,6 +1464,10 @@ define amdgpu_kernel void @system_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: system_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1433,6 +1525,10 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: system_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
@ -1490,6 +1586,10 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: system_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
|
||||
ret void
|
||||
|
@ -12,6 +12,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @singlethread_acquire_fence() {
|
||||
; GFX6-LABEL: singlethread_acquire_fence:
|
||||
@ -65,6 +66,10 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") acquire
|
||||
ret void
|
||||
@ -122,6 +127,10 @@ define amdgpu_kernel void @singlethread_release_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") release
|
||||
ret void
|
||||
@ -179,6 +188,10 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") acq_rel
|
||||
ret void
|
||||
@ -236,6 +249,10 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
@ -293,6 +310,10 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acquire
|
||||
ret void
|
||||
@ -350,6 +371,10 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") release
|
||||
ret void
|
||||
@ -407,6 +432,10 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acq_rel
|
||||
ret void
|
||||
@ -464,6 +493,10 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: singlethread_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") seq_cst
|
||||
ret void
|
||||
@ -521,6 +554,10 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") acquire
|
||||
ret void
|
||||
@ -578,6 +615,10 @@ define amdgpu_kernel void @wavefront_release_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") release
|
||||
ret void
|
||||
@ -635,6 +676,10 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") acq_rel
|
||||
ret void
|
||||
@ -692,6 +737,10 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") seq_cst
|
||||
ret void
|
||||
@ -749,6 +798,10 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acquire
|
||||
ret void
|
||||
@ -806,6 +859,10 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") release
|
||||
ret void
|
||||
@ -863,6 +920,10 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acq_rel
|
||||
ret void
|
||||
@ -920,6 +981,10 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: wavefront_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") seq_cst
|
||||
ret void
|
||||
@ -998,6 +1063,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acquire
|
||||
ret void
|
||||
@ -1073,6 +1143,11 @@ define amdgpu_kernel void @workgroup_release_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") release
|
||||
ret void
|
||||
@ -1153,6 +1228,11 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acq_rel
|
||||
ret void
|
||||
@ -1233,6 +1313,11 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") seq_cst
|
||||
ret void
|
||||
@ -1303,6 +1388,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
@ -1370,6 +1459,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
@ -1442,6 +1535,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
@ -1514,6 +1611,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX12-CU: ; %bb.0: ; %entry
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst
|
||||
ret void
|
||||
@ -1606,6 +1707,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acquire
|
||||
ret void
|
||||
@ -1688,6 +1796,14 @@ define amdgpu_kernel void @agent_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") release
|
||||
ret void
|
||||
@ -1786,6 +1902,15 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acq_rel
|
||||
ret void
|
||||
@ -1884,6 +2009,15 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") seq_cst
|
||||
ret void
|
||||
@ -1976,6 +2110,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire
|
||||
ret void
|
||||
@ -2058,6 +2199,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release
|
||||
ret void
|
||||
@ -2156,6 +2305,15 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel
|
||||
ret void
|
||||
@ -2254,6 +2412,15 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
@ -2348,6 +2515,13 @@ define amdgpu_kernel void @system_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acquire
|
||||
ret void
|
||||
@ -2434,6 +2608,15 @@ define amdgpu_kernel void @system_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence release
|
||||
ret void
|
||||
@ -2538,6 +2721,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acq_rel
|
||||
ret void
|
||||
@ -2642,6 +2835,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence seq_cst
|
||||
ret void
|
||||
@ -2736,6 +2939,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_acquire_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acquire
|
||||
ret void
|
||||
@ -2822,6 +3032,15 @@ define amdgpu_kernel void @system_one_as_release_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_release_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") release
|
||||
ret void
|
||||
@ -2926,6 +3145,16 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_acq_rel_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel
|
||||
ret void
|
||||
@ -3030,6 +3259,16 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: system_one_as_seq_cst_fence:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst
|
||||
ret void
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
|
||||
; GFX12-LABEL: flat_last_use_load_0:
|
||||
@ -16,6 +17,17 @@ define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_last_use_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
|
||||
store i32 %val, ptr %out
|
||||
@ -55,6 +67,21 @@ define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_last_use_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
|
||||
@ -80,6 +107,19 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
|
||||
; GFX12-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_last_use_and_volatile_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
|
||||
store i32 %val, ptr %out
|
||||
@ -100,6 +140,17 @@ define amdgpu_kernel void @flat_last_use_and_nontemporal_load(ptr %in, ptr %out)
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_last_use_and_nontemporal_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
|
||||
store i32 %val, ptr %out
|
||||
|
@ -11,6 +11,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @flat_nontemporal_load_0(
|
||||
; GFX7-LABEL: flat_nontemporal_load_0:
|
||||
@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%val = load i32, ptr %in, align 4, !nontemporal !0
|
||||
@ -555,6 +567,21 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -739,6 +766,17 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_store_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] th:TH_STORE_NT scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%val = load i32, ptr %in, align 4
|
||||
@ -1095,6 +1133,20 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_store_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3]
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -1293,6 +1345,19 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_volatile_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%val = load volatile i32, ptr %in, align 4, !nontemporal !0
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @flat_nontemporal_load_0(
|
||||
; GFX7-LABEL: flat_nontemporal_load_0:
|
||||
@ -143,6 +144,19 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%val = load volatile i32, ptr %in, align 4
|
||||
@ -415,6 +429,23 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -563,6 +594,18 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_store_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%val = load i32, ptr %in, align 4
|
||||
@ -831,6 +874,21 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_nontemporal_store_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3]
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -971,6 +1029,17 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_volatile_workgroup_acquire_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr %in, ptr %out) {
|
||||
entry:
|
||||
%val = load atomic volatile i32, ptr %in syncscope("workgroup") acquire, align 4
|
||||
@ -1090,6 +1159,17 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_volatile_workgroup_release_store:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
i32 %in, ptr %out) {
|
||||
entry:
|
||||
store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
; GFX12-LABEL: global_last_use_load_0:
|
||||
@ -14,6 +15,18 @@ define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addr
|
||||
; GFX12-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_last_use_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
|
||||
store i32 %val, ptr addrspace(1) %out
|
||||
@ -37,6 +50,21 @@ define amdgpu_kernel void @global_last_use_load_1(ptr addrspace(1) %in, ptr addr
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_last_use_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
|
||||
@ -58,6 +86,19 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_last_use_and_volatile_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%val = load volatile i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
|
||||
store i32 %val, ptr addrspace(1) %out
|
||||
@ -81,6 +122,21 @@ define amdgpu_kernel void @global_last_use_and_nontemporal_load(ptr addrspace(1)
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_last_use_and_nontemporal_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
|
||||
|
@ -12,6 +12,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @global_nontemporal_load_0(
|
||||
; GFX6-LABEL: global_nontemporal_load_0:
|
||||
@ -189,6 +190,18 @@ define amdgpu_kernel void @global_nontemporal_load_0(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_nontemporal_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, align 4, !nontemporal !0
|
||||
@ -448,6 +461,21 @@ define amdgpu_kernel void @global_nontemporal_load_1(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_nontemporal_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -633,6 +661,18 @@ define amdgpu_kernel void @global_nontemporal_store_0(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_nontemporal_store_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, align 4
|
||||
@ -866,6 +906,20 @@ define amdgpu_kernel void @global_nontemporal_store_1(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_nontemporal_store_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -1056,6 +1110,19 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_nontemporal_volatile_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load volatile i32, ptr addrspace(1) %in, align 4, !nontemporal !0
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -8,6 +8,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @global_volatile_load_0(
|
||||
; GFX6-LABEL: global_volatile_load_0:
|
||||
@ -146,6 +147,19 @@ define amdgpu_kernel void @global_volatile_load_0(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_volatile_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load volatile i32, ptr addrspace(1) %in, align 4
|
||||
@ -345,6 +359,23 @@ define amdgpu_kernel void @global_volatile_load_1(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_volatile_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_bvhcnt 0x0
|
||||
; GFX1250-NEXT: s_wait_samplecnt 0x0
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -501,6 +532,19 @@ define amdgpu_kernel void @global_volatile_store_0(
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_volatile_store_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, align 4
|
||||
@ -693,6 +737,21 @@ define amdgpu_kernel void @global_volatile_store_1(
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
|
||||
; GFX12-CU-NEXT: s_wait_storecnt 0x0
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_volatile_store_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
|
||||
; GFX1250-NEXT: s_wait_storecnt 0x0
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -838,6 +897,17 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
|
||||
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_volatile_workgroup_acquire_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
|
||||
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load atomic volatile i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
|
||||
@ -969,6 +1039,17 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: global_volatile_workgroup_release_store:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
i32 %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -12,6 +12,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @local_nontemporal_load_0(
|
||||
; GFX6-LABEL: local_nontemporal_load_0:
|
||||
@ -193,6 +194,18 @@ define amdgpu_kernel void @local_nontemporal_load_0(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_nontemporal_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: ds_load_b32 v1, v1
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(3) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(3) %in, align 4, !nontemporal !0
|
||||
@ -428,6 +441,22 @@ define amdgpu_kernel void @local_nontemporal_load_1(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_nontemporal_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
|
||||
; GFX1250-NEXT: s_mov_b32 s2, 2
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
|
||||
; GFX1250-NEXT: ds_load_b32 v1, v1
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(3) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -597,6 +626,18 @@ define amdgpu_kernel void @local_nontemporal_store_0(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_nontemporal_store_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v1
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(3) %out) {
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, align 4
|
||||
@ -802,6 +843,22 @@ define amdgpu_kernel void @local_nontemporal_store_1(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_nontemporal_store_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
|
||||
; GFX1250-NEXT: s_mov_b32 s1, 2
|
||||
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v1
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(3) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -991,6 +1048,18 @@ define amdgpu_kernel void @local_nontemporal_volatile_load(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_nontemporal_volatile_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: ds_load_b32 v1, v1
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(3) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load volatile i32, ptr addrspace(3) %in, align 4, !nontemporal !0
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -8,6 +8,7 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
|
||||
|
||||
define amdgpu_kernel void @local_volatile_load_0(
|
||||
; GFX6-LABEL: local_volatile_load_0:
|
||||
@ -141,6 +142,18 @@ define amdgpu_kernel void @local_volatile_load_0(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_volatile_load_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX1250-NEXT: ds_load_b32 v1, v1
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(3) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%val = load volatile i32, ptr addrspace(3) %in, align 4
|
||||
@ -308,6 +321,22 @@ define amdgpu_kernel void @local_volatile_load_1(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_volatile_load_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
|
||||
; GFX1250-NEXT: s_mov_b32 s2, 2
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
|
||||
; GFX1250-NEXT: ds_load_b32 v1, v1
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(3) %in, ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -429,6 +458,18 @@ define amdgpu_kernel void @local_volatile_store_0(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_volatile_store_0:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v1
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(3) %out) {
|
||||
entry:
|
||||
%val = load i32, ptr addrspace(1) %in, align 4
|
||||
@ -570,6 +611,22 @@ define amdgpu_kernel void @local_volatile_store_1(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_volatile_store_1:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX1250-NEXT: s_wait_xcnt 0x0
|
||||
; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
|
||||
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
|
||||
; GFX1250-NEXT: s_mov_b32 s1, 2
|
||||
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v1
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(1) %in, ptr addrspace(3) %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
@ -698,6 +755,18 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
|
||||
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_volatile_workgroup_acquire_load:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX1250-NEXT: ds_load_b32 v1, v0
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v1
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
ptr addrspace(3) %in, ptr addrspace(3) %out) {
|
||||
entry:
|
||||
%val = load atomic volatile i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4
|
||||
@ -813,6 +882,17 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
|
||||
; GFX12-CU-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX12-CU-NEXT: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: local_volatile_workgroup_release_store:
|
||||
; GFX1250: ; %bb.0: ; %entry
|
||||
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
|
||||
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX1250-NEXT: s_wait_dscnt 0x0
|
||||
; GFX1250-NEXT: ds_store_b32 v0, v1
|
||||
; GFX1250-NEXT: s_endpgm
|
||||
i32 %in, ptr addrspace(3) %out) {
|
||||
entry:
|
||||
store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user