Drop -CU suffix

This commit is contained in:
pvanhout 2025-08-21 12:44:08 +02:00
parent e83355bd69
commit 3b51b225ba
22 changed files with 14528 additions and 14528 deletions

View File

@ -80,11 +80,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_acquire_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -153,11 +153,11 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -231,11 +231,11 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -309,11 +309,11 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -385,11 +385,11 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -458,11 +458,11 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -536,11 +536,11 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -614,11 +614,11 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -801,12 +801,12 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -906,13 +906,13 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1012,13 +1012,13 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1201,12 +1201,12 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_one_as_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1306,13 +1306,13 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1412,13 +1412,13 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1607,12 +1607,12 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1718,13 +1718,13 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1830,13 +1830,13 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -2025,12 +2025,12 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_one_as_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -2136,13 +2136,13 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -2248,13 +2248,13 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void

View File

@ -1064,11 +1064,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_acquire_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@ -1145,11 +1145,11 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@ -1231,11 +1231,11 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@ -1317,11 +1317,11 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@ -1393,11 +1393,11 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@ -1466,11 +1466,11 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@ -1544,11 +1544,11 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@ -1622,11 +1622,11 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
@ -1809,12 +1809,12 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release
ret void
@ -1914,13 +1914,13 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel
ret void
@ -2020,13 +2020,13 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst
ret void
@ -2209,12 +2209,12 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_one_as_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release
ret void
@ -2314,13 +2314,13 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel
ret void
@ -2420,13 +2420,13 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_DEV
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst
ret void
@ -2615,12 +2615,12 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence release
ret void
@ -2726,13 +2726,13 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel
ret void
@ -2838,13 +2838,13 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst
ret void
@ -3033,12 +3033,12 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_one_as_release_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release
ret void
@ -3144,13 +3144,13 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_one_as_acq_rel_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel
ret void
@ -3256,13 +3256,13 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: system_one_as_seq_cst_fence:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst
ret void

File diff suppressed because it is too large Load Diff

View File

@ -108,16 +108,16 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: flat_last_use_and_volatile_load:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: flat_last_use_and_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out

View File

@ -1346,16 +1346,16 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: flat_nontemporal_volatile_load:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: flat_nontemporal_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4, !nontemporal !0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -145,16 +145,16 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: flat_nontemporal_load_0:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: flat_nontemporal_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4
@ -428,20 +428,20 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: flat_nontemporal_load_1:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_wait_xcnt 0x0
; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: flat_nontemporal_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1156,18 +1156,18 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: flat_volatile_workgroup_release_store:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_xcnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: flat_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -87,16 +87,16 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: global_last_use_and_volatile_load:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: global_last_use_and_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out

View File

@ -1111,16 +1111,16 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: global_nontemporal_volatile_load:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: global_nontemporal_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !nontemporal !0

File diff suppressed because it is too large Load Diff

View File

@ -148,16 +148,16 @@ define amdgpu_kernel void @global_volatile_load_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: global_volatile_load_0:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: global_volatile_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4
@ -358,20 +358,20 @@ define amdgpu_kernel void @global_volatile_load_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: global_volatile_load_1:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_wait_xcnt 0x0
; GFX1250-CU-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-CU-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-CU-NEXT: s_wait_loadcnt 0x0
; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: global_volatile_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1036,18 +1036,18 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: global_volatile_workgroup_release_store:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_xcnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: global_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -883,17 +883,17 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-CU-LABEL: local_volatile_workgroup_release_store:
; GFX1250-CU: ; %bb.0: ; %entry
; GFX1250-CU-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX1250-CU-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX1250-CU-NEXT: s_wait_kmcnt 0x0
; GFX1250-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-CU-NEXT: s_wait_storecnt 0x0
; GFX1250-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-CU-NEXT: ds_store_b32 v0, v1
; GFX1250-CU-NEXT: s_endpgm
; GFX1250-LABEL: local_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4

File diff suppressed because it is too large Load Diff