[AMDGPU][gfx1250] Add memory legalizer tests (NFC) (#154725)

This commit is contained in:
Pierre van Houtryve 2025-08-22 10:14:09 +02:00 committed by GitHub
parent f1aee598e7
commit 4ab5efd48d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 20931 additions and 1119 deletions

View File

@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@ -78,6 +79,10 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU-LABEL: workgroup_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -145,6 +150,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU-LABEL: workgroup_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -217,6 +226,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -289,6 +302,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -359,6 +376,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -426,6 +447,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -498,6 +523,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -570,6 +599,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -662,6 +695,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -744,6 +784,14 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -842,6 +890,15 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -940,6 +997,15 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1032,6 +1098,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1114,6 +1187,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1212,6 +1293,15 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1310,6 +1400,15 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1404,6 +1503,13 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1490,6 +1596,15 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1594,6 +1709,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1698,6 +1823,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1792,6 +1927,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1878,6 +2020,15 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -1982,6 +2133,16 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void
@ -2086,6 +2247,16 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"global"}
ret void

View File

@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX6-LABEL: workgroup_acquire_fence:
@ -76,6 +77,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -142,6 +148,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU-LABEL: workgroup_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -208,6 +218,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -274,6 +288,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -331,6 +349,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -388,6 +410,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -445,6 +471,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -502,6 +532,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -570,6 +604,11 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -636,6 +675,10 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-LABEL: agent_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -702,6 +745,10 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-LABEL: agent_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -768,6 +815,10 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-LABEL: agent_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -825,6 +876,10 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-LABEL: agent_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -882,6 +937,10 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-LABEL: agent_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -939,6 +998,10 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -996,6 +1059,10 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1064,6 +1131,11 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1130,6 +1202,10 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-LABEL: system_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1196,6 +1272,10 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-LABEL: system_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1262,6 +1342,10 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-LABEL: system_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1319,6 +1403,10 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-LABEL: system_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1376,6 +1464,10 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-LABEL: system_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1433,6 +1525,10 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: system_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void
@ -1490,6 +1586,10 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: system_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst, !mmra !{!"amdgpu-synchronize-as", !"local"}
ret void

View File

@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX6-LABEL: singlethread_acquire_fence:
@ -65,6 +66,10 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
; GFX12-CU-LABEL: singlethread_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acquire
ret void
@ -122,6 +127,10 @@ define amdgpu_kernel void @singlethread_release_fence() {
; GFX12-CU-LABEL: singlethread_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") release
ret void
@ -179,6 +188,10 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
; GFX12-CU-LABEL: singlethread_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") acq_rel
ret void
@ -236,6 +249,10 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
; GFX12-CU-LABEL: singlethread_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread") seq_cst
ret void
@ -293,6 +310,10 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
; GFX12-CU-LABEL: singlethread_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acquire
ret void
@ -350,6 +371,10 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
; GFX12-CU-LABEL: singlethread_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") release
ret void
@ -407,6 +432,10 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: singlethread_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") acq_rel
ret void
@ -464,6 +493,10 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: singlethread_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: singlethread_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("singlethread-one-as") seq_cst
ret void
@ -521,6 +554,10 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
; GFX12-CU-LABEL: wavefront_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acquire
ret void
@ -578,6 +615,10 @@ define amdgpu_kernel void @wavefront_release_fence() {
; GFX12-CU-LABEL: wavefront_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") release
ret void
@ -635,6 +676,10 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
; GFX12-CU-LABEL: wavefront_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") acq_rel
ret void
@ -692,6 +737,10 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
; GFX12-CU-LABEL: wavefront_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront") seq_cst
ret void
@ -749,6 +798,10 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
; GFX12-CU-LABEL: wavefront_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acquire
ret void
@ -806,6 +859,10 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
; GFX12-CU-LABEL: wavefront_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") release
ret void
@ -863,6 +920,10 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: wavefront_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") acq_rel
ret void
@ -920,6 +981,10 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: wavefront_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: wavefront_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("wavefront-one-as") seq_cst
ret void
@ -998,6 +1063,11 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acquire
ret void
@ -1073,6 +1143,11 @@ define amdgpu_kernel void @workgroup_release_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release
ret void
@ -1153,6 +1228,11 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel
ret void
@ -1233,6 +1313,11 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst
ret void
@ -1303,6 +1388,10 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acquire_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acquire
ret void
@ -1370,6 +1459,10 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
; GFX12-CU-LABEL: workgroup_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") release
ret void
@ -1442,6 +1535,10 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
; GFX12-CU-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") acq_rel
ret void
@ -1514,6 +1611,10 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
; GFX12-CU-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: workgroup_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("workgroup-one-as") seq_cst
ret void
@ -1606,6 +1707,13 @@ define amdgpu_kernel void @agent_acquire_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acquire
ret void
@ -1688,6 +1796,14 @@ define amdgpu_kernel void @agent_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") release
ret void
@ -1786,6 +1902,15 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel
ret void
@ -1884,6 +2009,15 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst
ret void
@ -1976,6 +2110,13 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acquire
ret void
@ -2058,6 +2199,14 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") release
ret void
@ -2156,6 +2305,15 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") acq_rel
ret void
@ -2254,6 +2412,15 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: agent_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_DEV
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("agent-one-as") seq_cst
ret void
@ -2348,6 +2515,13 @@ define amdgpu_kernel void @system_acquire_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence acquire
ret void
@ -2434,6 +2608,15 @@ define amdgpu_kernel void @system_release_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence release
ret void
@ -2538,6 +2721,16 @@ define amdgpu_kernel void @system_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence acq_rel
ret void
@ -2642,6 +2835,16 @@ define amdgpu_kernel void @system_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence seq_cst
ret void
@ -2736,6 +2939,13 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acquire_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acquire
ret void
@ -2822,6 +3032,15 @@ define amdgpu_kernel void @system_one_as_release_fence() {
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_release_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") release
ret void
@ -2926,6 +3145,16 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_acq_rel_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") acq_rel
ret void
@ -3030,6 +3259,16 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: system_one_as_seq_cst_fence:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: global_wb scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_inv scope:SCOPE_SYS
; GFX1250-NEXT: s_endpgm
entry:
fence syncscope("one-as") seq_cst
ret void

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
; GFX12-LABEL: flat_last_use_load_0:
@ -16,6 +17,17 @@ define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_last_use_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out
@ -55,6 +67,21 @@ define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_last_use_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
@ -80,6 +107,19 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_last_use_and_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr %out
@ -100,6 +140,17 @@ define amdgpu_kernel void @flat_last_use_and_nontemporal_load(ptr %in, ptr %out)
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: flat_store_b32 v[0:1], v2
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_last_use_and_nontemporal_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr %in, align 4, !amdgpu.last.use !{}, !nontemporal !0
store i32 %val, ptr %out

View File

@ -11,6 +11,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX7-LABEL: flat_nontemporal_load_0:
@ -187,6 +188,17 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4, !nontemporal !0
@ -555,6 +567,21 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -739,6 +766,17 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_store_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] th:TH_STORE_NT scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4
@ -1095,6 +1133,20 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_store_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3]
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1293,6 +1345,19 @@ define amdgpu_kernel void @flat_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4, !nontemporal !0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX7-LABEL: flat_nontemporal_load_0:
@ -143,6 +144,19 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load volatile i32, ptr %in, align 4
@ -415,6 +429,23 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -563,6 +594,18 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_store_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load i32, ptr %in, align 4
@ -831,6 +874,21 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_nontemporal_store_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v1, s[2:3]
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -971,6 +1029,17 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_volatile_workgroup_acquire_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: flat_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
ptr %in, ptr %out) {
entry:
%val = load atomic volatile i32, ptr %in syncscope("workgroup") acquire, align 4
@ -1090,6 +1159,17 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: flat_store_b32 v[0:1], v2
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: flat_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: flat_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX1250-NEXT: s_endpgm
i32 %in, ptr %out) {
entry:
store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GFX12-LABEL: global_last_use_load_0:
@ -14,6 +15,18 @@ define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: v_mov_b32_e32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: global_last_use_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
entry:
%val = load i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@ -37,6 +50,21 @@ define amdgpu_kernel void @global_last_use_load_1(ptr addrspace(1) %in, ptr addr
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: global_last_use_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
@ -58,6 +86,19 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: global_last_use_and_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
store i32 %val, ptr addrspace(1) %out
@ -81,6 +122,21 @@ define amdgpu_kernel void @global_last_use_and_nontemporal_load(ptr addrspace(1)
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: global_last_use_and_nontemporal_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_LU
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid

View File

@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_nontemporal_load_0(
; GFX6-LABEL: global_nontemporal_load_0:
@ -189,6 +190,18 @@ define amdgpu_kernel void @global_nontemporal_load_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_nontemporal_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4, !nontemporal !0
@ -448,6 +461,21 @@ define amdgpu_kernel void @global_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_nontemporal_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset th:TH_LOAD_NT
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -633,6 +661,18 @@ define amdgpu_kernel void @global_nontemporal_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_nontemporal_store_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@ -866,6 +906,20 @@ define amdgpu_kernel void @global_nontemporal_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s2
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] th:TH_STORE_NT
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_nontemporal_store_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset th:TH_STORE_NT
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -1056,6 +1110,19 @@ define amdgpu_kernel void @global_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_nontemporal_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4, !nontemporal !0

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @global_volatile_load_0(
; GFX6-LABEL: global_volatile_load_0:
@ -146,6 +147,19 @@ define amdgpu_kernel void @global_volatile_load_0(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_volatile_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(1) %in, align 4
@ -345,6 +359,23 @@ define amdgpu_kernel void @global_volatile_load_1(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_volatile_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s4, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v1, s[2:3] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_bvhcnt 0x0
; GFX1250-NEXT: s_wait_samplecnt 0x0
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -501,6 +532,19 @@ define amdgpu_kernel void @global_volatile_store_0(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_volatile_store_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@ -693,6 +737,21 @@ define amdgpu_kernel void @global_volatile_store_1(
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_volatile_store_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -838,6 +897,17 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_volatile_workgroup_acquire_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX1250-NEXT: s_wait_loadcnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%val = load atomic volatile i32, ptr addrspace(1) %in syncscope("workgroup") acquire, align 4
@ -969,6 +1039,17 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: global_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(1) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_nontemporal_load_0(
; GFX6-LABEL: local_nontemporal_load_0:
@ -193,6 +194,18 @@ define amdgpu_kernel void @local_nontemporal_load_0(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_nontemporal_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ds_load_b32 v1, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load i32, ptr addrspace(3) %in, align 4, !nontemporal !0
@ -428,6 +441,22 @@ define amdgpu_kernel void @local_nontemporal_load_1(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_nontemporal_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
; GFX1250-NEXT: s_mov_b32 s2, 2
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
; GFX1250-NEXT: ds_load_b32 v1, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -597,6 +626,18 @@ define amdgpu_kernel void @local_nontemporal_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_nontemporal_store_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@ -802,6 +843,22 @@ define amdgpu_kernel void @local_nontemporal_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_nontemporal_store_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
; GFX1250-NEXT: s_mov_b32 s1, 2
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -991,6 +1048,18 @@ define amdgpu_kernel void @local_nontemporal_volatile_load(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_nontemporal_volatile_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ds_load_b32 v1, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(3) %in, align 4, !nontemporal !0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GFX1250 %s
define amdgpu_kernel void @local_volatile_load_0(
; GFX6-LABEL: local_volatile_load_0:
@ -141,6 +142,18 @@ define amdgpu_kernel void @local_volatile_load_0(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_load_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s2
; GFX1250-NEXT: ds_load_b32 v1, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%val = load volatile i32, ptr addrspace(3) %in, align 4
@ -308,6 +321,22 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_load_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
; GFX1250-NEXT: s_mov_b32 s2, 2
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
; GFX1250-NEXT: ds_load_b32 v1, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(1) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -429,6 +458,18 @@ define amdgpu_kernel void @local_volatile_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_store_0:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%val = load i32, ptr addrspace(1) %in, align 4
@ -570,6 +611,22 @@ define amdgpu_kernel void @local_volatile_store_1(
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_store_1:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
; GFX1250-NEXT: s_mov_b32 s1, 2
; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
@ -698,6 +755,18 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_workgroup_acquire_load:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: ds_load_b32 v1, v0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
ptr addrspace(3) %in, ptr addrspace(3) %out) {
entry:
%val = load atomic volatile i32, ptr addrspace(3) %in syncscope("workgroup") acquire, align 4
@ -813,6 +882,17 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v0, s1
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: ds_store_b32 v0, v1
; GFX1250-NEXT: s_endpgm
i32 %in, ptr addrspace(3) %out) {
entry:
store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff