llvm-project/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir
Carl Ritson a3a3e6997b
[AMDGPU] Rewrite GFX12 SGPR hazard handling to dedicated pass (#118750)
- Algorithm operates over whole IR to attempt to minimize waits.
- Add support for VALU->VALU SGPR hazards via VA_SDST/VA_VCC.
2025-01-30 11:21:11 +09:00

348 lines
14 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass post-RA-hazard-rec,amdgpu-wait-sgpr-hazards -o - %s | FileCheck -check-prefix=GCN %s
# Embedded LLVM IR module. It declares one empty stub function for every MIR
# function in this file and binds each stub to an attribute group:
#   group 0  "amdgpu-sgpr-hazard-wait"="0"       -> hazard_disable
#   group 1  "amdgpu-sgpr-hazard-wait"="1"       -> hazard_enable
#   group 2  "amdgpu-sgpr-hazard-boundary-cull"  -> hazard_calls,
#            hazard_callee1, hazard_callee2
#   group 3  "amdgpu-sgpr-hazard-mem-wait-cull" plus
#            "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="1"
#                                                -> hazard_cull_vmem,
#            hazard_cull_sample, hazard_cull_bvh, hazard_nocull_scratch,
#            hazard_cull_global, hazard_nocull_flat
#   group 4  same attributes as group 3 but with threshold "2"
#                                                -> hazard_cull_vmem2
# hazard_callee1 and hazard_callee2 are the only stubs defined without an
# amdgpu_gs / amdgpu_cs calling convention.
--- |
define amdgpu_gs void @hazard_disable() #0 { ret void }
define amdgpu_gs void @hazard_enable() #1 { ret void }
define amdgpu_cs void @hazard_calls() #2 { ret void }
define void @hazard_callee1() #2 { ret void }
define void @hazard_callee2() #2 { ret void }
define amdgpu_cs void @hazard_cull_vmem() #3 { ret void }
define amdgpu_cs void @hazard_cull_vmem2() #4 { ret void }
define amdgpu_cs void @hazard_cull_sample() #3 { ret void }
define amdgpu_cs void @hazard_cull_bvh() #3 { ret void }
define amdgpu_cs void @hazard_nocull_scratch() #3 { ret void }
define amdgpu_cs void @hazard_cull_global() #3 { ret void }
define amdgpu_cs void @hazard_nocull_flat() #3 { ret void }
attributes #0 = { "amdgpu-sgpr-hazard-wait"="0" }
attributes #1 = { "amdgpu-sgpr-hazard-wait"="1" }
attributes #2 = { "amdgpu-sgpr-hazard-boundary-cull" }
attributes #3 = { "amdgpu-sgpr-hazard-mem-wait-cull" "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="1" }
attributes #4 = { "amdgpu-sgpr-hazard-mem-wait-cull" "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="2" }
...
# hazard_disable: the IR stub carries "amdgpu-sgpr-hazard-wait"="0".
# Input sequence: V_CNDMASK_B32_e64 reads $sgpr0, S_GETPC_B64 then overwrites
# $sgpr0_sgpr1, and S_ADD_U32 reads $sgpr0 again.
# The expected output is identical to the input, i.e. no S_WAITCNT_DEPCTR is
# expected anywhere in this function. Compare with hazard_enable, which uses
# the same instruction sequence.
---
name: hazard_disable
body: |
bb.0:
; GCN-LABEL: name: hazard_disable
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_enable: same instruction sequence as hazard_disable, but the IR stub
# carries "amdgpu-sgpr-hazard-wait"="1".
# The expected output gains exactly one instruction: S_WAITCNT_DEPCTR 65534,
# placed between S_GETPC_B64 (which writes $sgpr0_sgpr1) and the S_ADD_U32
# that reads $sgpr0.
---
name: hazard_enable
body: |
bb.0:
; GCN-LABEL: name: hazard_enable
; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_calls: multi-block function whose IR stub carries
# "amdgpu-sgpr-hazard-boundary-cull"; frameInfo sets hasCalls to true.
# bb.0 reads $sgpr4, $sgpr8, $sgpr16, $sgpr18, $sgpr20 and $sgpr22 through
# V_WRITELANE_B32; later blocks write some of those SGPRs with scalar
# instructions and then leave through S_SETPC_B64 (bb.2),
# S_SETPC_B64_return (bb.3), S_SWAPPC_B64 (bb.4) or S_CALL_B64 (bb.5).
# Expected insertions, as shown by the check lines:
#   * bb.2, bb.3, bb.4: four DS_NOP followed by S_WAITCNT_DEPCTR 65534
#     immediately before S_SETPC_B64 / S_SETPC_B64_return / S_SWAPPC_B64.
#   * bb.5 and bb.6: nothing inserted (no wait before S_CALL_B64, before the
#     S_ADD_U32 that follows S_SWAPPC_B64, or before the S_MOV_B32 that reads
#     $sgpr8).
# The "successors" lines exist only in the expected output; the input blocks
# do not spell them out.
---
name: hazard_calls
frameInfo:
hasCalls: true
body: |
; GCN-LABEL: name: hazard_calls
; GCN: bb.0:
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 0, $sgpr4, $vgpr0
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 1, $sgpr8, $vgpr0
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 2, $sgpr16, $vgpr0
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 3, $sgpr18, $vgpr0
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 4, $sgpr20, $vgpr0
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 5, $sgpr22, $vgpr0
; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_CBRANCH_SCC0 %bb.3, implicit $scc
; GCN-NEXT: S_BRANCH %bb.4
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: $sgpr16 = S_MOV_B32 0
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: S_SETPC_B64 $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.3:
; GCN-NEXT: $sgpr18 = S_MOV_B32 0
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: S_SETPC_B64_return $sgpr0_sgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.4:
; GCN-NEXT: successors: %bb.5(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vcc_lo = S_MOV_B32 0
; GCN-NEXT: $sgpr20 = S_MOV_B32 0
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: $sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3
; GCN-NEXT: $sgpr4 = S_ADD_U32 $sgpr4, 0, implicit-def $scc
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.5:
; GCN-NEXT: successors: %bb.6(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr8_sgpr9 = S_CALL_B64 0
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.6:
; GCN-NEXT: $sgpr22 = S_MOV_B32 $sgpr8
; GCN-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_WRITELANE_B32 0, $sgpr4, $vgpr0
$vgpr0 = V_WRITELANE_B32 1, $sgpr8, $vgpr0
$vgpr0 = V_WRITELANE_B32 2, $sgpr16, $vgpr0
$vgpr0 = V_WRITELANE_B32 3, $sgpr18, $vgpr0
$vgpr0 = V_WRITELANE_B32 4, $sgpr20, $vgpr0
$vgpr0 = V_WRITELANE_B32 5, $sgpr22, $vgpr0
S_CBRANCH_SCC0 %bb.2, implicit $scc
S_BRANCH %bb.1
bb.1:
S_CBRANCH_SCC0 %bb.3, implicit $scc
S_BRANCH %bb.4
bb.2:
$sgpr16 = S_MOV_B32 0
S_SETPC_B64 $sgpr0_sgpr1
bb.3:
$sgpr18 = S_MOV_B32 0
S_SETPC_B64_return $sgpr0_sgpr1
bb.4:
$vcc_lo = S_MOV_B32 0
$sgpr20 = S_MOV_B32 0
$sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3
$sgpr4 = S_ADD_U32 $sgpr4, 0, implicit-def $scc
bb.5:
$sgpr8_sgpr9 = S_CALL_B64 0
bb.6:
$sgpr22 = S_MOV_B32 $sgpr8
S_ENDPGM 0
...
# hazard_callee1: callable function (IR stub is a plain "define void") with
# "amdgpu-sgpr-hazard-boundary-cull". The body contains no V_* instruction:
# only S_CSELECT_B32, S_ADD_U32 and S_SETPC_B64_return.
# The expected output is identical to the input: no DS_NOP and no
# S_WAITCNT_DEPCTR are expected, not even in front of the return.
---
name: hazard_callee1
body: |
bb.0:
; GCN-LABEL: name: hazard_callee1
; GCN: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc
; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc
; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
$sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc
$sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc
S_SETPC_B64_return $sgpr30_sgpr31
...
# hazard_callee2: same scalar tail as hazard_callee1, preceded by a
# V_ADDC_U32_e64 that both reads and writes $sgpr0. The IR stub is a plain
# "define void" with "amdgpu-sgpr-hazard-boundary-cull".
# Expected insertions, as shown by the check lines:
#   * S_WAITCNT_DEPCTR 65534 between S_CSELECT_B32 (writes $sgpr1) and the
#     S_ADD_U32 that reads $sgpr1.
#   * four DS_NOP immediately before S_SETPC_B64_return.
# NOTE(review): unlike hazard_calls, no S_WAITCNT_DEPCTR is expected directly
# in front of the return here; presumably the earlier wait already covers
# it -- confirm against the pass implementation.
---
name: hazard_callee2
body: |
bb.0:
; GCN-LABEL: name: hazard_callee2
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc
$sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc
S_SETPC_B64_return $sgpr30_sgpr31
...
# hazard_cull_vmem: IR stub carries "amdgpu-sgpr-hazard-mem-wait-cull" with
# "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="1".
# Input sequence: V_ADDC_U32_e64 (reads and writes $sgpr0), a
# BUFFER_LOAD_DWORD_OFFSET, S_WAIT_LOADCNT 0, then S_GETPC_B64 writing
# $sgpr0_sgpr1 and S_ADD_U32 reading $sgpr0.
# Expected output: four DS_NOP inserted immediately before S_WAIT_LOADCNT 0,
# and no S_WAITCNT_DEPCTR before the S_ADD_U32.
---
name: hazard_cull_vmem
body: |
bb.0:
; GCN-LABEL: name: hazard_cull_vmem
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec
S_WAIT_LOADCNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_cull_vmem2: variant of hazard_cull_vmem whose IR stub uses
# "amdgpu-sgpr-hazard-mem-wait-cull-threshold"="2".
# The input repeats the V_ADDC_U32_e64 / BUFFER_LOAD_DWORD_OFFSET /
# S_WAIT_LOADCNT 0 group twice (the second V_ADDC_U32_e64 reads $sgpr2)
# before the S_GETPC_B64 / S_ADD_U32 pair.
# Expected output: the first S_WAIT_LOADCNT 0 is left untouched; four DS_NOP
# are inserted only before the second S_WAIT_LOADCNT 0; no S_WAITCNT_DEPCTR
# is expected before the S_ADD_U32.
# NOTE(review): presumably the first wait is skipped because the threshold
# of 2 has not been reached yet -- confirm against the pass implementation.
---
name: hazard_cull_vmem2
body: |
bb.0:
; GCN-LABEL: name: hazard_cull_vmem2
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr2, 0, implicit $exec
; GCN-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec
S_WAIT_LOADCNT 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr2, 0, implicit $exec
$vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec
S_WAIT_LOADCNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_cull_sample: same shape as hazard_cull_vmem (mem-wait-cull,
# threshold "1"), but the memory operation is IMAGE_SAMPLE_LZ_V1_V2_gfx12
# and the wait is S_WAIT_SAMPLECNT 0.
# Expected output: four DS_NOP inserted immediately before
# S_WAIT_SAMPLECNT 0, and no S_WAITCNT_DEPCTR before the S_ADD_U32.
---
name: hazard_cull_sample
body: |
bb.0:
; GCN-LABEL: name: hazard_cull_sample
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx12 $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAIT_SAMPLECNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx12 $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
S_WAIT_SAMPLECNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_cull_bvh: same shape as hazard_cull_vmem (mem-wait-cull,
# threshold "1"), but the memory operation is
# IMAGE_BVH_INTERSECT_RAY_sa_gfx11 and the wait is S_WAIT_BVHCNT 0.
# Expected output: four DS_NOP inserted immediately before S_WAIT_BVHCNT 0,
# and no S_WAITCNT_DEPCTR before the S_ADD_U32.
---
name: hazard_cull_bvh
body: |
bb.0:
; GCN-LABEL: name: hazard_cull_bvh
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAIT_BVHCNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_WAIT_BVHCNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_nocull_scratch: same attributes as hazard_cull_vmem (mem-wait-cull,
# threshold "1"), but the memory operation is SCRATCH_LOAD_DWORD.
# Expected output: no DS_NOP is inserted before S_WAIT_LOADCNT 0; instead
# S_WAITCNT_DEPCTR 65534 is expected between S_GETPC_B64 and the S_ADD_U32
# that reads $sgpr0.
# NOTE(review): presumably scratch accesses are not eligible for the
# mem-wait cull -- confirm against the pass implementation.
---
name: hazard_nocull_scratch
body: |
bb.0:
; GCN-LABEL: name: hazard_nocull_scratch
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
S_WAIT_LOADCNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_cull_global: same shape as hazard_cull_vmem (mem-wait-cull,
# threshold "1"), but the memory operation is GLOBAL_LOAD_DWORD.
# Expected output: four DS_NOP inserted immediately before S_WAIT_LOADCNT 0,
# and no S_WAITCNT_DEPCTR before the S_ADD_U32.
---
name: hazard_cull_global
body: |
bb.0:
; GCN-LABEL: name: hazard_cull_global
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: DS_NOP implicit $m0, implicit $exec
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
S_WAIT_LOADCNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...
# hazard_nocull_flat: same attributes as hazard_cull_vmem (mem-wait-cull,
# threshold "1"), but the memory operation is FLAT_LOAD_DWORD.
# Expected output: no DS_NOP is inserted before S_WAIT_LOADCNT 0; instead
# S_WAITCNT_DEPCTR 65534 is expected between S_GETPC_B64 and the S_ADD_U32
# that reads $sgpr0.
# NOTE(review): presumably flat accesses are not eligible for the mem-wait
# cull -- confirm against the pass implementation.
---
name: hazard_nocull_flat
body: |
bb.0:
; GCN-LABEL: name: hazard_nocull_flat
; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_WAIT_LOADCNT 0
; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64
; GCN-NEXT: S_WAITCNT_DEPCTR 65534
; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
$vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAIT_LOADCNT 0
$sgpr0_sgpr1 = S_GETPC_B64
$sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
S_ENDPGM 0
...