Some of the MIR tests hit a bug where an error is raised if a raw global reference is used as the referenced value. Some of those were worked around by just keeping a no-op bitcast constant expression.
1267 lines
57 KiB
YAML
1267 lines
57 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s
|
|
|
|
---
|
|
name: vmem_scratch_load
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0
|
|
; GCN-LABEL: name: vmem_scratch_load
|
|
; GCN: liveins: $vgpr0
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
|
|
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
|
|
$vgpr1 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
|
|
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_buffer_load_dword_offset
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; GCN-LABEL: name: vmem_buffer_load_dword_offset
|
|
; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
|
|
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
|
|
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
|
|
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
|
|
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr2, killed $vgpr2, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_buffer_load_addr
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; GCN-LABEL: name: vmem_buffer_load_addr
|
|
; GCN: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
|
|
$vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
|
|
$vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr1 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_flat_load
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1
|
|
; GCN-LABEL: name: vmem_flat_load
|
|
; GCN: liveins: $vgpr0, $vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
|
|
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
$vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_global_load
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1
|
|
; GCN-LABEL: name: vmem_global_load
|
|
; GCN: liveins: $vgpr0, $vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1)
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
|
|
$vgpr2 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec:: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1)
|
|
$vgpr3 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr0 = nofpexcept V_ADD_F32_e32 killed $vgpr3, killed $vgpr4, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_global_store
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
|
; GCN-LABEL: name: vmem_global_store
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
|
|
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_buffer_store
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: $sgpr32
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1
|
|
; GCN-LABEL: name: vmem_buffer_store
|
|
; GCN: liveins: $vgpr0, $vgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0
|
|
; GCN-NEXT: S_WAIT_KMCNT 0
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $vgpr0 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
|
|
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
|
|
$vgpr0 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_scratch_store
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GCN-LABEL: name: vmem_scratch_store
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0
|
|
; GCN-NEXT: S_WAIT_KMCNT 0
|
|
; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_LSHLREV_B32_e64 16, $vgpr2, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: smem_load
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr2_sgpr3
|
|
; GCN-LABEL: name: smem_load
|
|
; GCN: liveins: $sgpr2_sgpr3
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 0, 0 :: (load (s64), addrspace 4)
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $sgpr2 = S_MOV_B32 0
|
|
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 0, 0 :: (load (s64), addrspace 4)
|
|
$sgpr2 = S_MOV_B32 0
|
|
...
|
|
|
|
---
|
|
name: smem_store
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr2, $sgpr3
|
|
; GCN-LABEL: name: smem_store
|
|
; GCN: liveins: $sgpr0, $sgpr2, $sgpr3
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr0, $sgpr2_sgpr3, 0, 0
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $sgpr3 = S_MOV_B32 0
|
|
S_STORE_DWORD_IMM $sgpr0, $sgpr2_sgpr3, 0, 0
|
|
$sgpr3 = S_MOV_B32 0
|
|
...
|
|
|
|
# The four global_load instructions together form a single load group.
|
|
|
|
---
|
|
name: vmem_load_group
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-LABEL: name: vmem_load_group
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 2
|
|
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
...
|
|
|
|
# The contiguous stores form a single group.
|
|
|
|
---
|
|
name: vmem_store_group
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
|
; GCN-LABEL: name: vmem_store_group
|
|
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
|
|
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: smem_load_group
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0_sgpr1
|
|
; GCN-LABEL: name: smem_load_group
|
|
; GCN: liveins: $sgpr0_sgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
; GCN-NEXT: $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
; GCN-NEXT: $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
; GCN-NEXT: S_WAIT_KMCNT 0
|
|
; GCN-NEXT: $sgpr2 = S_MOV_B32 0
|
|
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
$sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
$sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
$sgpr2 = S_MOV_B32 0
|
|
...
|
|
|
|
---
|
|
name: smem_store_group
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5
|
|
; GCN-LABEL: name: smem_store_group
|
|
; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr2, $sgpr0_sgpr1, 0, 0
|
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr3, $sgpr0_sgpr1, 0, 0
|
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr0_sgpr1, 0, 0
|
|
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr0_sgpr1, 0, 0
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $sgpr2 = S_MOV_B32 0
|
|
; GCN-NEXT: $sgpr3 = S_MOV_B32 0
|
|
S_STORE_DWORD_IMM $sgpr2, $sgpr0_sgpr1, 0, 0
|
|
S_STORE_DWORD_IMM $sgpr3, $sgpr0_sgpr1, 0, 0
|
|
S_STORE_DWORD_IMM $sgpr4, $sgpr0_sgpr1, 0, 0
|
|
S_STORE_DWORD_IMM $sgpr5, $sgpr0_sgpr1, 0, 0
|
|
$sgpr2 = S_MOV_B32 0
|
|
$sgpr3 = S_MOV_B32 0
|
|
...
|
|
|
|
# The four global_load instructions form two separate groups due to the intervening s_nop.
|
|
|
|
---
|
|
name: vmem_loads_with_an_intervening_nop
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-LABEL: name: vmem_loads_with_an_intervening_nop
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
; GCN-NEXT: S_NOP 0
|
|
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 2
|
|
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
S_NOP 0
|
|
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_contiguous_loads_with_an_intervening_store
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-LABEL: name: vmem_contiguous_loads_with_an_intervening_store
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 1
|
|
; GCN-NEXT: GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 32, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 2
|
|
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 32, 0, implicit $exec
|
|
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: vmem_stores_with_intervening_nop
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
|
; GCN-LABEL: name: vmem_stores_with_intervening_nop
|
|
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
; GCN-NEXT: S_NOP 0
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
S_NOP 0
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
|
|
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
|
|
...
|
|
|
|
# The intervening load breaks the store group, forming two distinct store groups.
|
|
|
|
---
|
|
name: vmem_contiguous_stores_with_an_intervening_load
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
|
; GCN-LABEL: name: vmem_contiguous_stores_with_an_intervening_load
|
|
; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr11 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
$vgpr11 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
|
|
$vgpr10 = V_LSHLREV_B32_e64 16, $vgpr6, implicit $exec
|
|
...
|
|
|
|
# Atomic operations should not form a group. But they are memory instructions and should increment
|
|
# the xcnt counter value as they might cause a register dependency. This test ensures S_WAIT_XCNT
|
|
# insertion for such cases.
|
|
|
|
---
|
|
name: atomic_op
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; GCN-LABEL: name: atomic_op
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_ADDR64_RTN $vgpr2, $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 1, implicit $exec :: (load store (s32), addrspace 1)
|
|
; GCN-NEXT: GLOBAL_ATOMIC_ADD_F32 $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
|
|
; GCN-NEXT: $vgpr6 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: S_WAIT_XCNT 2
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_XCNT 1
|
|
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr2 = BUFFER_ATOMIC_ADD_ADDR64_RTN $vgpr2, $vgpr0_vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 1, implicit $exec :: (load store (s32), addrspace 1)
|
|
GLOBAL_ATOMIC_ADD_F32 $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, implicit $exec :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
|
|
$vgpr6 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
|
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
...
|
|
|
|
# Force-insert S_WAIT_XCNT 0 for a dependency in an SMEM instruction even though
|
|
# there is a pending VMEM dependency.
|
|
|
|
---
|
|
name: smem_xcnt_insertion_with_pending_vmem_event
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $sgpr0_sgpr1
|
|
; GCN-LABEL: name: smem_xcnt_insertion_with_pending_vmem_event
|
|
; GCN: liveins: $sgpr0_sgpr1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 2, implicit $exec
|
|
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 4, implicit $exec
|
|
; GCN-NEXT: GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 16, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_KMCNT 0
|
|
; GCN-NEXT: $sgpr2 = S_ADD_I32 $sgpr0, 100, implicit-def $scc
|
|
; GCN-NEXT: S_WAIT_XCNT 0
|
|
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 20, implicit $exec
|
|
$sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 0, 0 :: (load (s64), addrspace 4)
|
|
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr1 = V_MOV_B32_e32 2, implicit $exec
|
|
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
$vgpr4 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr5 = V_MOV_B32_e32 4, implicit $exec
|
|
GLOBAL_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 16, 0, implicit $exec
|
|
$sgpr2 = S_ADD_I32 $sgpr0, 100, implicit-def $scc
|
|
$vgpr0 = V_MOV_B32_e32 20, implicit $exec
|
|
...
|
|
|
|
# The second instruction in the flat_load group has a WAR dependency with a prior
|
|
# memory operation (scratch_load instruction).
|
|
|
|
---
|
|
name: vmem_group_reg_dependency_with_prior_instruction
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr4, $vgpr5
|
|
; GCN-LABEL: name: vmem_group_reg_dependency_with_prior_instruction
|
|
; GCN: liveins: $vgpr4, $vgpr5
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
$vgpr3 = V_MOV_B32_e32 1, implicit $exec
|
|
...
|
|
|
|
# Two instructions inside the load group have dependencies with prior instructions.
|
|
|
|
---
|
|
name: multiple_xcnt_insertion_in_group
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr3, $vgpr4, $vgpr5
|
|
; GCN-LABEL: name: multiple_xcnt_insertion_in_group
|
|
; GCN: liveins: $vgpr3, $vgpr4, $vgpr5
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr4, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
|
|
$vgpr2 = SCRATCH_LOAD_DWORD $vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr6 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
$vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst (s32), addrspace 1)
|
|
$vgpr8 = V_MOV_B32_e32 1, implicit $exec
|
|
...
|
|
|
|
---
|
|
name: xcnt_event_post_load_group
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
body: |
|
|
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-LABEL: name: xcnt_event_post_load_group
|
|
; GCN: liveins: $vgpr0, $vgpr1, $vgpr10
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 0
|
|
; GCN-NEXT: $vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 3
|
|
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
|
; GCN-NEXT: S_WAIT_LOADCNT 2
|
|
; GCN-NEXT: $vgpr6 = V_MOV_B32_e32 1, implicit $exec
|
|
; GCN-NEXT: S_WAIT_XCNT 1
|
|
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
$vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr2_vgpr3 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
|
|
$vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 8, 0, implicit $exec
|
|
$vgpr6_vgpr7 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 16, 0, implicit $exec
|
|
$vgpr8_vgpr9 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 24, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
|
|
$vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
|
|
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
|
$vgpr6 = V_MOV_B32_e32 1, implicit $exec
|
|
$vgpr2 = V_LSHLREV_B32_e64 16, $vgpr4, implicit $exec
|
|
...
|
|
|
|
# The three V_MOV_B32 instructions waiting outside the group need appropriate wait_xcnt
|
|
# insertion as their dst registers have dependencies with instructions inside the group.
|
|
|
|
---
# Input: nine GLOBAL_STORE_DWORDs through $vgpr0_vgpr1 (data $vgpr2..$vgpr9, then
# $vgpr2 again) with a SCRATCH_LOAD between the eighth and ninth store, followed by
# V_MOVs overwriting $vgpr3, $vgpr5 and $vgpr7.
# Expected: S_WAIT_XCNT 8, 6 and 4 in front of those three V_MOVs.
name: xcnt_event_post_store_group
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
    ; GCN-LABEL: name: xcnt_event_post_store_group
    ; GCN: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
    ; GCN-NEXT: S_WAIT_LOADCNT 0
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 8
    ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 6
    ; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 1, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 4
    ; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 2, implicit $exec
    ; GCN-NEXT: S_WAIT_LOADCNT 0
    ; GCN-NEXT: $vgpr11 = V_LSHLREV_B32_e64 16, $vgpr10, implicit $exec
    $vgpr0 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr7, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr8, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr9, 0, 0, implicit $exec
    $vgpr11 = SCRATCH_LOAD_DWORD $vgpr10, 0, 0, implicit $exec, implicit $flat_scr
    GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    $vgpr5 = V_MOV_B32_e32 1, implicit $exec
    $vgpr7 = V_MOV_B32_e32 2, implicit $exec
    $vgpr11 = V_LSHLREV_B32_e64 16, $vgpr10, implicit $exec
...
|
|
|
|
# This test captures the case where interleaved load and store operations form separate groups.
|
|
# The registers written by the V_MOV_B32 instructions all have dependencies on these independent groups and
|
|
# should have the wait_xcnt insertion with appropriate wait values.
|
|
|
|
---
# Input: alternating GLOBAL_LOAD / GLOBAL_STORE instructions, then V_MOVs that
# overwrite the load results ($vgpr7, $vgpr8) and the store data ($vgpr4, $vgpr5).
# Expected: S_WAIT_LOADCNT before the load-result overwrites and S_WAIT_XCNT
# before the store-data overwrites.
name: load_store_switching
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GCN-LABEL: name: load_store_switching
    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
    ; GCN-NEXT: S_WAIT_LOADCNT 0
    ; GCN-NEXT: $vgpr7 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, $vgpr4, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr8 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr2_vgpr3, $vgpr5, 0, 0, implicit $exec
    ; GCN-NEXT: S_WAIT_LOADCNT 1
    ; GCN-NEXT: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 2
    ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 1, implicit $exec
    ; GCN-NEXT: S_WAIT_LOADCNT 0
    ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 2, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 0
    ; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 3, implicit $exec
    $vgpr0 = SCRATCH_LOAD_DWORD $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1 = V_LSHLREV_B32_e64 16, $vgpr1, implicit $exec
    $vgpr7 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr2_vgpr3, $vgpr4, 0, 0, implicit $exec
    $vgpr8 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec
    GLOBAL_STORE_DWORD killed $vgpr2_vgpr3, $vgpr5, 0, 0, implicit $exec
    $vgpr7 = V_MOV_B32_e32 0, implicit $exec
    $vgpr4 = V_MOV_B32_e32 1, implicit $exec
    $vgpr8 = V_MOV_B32_e32 2, implicit $exec
    $vgpr5 = V_MOV_B32_e32 3, implicit $exec
...
|
|
|
|
# V_DUAL_MOV is a single instruction and should emit required xcnt
|
|
# if the destination registers have any memory-op dependency.
|
|
|
|
---
# Input: a SCRATCH_LOAD that reads $vgpr1 and writes $vgpr2, then a V_DUAL_MOV that
# writes both registers, then an unrelated V_MOV.
# Expected: only S_WAIT_LOADCNT 0 is placed before the dual move.
name: dual_mov
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $sgpr0, $sgpr1, $vgpr1
    ; GCN-LABEL: name: dual_mov
    ; GCN: liveins: $sgpr0, $sgpr1, $vgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: S_WAIT_LOADCNT 0
    ; GCN-NEXT: $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
    ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
    $vgpr2 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr1, $vgpr2 = V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 killed $sgpr0, killed $sgpr1, implicit $exec, implicit $exec, implicit $exec, implicit $exec, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec
...
|
|
|
|
# No xcnt wait insertion for DS load/store operations.
|
|
|
|
---
# Input: a DS_READ and a DS_WRITE whose address/data registers are overwritten
# right after each instruction.
# Expected: a single S_WAIT_DSCNT 0 before the DS_WRITE and no S_WAIT_XCNT.
name: ds_load_store
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    ; GCN-LABEL: name: ds_load_store
    ; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
    ; GCN-NEXT: $vgpr0 = DS_READ_B32_gfx9 killed $vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) undef`, addrspace 3)
    ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 2, implicit $exec
    ; GCN-NEXT: S_WAIT_DSCNT 0
    ; GCN-NEXT: DS_WRITE_B32_gfx9 killed $vgpr0, killed $vgpr1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(3) undef`, addrspace 3)
    ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 20, implicit $exec
    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
    $vgpr0 = DS_READ_B32_gfx9 killed $vgpr1, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) undef`)
    $vgpr1 = V_MOV_B32_e32 2, implicit $exec
    DS_WRITE_B32_gfx9 killed $vgpr0, killed $vgpr1, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(3) undef`)
    $vgpr0 = V_MOV_B32_e32 20, implicit $exec
...
|
|
|
|
---
# Input: 65 back-to-back GLOBAL_STORE_DWORDs through $vgpr0_vgpr1 (the first stores
# $vgpr2, the remaining 64 store $vgpr3), then a V_MOV overwriting $vgpr2.
# Expected: S_WAIT_XCNT 62 before the V_MOV.
name: xcnt_max
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
    ; GCN-LABEL: name: xcnt_max
    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 62
    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr3, 0, 0, implicit $exec
    $vgpr2 = V_MOV_B32_e32 1, implicit $exec
...
|
|
|
|
---
# Input: an S_LOAD followed by a GLOBAL_LOAD, then a read of the S_LOAD result
# ($sgpr2) and an overwrite of $vgpr0, which is part of the GLOBAL_LOAD address.
# Expected: S_WAIT_KMCNT 0 before the S_MOV and S_WAIT_XCNT 0 before the V_MOV.
name: wait_kmcnt_with_outstanding_vmem
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-LABEL: name: wait_kmcnt_with_outstanding_vmem
    ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GCN-NEXT: S_WAIT_KMCNT 0
    ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
    ; GCN-NEXT: S_WAIT_XCNT 0
    ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $sgpr2 = S_MOV_B32 $sgpr2
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
|
|
|
|
---
# Input: an S_LOAD in bb.0, a GLOBAL_LOAD in the conditionally skipped bb.1, and in
# bb.2 a read of the S_LOAD result followed by an overwrite of $vgpr0 (part of the
# GLOBAL_LOAD address).
# Expected in bb.2: S_WAIT_KMCNT 0 before the S_MOV and S_WAIT_XCNT 0 before the V_MOV.
name: wait_kmcnt_with_outstanding_vmem_2
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GCN-LABEL: name: wait_kmcnt_with_outstanding_vmem_2
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: liveins: $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAIT_KMCNT 0
  ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
  ; GCN-NEXT: S_WAIT_XCNT 0
  ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    S_CBRANCH_SCC1 %bb.2, implicit $scc
  bb.1:
    liveins: $vgpr0_vgpr1, $sgpr2
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  bb.2:
    liveins: $sgpr2
    $sgpr2 = S_MOV_B32 $sgpr2
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
|
|
|
|
---
# Input: an S_LOAD in bb.0, a GLOBAL_LOAD into $vgpr2 in the conditionally skipped
# bb.1, and in bb.2 a read of the S_LOAD result followed by an overwrite of $vgpr2.
# Expected in bb.2: S_WAIT_KMCNT 0 before the S_MOV and S_WAIT_LOADCNT 0 before the
# V_MOV, with no S_WAIT_XCNT.
name: wait_kmcnt_and_wait_loadcnt
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GCN-LABEL: name: wait_kmcnt_and_wait_loadcnt
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: liveins: $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAIT_KMCNT 0
  ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
  ; GCN-NEXT: S_WAIT_LOADCNT 0
  ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    S_CBRANCH_SCC1 %bb.2, implicit $scc
  bb.1:
    liveins: $vgpr0_vgpr1, $sgpr2
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  bb.2:
    liveins: $sgpr2
    $sgpr2 = S_MOV_B32 $sgpr2
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
...
|
|
|
|
---
# Input: an S_LOAD in bb.0 and a GLOBAL_LOAD in the conditionally skipped bb.1; bb.2
# reads the S_LOAD result, issues a second S_LOAD, overwrites $vgpr0 (part of the
# GLOBAL_LOAD address) and then overwrites $sgpr0 (part of the S_LOAD address).
# Expected in bb.2: S_WAIT_KMCNT 0 before the first S_MOV, no wait before the V_MOV,
# and S_WAIT_XCNT 0 before the final S_MOV.
name: implicit_handling_of_pending_vmem_group
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GCN-LABEL: name: implicit_handling_of_pending_vmem_group
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAIT_KMCNT 0
  ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT: S_WAIT_XCNT 0
  ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    S_CBRANCH_SCC1 %bb.2, implicit $scc
  bb.1:
    liveins: $vgpr0_vgpr1, $sgpr2
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  bb.2:
    liveins: $sgpr0_sgpr1, $sgpr2
    $sgpr2 = S_MOV_B32 $sgpr2
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $sgpr0 = S_MOV_B32 $sgpr0
...
|
|
|
|
---
# Input: an S_LOAD in bb.0 and two GLOBAL_LOADs (into $vgpr2 and $vgpr3) in the
# conditionally skipped bb.1; bb.2 reads $vgpr2, reads $sgpr2 and overwrites $vgpr0
# (part of the GLOBAL_LOAD address).
# Expected in bb.2: S_WAIT_LOADCNT 1, S_WAIT_KMCNT 0 and S_WAIT_XCNT 0 before the
# three instructions respectively.
name: mixed_pending_events
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GCN-LABEL: name: mixed_pending_events
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
  ; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: liveins: $sgpr2, $vgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAIT_LOADCNT 1
  ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
  ; GCN-NEXT: S_WAIT_KMCNT 0
  ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
  ; GCN-NEXT: S_WAIT_XCNT 0
  ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    S_CBRANCH_SCC1 %bb.2, implicit $scc
  bb.1:
    liveins: $vgpr0_vgpr1, $sgpr2
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
    $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
  bb.2:
    liveins: $sgpr2, $vgpr2
    $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
    $sgpr2 = S_MOV_B32 $sgpr2
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
|
|
|
|
---
# Input: an S_LOAD in bb.0 and two GLOBAL_LOADs (addresses $vgpr0_vgpr1 and
# $vgpr2_vgpr3) in the conditionally skipped bb.1; bb.2 reads $sgpr2, then overwrites
# $vgpr1, $vgpr2 and $sgpr0.
# Expected in bb.2: S_WAIT_KMCNT 0 before the first S_MOV, S_WAIT_XCNT 1 before the
# $vgpr1 overwrite, S_WAIT_XCNT 0 before the $vgpr2 overwrite, and no wait before the
# $sgpr0 overwrite.
name: pending_vmem_event_between_block
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GCN-LABEL: name: pending_vmem_event_between_block
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAIT_KMCNT 0
  ; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
  ; GCN-NEXT: S_WAIT_XCNT 1
  ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT: S_WAIT_XCNT 0
  ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    S_CBRANCH_SCC1 %bb.2, implicit $scc
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
    $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
  bb.2:
    liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
    $sgpr2 = S_MOV_B32 $sgpr2
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    $sgpr0 = S_MOV_B32 $sgpr0
...
|
|
|
|
---
# Input: an S_LOAD in bb.0 and two GLOBAL_LOADs (addresses $vgpr0_vgpr1 and
# $vgpr2_vgpr3) in the conditionally skipped bb.1; bb.2 overwrites $vgpr1, $vgpr2 and
# $sgpr0 without first reading the S_LOAD result.
# Expected in bb.2: a single S_WAIT_XCNT 0 before the $vgpr1 overwrite and no further
# waits.
name: flushing_vmem_cnt_on_block_entry
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GCN-LABEL: name: flushing_vmem_cnt_on_block_entry
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.2(0x80000000)
  ; GCN-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
  ; GCN-NEXT: $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: S_WAIT_XCNT 0
  ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    S_CBRANCH_SCC1 %bb.2, implicit $scc
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr2
    $vgpr4 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $vgpr5 = GLOBAL_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec
  bb.2:
    liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = V_MOV_B32_e32 0, implicit $exec
    $sgpr0 = S_MOV_B32 $sgpr0
...
|
|
|
|
---
# Input: a GLOBAL_LOAD followed by an S_LOAD, then a read of the GLOBAL_LOAD result
# ($vgpr2) and an overwrite of $sgpr0, which is part of the S_LOAD address.
# Expected: S_WAIT_LOADCNT 0 before the V_MOV and S_WAIT_XCNT 0 before the S_MOV.
name: wait_loadcnt_with_outstanding_smem
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-LABEL: name: wait_loadcnt_with_outstanding_smem
    ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    ; GCN-NEXT: S_WAIT_LOADCNT 0
    ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
    ; GCN-NEXT: S_WAIT_XCNT 0
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
    $sgpr0 = S_MOV_B32 0
...
|
|
|
|
---
# Input: a GLOBAL_LOAD followed by an S_LOAD, then an overwrite of $vgpr0, which is
# part of the GLOBAL_LOAD address.
# Expected: the output is identical to the input; no wait instruction is inserted.
name: overwrite_vgpr_after_smem
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-LABEL: name: overwrite_vgpr_after_smem
    ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
...
|
|
|
|
---
# Input: an S_LOAD followed by a GLOBAL_LOAD, then an overwrite of $sgpr0, which is
# part of the S_LOAD address.
# Expected: the output is identical to the input; no wait instruction is inserted.
name: overwrite_sgpr_after_vmem
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-LABEL: name: overwrite_sgpr_after_vmem
    ; GCN: liveins: $vgpr0_vgpr1, $sgpr0_sgpr1
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
    $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
    $sgpr0 = S_MOV_B32 0
...
|