[AMDGPU] Remove one case of vmcnt loop header flushing for GFX12 (#105550)

When a loop contains a VMEM load whose result is only used outside the
loop, do not bother to flush vmcnt in the loop head on GFX12. A wait for
vmcnt will be required inside the loop anyway, because on GFX12 VMEM
instructions can write their VGPR results out of order.
This commit is contained in:
Jay Foad 2024-08-23 10:31:33 +01:00 committed by GitHub
parent 96509bb98f
commit fa2dccb377
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 6 deletions

View File

@ -2390,7 +2390,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
} }
if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside) if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside)
return true; return true;
return HasVMemLoad && UsesVgprLoadedOutside; return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder();
} }
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {

View File

@ -295,7 +295,7 @@ body: |
# GFX12-LABEL: waitcnt_vm_loop2 # GFX12-LABEL: waitcnt_vm_loop2
# GFX12-LABEL: bb.0: # GFX12-LABEL: bb.0:
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
# GFX12: S_WAIT_LOADCNT 0 # GFX12-NOT: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.1: # GFX12-LABEL: bb.1:
# GFX12: S_WAIT_LOADCNT 0 # GFX12: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.2: # GFX12-LABEL: bb.2:
@ -342,7 +342,7 @@ body: |
# GFX12-LABEL: waitcnt_vm_loop2_store # GFX12-LABEL: waitcnt_vm_loop2_store
# GFX12-LABEL: bb.0: # GFX12-LABEL: bb.0:
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
# GFX12: S_WAIT_LOADCNT 0 # GFX12-NOT: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.1: # GFX12-LABEL: bb.1:
# GFX12: S_WAIT_LOADCNT 0 # GFX12: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.2: # GFX12-LABEL: bb.2:
@ -499,9 +499,9 @@ body: |
# GFX12-LABEL: waitcnt_vm_loop2_reginterval # GFX12-LABEL: waitcnt_vm_loop2_reginterval
# GFX12-LABEL: bb.0: # GFX12-LABEL: bb.0:
# GFX12: GLOBAL_LOAD_DWORDX4 # GFX12: GLOBAL_LOAD_DWORDX4
# GFX12: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.1:
# GFX12-NOT: S_WAIT_LOADCNT 0 # GFX12-NOT: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.1:
# GFX12: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.2: # GFX12-LABEL: bb.2:
name: waitcnt_vm_loop2_reginterval name: waitcnt_vm_loop2_reginterval
body: | body: |
@ -600,7 +600,7 @@ body: |
# GFX12-LABEL: bb.0: # GFX12-LABEL: bb.0:
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN # GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
# GFX12: S_WAIT_LOADCNT 0 # GFX12-NOT: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.1: # GFX12-LABEL: bb.1:
# GFX12: S_WAIT_LOADCNT 0 # GFX12: S_WAIT_LOADCNT 0
# GFX12-LABEL: bb.2: # GFX12-LABEL: bb.2: