[AMDGPU] Remove one case of vmcnt loop header flushing for GFX12 (#105550)
When a loop contains a VMEM load whose result is only used outside the loop, do not bother to flush vmcnt ahead of the loop (in the block that enters the loop) on GFX12. A wait for vmcnt will be required inside the loop anyway, because on GFX12 VMEM instructions can write their VGPR results out of order.
This commit is contained in:
parent
96509bb98f
commit
fa2dccb377
@ -2390,7 +2390,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
|
|||||||
}
|
}
|
||||||
if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside)
|
if (!ST->hasVscnt() && HasVMemStore && !HasVMemLoad && UsesVgprLoadedOutside)
|
||||||
return true;
|
return true;
|
||||||
return HasVMemLoad && UsesVgprLoadedOutside;
|
return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
|
||||||
|
@ -295,7 +295,7 @@ body: |
|
|||||||
# GFX12-LABEL: waitcnt_vm_loop2
|
# GFX12-LABEL: waitcnt_vm_loop2
|
||||||
# GFX12-LABEL: bb.0:
|
# GFX12-LABEL: bb.0:
|
||||||
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
# GFX12-NOT: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.1:
|
# GFX12-LABEL: bb.1:
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
# GFX12: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.2:
|
# GFX12-LABEL: bb.2:
|
||||||
@ -342,7 +342,7 @@ body: |
|
|||||||
# GFX12-LABEL: waitcnt_vm_loop2_store
|
# GFX12-LABEL: waitcnt_vm_loop2_store
|
||||||
# GFX12-LABEL: bb.0:
|
# GFX12-LABEL: bb.0:
|
||||||
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
# GFX12-NOT: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.1:
|
# GFX12-LABEL: bb.1:
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
# GFX12: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.2:
|
# GFX12-LABEL: bb.2:
|
||||||
@ -499,9 +499,9 @@ body: |
|
|||||||
# GFX12-LABEL: waitcnt_vm_loop2_reginterval
|
# GFX12-LABEL: waitcnt_vm_loop2_reginterval
|
||||||
# GFX12-LABEL: bb.0:
|
# GFX12-LABEL: bb.0:
|
||||||
# GFX12: GLOBAL_LOAD_DWORDX4
|
# GFX12: GLOBAL_LOAD_DWORDX4
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
|
||||||
# GFX12-LABEL: bb.1:
|
|
||||||
# GFX12-NOT: S_WAIT_LOADCNT 0
|
# GFX12-NOT: S_WAIT_LOADCNT 0
|
||||||
|
# GFX12-LABEL: bb.1:
|
||||||
|
# GFX12: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.2:
|
# GFX12-LABEL: bb.2:
|
||||||
name: waitcnt_vm_loop2_reginterval
|
name: waitcnt_vm_loop2_reginterval
|
||||||
body: |
|
body: |
|
||||||
@ -600,7 +600,7 @@ body: |
|
|||||||
# GFX12-LABEL: bb.0:
|
# GFX12-LABEL: bb.0:
|
||||||
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
||||||
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
# GFX12: BUFFER_LOAD_FORMAT_X_IDXEN
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
# GFX12-NOT: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.1:
|
# GFX12-LABEL: bb.1:
|
||||||
# GFX12: S_WAIT_LOADCNT 0
|
# GFX12: S_WAIT_LOADCNT 0
|
||||||
# GFX12-LABEL: bb.2:
|
# GFX12-LABEL: bb.2:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user