[AMDGPU] Handle S_GETREG_B32 hazard on gfx1250 (#153848)

The GFX1250 SPG says that S_GETREG_B32 does not wait for idle before executing.
The user must issue S_WAIT_ALU 0 before an S_GETREG_B32 that reads any of:
STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
This commit is contained in:
Stanislav Mekhanoshin 2025-08-15 11:38:22 -07:00 committed by GitHub
parent 3a4a60deff
commit 29976f2e58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 119 additions and 0 deletions

View File

@ -1200,6 +1200,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
fixRequiredExportPriority(MI);
if (ST.requiresWaitIdleBeforeGetReg())
fixGetRegWaitIdle(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@ -3428,3 +3430,24 @@ bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
return true;
}
/// Guard S_GETREG reads of STATUS, STATE_PRIV, EXCP_FLAG_PRIV and
/// EXCP_FLAG_USER with a preceding S_WAITCNT_DEPCTR 0.
///
/// \param MI the instruction being examined; the wait, if any, is inserted
/// immediately before it in its parent block.
/// \returns true if a wait was inserted, false if \p MI is not an S_GETREG or
/// reads a hardware register other than the four listed above.
bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) {
  if (!isSGetReg(MI->getOpcode()))
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const unsigned HwRegId = getHWReg(TII, *MI);

  // Only these hardware registers need the wait; any other register is left
  // untouched.
  const bool ReadsGuardedReg = HwRegId == AMDGPU::Hwreg::ID_STATUS ||
                               HwRegId == AMDGPU::Hwreg::ID_STATE_PRIV ||
                               HwRegId == AMDGPU::Hwreg::ID_EXCP_FLAG_PRIV ||
                               HwRegId == AMDGPU::Hwreg::ID_EXCP_FLAG_USER;
  if (!ReadsGuardedReg)
    return false;

  // The wait reuses MI's debug location and takes an immediate operand of 0.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(0);
  return true;
}

View File

@ -110,6 +110,7 @@ private:
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
bool fixRequiredExportPriority(MachineInstr *MI);
/// Inserts S_WAITCNT_DEPCTR 0 before an S_GETREG that reads STATUS,
/// STATE_PRIV, EXCP_FLAG_PRIV or EXCP_FLAG_USER.
/// \returns true if a wait instruction was inserted.
bool fixGetRegWaitIdle(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);

View File

@ -1801,6 +1801,10 @@ public:
// instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
bool requiresNopBeforeDeallocVGPRs() const { return !GFX1250Insts; }
/// \returns true if the subtarget requires an S_WAIT_ALU 0 ahead of an
/// S_GETREG_B32 that reads STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or
/// EXCP_FLAG_USER.
bool requiresWaitIdleBeforeGetReg() const {
  return GFX1250Insts;
}
/// \returns the value of the DynamicVGPR subtarget flag.
bool isDynamicVGPREnabled() const {
  return DynamicVGPR;
}
unsigned getDynamicVGPRBlockSize() const {
return DynamicVGPRBlockSize32 ? 32 : 16;

View File

@ -0,0 +1,91 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec -o - %s | FileCheck --check-prefix=GCN %s
# S_GETREG_B32 with hwreg operand 1 (MODE, going by the test name): the expected
# output has no S_WAITCNT_DEPCTR check line, only the unchanged S_GETREG_B32.
---
name: s_getreg_mode
body: |
bb.0:
; GCN-LABEL: name: s_getreg_mode
; GCN: $sgpr0 = S_GETREG_B32 1, implicit $mode
$sgpr0 = S_GETREG_B32 1, implicit $mode
...
# S_GETREG_B32 with hwreg operand 2 (STATUS, going by the test name): an
# S_WAITCNT_DEPCTR 0 is expected directly before the read.
---
name: s_getreg_status
body: |
bb.0:
; GCN-LABEL: name: s_getreg_status
; GCN: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
$sgpr0 = S_GETREG_B32 2, implicit $mode
...
# Same expectation as s_getreg_status, but with operand 66 (0x42), which shares
# its low six bits with 2.
# NOTE(review): presumably the bits above [5:0] select the offset/width of the
# field being read - confirm against the hwreg operand encoding.
---
name: s_getreg_status_masked
body: |
bb.0:
; GCN-LABEL: name: s_getreg_status_masked
; GCN: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 66, implicit $mode
$sgpr0 = S_GETREG_B32 66, implicit $mode
...
# S_GETREG_B32 with hwreg operand 4 (STATE_PRIV, going by the test name): an
# S_WAITCNT_DEPCTR 0 is expected directly before the read.
---
name: s_getreg_state_priv
body: |
bb.0:
; GCN-LABEL: name: s_getreg_state_priv
; GCN: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 4, implicit $mode
$sgpr0 = S_GETREG_B32 4, implicit $mode
...
# S_GETREG_B32 with hwreg operand 17 (EXCP_FLAG_PRIV, going by the test name):
# an S_WAITCNT_DEPCTR 0 is expected directly before the read.
---
name: s_getreg_excp_flag_priv
body: |
bb.0:
; GCN-LABEL: name: s_getreg_excp_flag_priv
; GCN: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 17, implicit $mode
$sgpr0 = S_GETREG_B32 17, implicit $mode
...
# S_GETREG_B32 with hwreg operand 18 (EXCP_FLAG_USER, going by the test name):
# an S_WAITCNT_DEPCTR 0 is expected directly before the read.
---
name: s_getreg_excp_flag_user
body: |
bb.0:
; GCN-LABEL: name: s_getreg_excp_flag_user
; GCN: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 18, implicit $mode
$sgpr0 = S_GETREG_B32 18, implicit $mode
...
# The S_GETREG_B32 is the second instruction of a BUNDLE: the wait is expected
# inside the bundle, between the S_NOP 0 and the read.
---
name: s_getreg_status_in_bundle
body: |
bb.0:
; GCN-LABEL: name: s_getreg_status_in_bundle
; GCN: BUNDLE {
; GCN-NEXT: S_NOP 0
; GCN-NEXT: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
; GCN-NEXT: }
BUNDLE {
S_NOP 0
$sgpr0 = S_GETREG_B32 2, implicit $mode
}
...
# The S_GETREG_B32 is the only instruction in the BUNDLE: the wait is expected
# inside the bundle, directly after the opening brace.
---
name: s_getreg_status_top_of_bundle
body: |
bb.0:
; GCN-LABEL: name: s_getreg_status_top_of_bundle
; GCN: BUNDLE {
; GCN-NEXT: S_WAITCNT_DEPCTR 0
; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
; GCN-NEXT: }
BUNDLE {
$sgpr0 = S_GETREG_B32 2, implicit $mode
}
...