diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 96cb5ae79534..1f291ce5c534 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1200,6 +1200,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixShift64HighRegBug(MI);
   fixVALUMaskWriteHazard(MI);
   fixRequiredExportPriority(MI);
+  if (ST.requiresWaitIdleBeforeGetReg()) // Subtarget-gated fix.
+    fixGetRegWaitIdle(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3428,3 +3430,24 @@ bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
 
   return true;
 }
+
+bool GCNHazardRecognizer::fixGetRegWaitIdle(MachineInstr *MI) {
+  if (!isSGetReg(MI->getOpcode()))
+    return false; // Not an S_GETREG: nothing to insert.
+
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  switch (getHWReg(TII, *MI)) { // Hardware register MI reads (per getHWReg).
+  default:
+    return false; // Every other hardware register: no wait inserted.
+  case AMDGPU::Hwreg::ID_STATUS:
+  case AMDGPU::Hwreg::ID_STATE_PRIV:
+  case AMDGPU::Hwreg::ID_EXCP_FLAG_PRIV:
+  case AMDGPU::Hwreg::ID_EXCP_FLAG_USER:
+    break; // Listed registers: fall out of the switch to insert the wait.
+  }
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(0); // Operand 0; inserted immediately before MI.
+  return true; // A wait instruction was inserted.
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index f796eeaebea8..a078f50219c3 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -110,6 +110,7 @@ private:
   bool fixShift64HighRegBug(MachineInstr *MI);
   bool fixVALUMaskWriteHazard(MachineInstr *MI);
   bool fixRequiredExportPriority(MachineInstr *MI);
+  bool fixGetRegWaitIdle(MachineInstr *MI); // true iff a wait was inserted
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2ad83c90dc20..92de024cc6fc 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1801,6 +1801,10 @@ public:
   // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
   bool requiresNopBeforeDeallocVGPRs() const { return !GFX1250Insts; }
 
+  // \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
+  // STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER.
+  bool requiresWaitIdleBeforeGetReg() const { return GFX1250Insts; }
+
   bool isDynamicVGPREnabled() const { return DynamicVGPR; }
   unsigned getDynamicVGPRBlockSize() const {
     return DynamicVGPRBlockSize32 ? 32 : 16;
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir b/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir
new file mode 100644
index 000000000000..213fba9eb115
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir
@@ -0,0 +1,91 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec -o - %s | FileCheck --check-prefix=GCN %s
+
+---
+name:            s_getreg_mode # Negative case: no wait is expected.
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_mode
+    ; GCN: $sgpr0 = S_GETREG_B32 1, implicit $mode
+    $sgpr0 = S_GETREG_B32 1, implicit $mode
+...
+
+---
+name:            s_getreg_status
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_status
+    ; GCN: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode
+    $sgpr0 = S_GETREG_B32 2, implicit $mode
+...
+
+---
+name:            s_getreg_status_masked # Different immediate (66), same wait.
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_status_masked
+    ; GCN: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: $sgpr0 = S_GETREG_B32 66, implicit $mode
+    $sgpr0 = S_GETREG_B32 66, implicit $mode
+...
+
+---
+name:            s_getreg_state_priv
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_state_priv
+    ; GCN: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: $sgpr0 = S_GETREG_B32 4, implicit $mode
+    $sgpr0 = S_GETREG_B32 4, implicit $mode
+...
+
+---
+name:            s_getreg_excp_flag_priv
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_excp_flag_priv
+    ; GCN: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: $sgpr0 = S_GETREG_B32 17, implicit $mode
+    $sgpr0 = S_GETREG_B32 17, implicit $mode
+...
+
+---
+name:            s_getreg_excp_flag_user
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_excp_flag_user
+    ; GCN: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: $sgpr0 = S_GETREG_B32 18, implicit $mode
+    $sgpr0 = S_GETREG_B32 18, implicit $mode
+...
+
+---
+name:            s_getreg_status_in_bundle # Wait lands inside the bundle.
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_status_in_bundle
+    ; GCN: BUNDLE {
+    ; GCN-NEXT:   S_NOP 0
+    ; GCN-NEXT:   S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT:   $sgpr0 = S_GETREG_B32 2, implicit $mode
+    ; GCN-NEXT: }
+    BUNDLE {
+      S_NOP 0
+      $sgpr0 = S_GETREG_B32 2, implicit $mode
+    }
+...
+
+---
+name:            s_getreg_status_top_of_bundle # Getreg is first in the bundle.
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_getreg_status_top_of_bundle
+    ; GCN: BUNDLE {
+    ; GCN-NEXT:   S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT:   $sgpr0 = S_GETREG_B32 2, implicit $mode
+    ; GCN-NEXT: }
+    BUNDLE {
+      $sgpr0 = S_GETREG_B32 2, implicit $mode
+    }
+...