From 1658456ccf2c74c7ddaba49590b7a31c4fdc4d33 Mon Sep 17 00:00:00 2001 From: vporpo Date: Sat, 31 Jan 2026 10:46:59 -0800 Subject: [PATCH] [AMDGPU] Introduce custom MIR formatting for s_wait_alu (#176316) This patch implements a custom printer/parser for the immediate operand of s_wait_alu that prints/parses the decoded counter values. Format: ``` .Name1_Num1_Name2_Num2 ``` Example: `s_wait_alu .VaVdst_1_VmVsrc_1` ; Which is equivalent to this: `s_wait_alu 8167` Features: - If a counter is at its maximum value it won't get printed. - The parser will error out if a counter is greater than or equal to its max value. - If all counters are disabled we can use 'AllOff'. - For now we also accept numeric values for backwards compatibility with older MIR. Note: This is similar to https://github.com/llvm/llvm-project/pull/96004 but for `s_wait_alu`. --- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 133 ++++++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h | 11 +- .../Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp | 8 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 + llvm/lib/Target/AMDGPU/SIInstrInfo.h | 6 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 33 ++++- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 20 ++- .../AMDGPU/expert_scheduling_gfx12.mir | 85 ++++++----- .../CodeGen/AMDGPU/hazard-getreg-waitalu.mir | 14 +- llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 40 +++--- .../AMDGPU/lds-direct-hazards-gfx11.mir | 10 +- .../AMDGPU/lds-direct-hazards-gfx12.mir | 4 +- .../AMDGPU/merge-consecutive-wait-alus.mir | 12 +- .../AMDGPU/partial-forwarding-hazards.mir | 14 +- .../AMDGPU/trans-forwarding-hazards.mir | 24 ++-- .../AMDGPU/valu-mask-write-hazard-true16.mir | 2 +- .../CodeGen/AMDGPU/valu-mask-write-hazard.mir | 128 ++++++++--------- .../AMDGPU/valu-read-sgpr-hazard-attrs.mir | 14 +- .../CodeGen/AMDGPU/valu-read-sgpr-hazard.mir | 86 +++++------ .../CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir | 18 +-- .../CodeGen/AMDGPU/vmem-to-salu-hazard.mir | 38 ++--- 
.../CodeGen/MIR/AMDGPU/s_wait_alu-errors.mir | 71 ++++++++++ llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu.mir | 67 +++++++++ 23 files changed, 583 insertions(+), 261 deletions(-) create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu-errors.mir create mode 100644 llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 75e3d8c426e7..a541a266006c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -13,13 +13,61 @@ #include "AMDGPUMIRFormatter.h" #include "SIMachineFunctionInfo.h" +#include "llvm/TargetParser/TargetParser.h" using namespace llvm; +const char SWaitAluImmPrefix = '.'; +StringLiteral SWaitAluDelim = "_"; + +StringLiteral VaVdstName = "VaVdst"; +StringLiteral VaSdstName = "VaSdst"; +StringLiteral VaSsrcName = "VaSsrc"; +StringLiteral HoldCntName = "HoldCnt"; +StringLiteral VmVsrcName = "VmVsrc"; +StringLiteral VaVccName = "VaVcc"; +StringLiteral SaSdstName = "SaSdst"; + +StringLiteral AllOff = "AllOff"; + +void AMDGPUMIRFormatter::printSWaitAluImm(uint64_t Imm, raw_ostream &OS) const { + bool NonePrinted = true; + ListSeparator Delim(SWaitAluDelim); + auto PrintFieldIfNotMax = [&](StringRef Descr, uint64_t Num, unsigned Max) { + if (Num != Max) { + OS << Delim << Descr << SWaitAluDelim << Num; + NonePrinted = false; + } + }; + OS << SWaitAluImmPrefix; + PrintFieldIfNotMax(VaVdstName, AMDGPU::DepCtr::decodeFieldVaVdst(Imm), + AMDGPU::DepCtr::getVaVdstBitMask()); + PrintFieldIfNotMax(VaSdstName, AMDGPU::DepCtr::decodeFieldVaSdst(Imm), + AMDGPU::DepCtr::getVaSdstBitMask()); + PrintFieldIfNotMax(VaSsrcName, AMDGPU::DepCtr::decodeFieldVaSsrc(Imm), + AMDGPU::DepCtr::getVaSsrcBitMask()); + PrintFieldIfNotMax( + HoldCntName, + AMDGPU::DepCtr::decodeFieldHoldCnt(Imm, + AMDGPU::getIsaVersion(STI.getCPU())), + AMDGPU::DepCtr::getHoldCntBitMask(AMDGPU::getIsaVersion(STI.getCPU()))); + 
PrintFieldIfNotMax(VmVsrcName, AMDGPU::DepCtr::decodeFieldVmVsrc(Imm), + AMDGPU::DepCtr::getVmVsrcBitMask()); + PrintFieldIfNotMax(VaVccName, AMDGPU::DepCtr::decodeFieldVaVcc(Imm), + AMDGPU::DepCtr::getVaVccBitMask()); + PrintFieldIfNotMax(SaSdstName, AMDGPU::DepCtr::decodeFieldSaSdst(Imm), + AMDGPU::DepCtr::getSaSdstBitMask()); + if (NonePrinted) + OS << AllOff; +} + void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, std::optional OpIdx, int64_t Imm) const { switch (MI.getOpcode()) { + case AMDGPU::S_WAITCNT_DEPCTR: + printSWaitAluImm(Imm, OS); + break; case AMDGPU::S_DELAY_ALU: assert(OpIdx == 0); printSDelayAluImm(Imm, OS); @@ -39,6 +87,8 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, { switch (OpCode) { + case AMDGPU::S_WAITCNT_DEPCTR: + return parseSWaitAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback); case AMDGPU::S_DELAY_ALU: return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback); default: @@ -90,6 +140,89 @@ void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm, Outdep(Id1); } +bool AMDGPUMIRFormatter::parseSWaitAluImmMnemonic( + const unsigned int OpIdx, int64_t &Imm, StringRef &Src, + MIRFormatter::ErrorCallbackType &ErrorCallback) const { + // TODO: For now accept integer masks for compatibility with old MIR. + if (!Src.consumeInteger(10, Imm)) + return false; + + // Initialize with all checks off. + Imm = AMDGPU::DepCtr::getDefaultDepCtrEncoding(STI); + // The input is in the form: .Name1_Num1_Name2_Num2 + // Drop the '.' prefix. + bool ConsumePrefix = Src.consume_front(SWaitAluImmPrefix); + if (!ConsumePrefix) + return ErrorCallback(Src.begin(), "expected prefix"); + if (Src.empty()) + return ErrorCallback(Src.begin(), "expected _"); + + // Special case for all off. + if (Src == AllOff) + return false; + + // Parse a counter name, number pair in each iteration. 
+ while (!Src.empty()) { + // Src: Name1_Num1_Name2_Num2 + // ^ + size_t DelimIdx = Src.find(SWaitAluDelim); + if (DelimIdx == StringRef::npos) + return ErrorCallback(Src.begin(), "expected _"); + // Src: Name1_Num1_Name2_Num2 + // ^^^^^ + StringRef Name = Src.substr(0, DelimIdx); + // Save the position of the name for accurate error reporting. + StringRef::iterator NamePos = Src.begin(); + [[maybe_unused]] bool ConsumeName = Src.consume_front(Name); + assert(ConsumeName && "Expected name"); + [[maybe_unused]] bool ConsumeDelim = Src.consume_front(SWaitAluDelim); + assert(ConsumeDelim && "Expected delimiter"); + // Src: Num1_Name2_Num2 + // ^ + DelimIdx = Src.find(SWaitAluDelim); + // Src: Num1_Name2_Num2 + // ^^^^ + int64_t Num; + // Save the position of the number for accurate error reporting. + StringRef::iterator NumPos = Src.begin(); + if (Src.consumeInteger(10, Num) || Num < 0) + return ErrorCallback(NumPos, + "expected non-negative integer counter number"); + unsigned Max; + if (Name == VaVdstName) { + Max = AMDGPU::DepCtr::getVaVdstBitMask(); + Imm = AMDGPU::DepCtr::encodeFieldVaVdst(Imm, Num); + } else if (Name == VmVsrcName) { + Max = AMDGPU::DepCtr::getVmVsrcBitMask(); + Imm = AMDGPU::DepCtr::encodeFieldVmVsrc(Imm, Num); + } else if (Name == VaSdstName) { + Max = AMDGPU::DepCtr::getVaSdstBitMask(); + Imm = AMDGPU::DepCtr::encodeFieldVaSdst(Imm, Num); + } else if (Name == VaSsrcName) { + Max = AMDGPU::DepCtr::getVaSsrcBitMask(); + Imm = AMDGPU::DepCtr::encodeFieldVaSsrc(Imm, Num); + } else if (Name == HoldCntName) { + const AMDGPU::IsaVersion &Version = AMDGPU::getIsaVersion(STI.getCPU()); + Max = AMDGPU::DepCtr::getHoldCntBitMask(Version); + Imm = AMDGPU::DepCtr::encodeFieldHoldCnt(Imm, Num, Version); + } else if (Name == VaVccName) { + Max = AMDGPU::DepCtr::getVaVccBitMask(); + Imm = AMDGPU::DepCtr::encodeFieldVaVcc(Imm, Num); + } else if (Name == SaSdstName) { + Max = AMDGPU::DepCtr::getSaSdstBitMask(); + Imm = AMDGPU::DepCtr::encodeFieldSaSdst(Imm, 
Num); + } else { + return ErrorCallback(NamePos, "invalid counter name"); + } + // Don't allow the values to reach their maximum value. + if (Num >= Max) + return ErrorCallback(NumPos, "counter value too large"); + // Src: Name2_Num2 + Src.consume_front(SWaitAluDelim); + } + return false; +} + bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src, llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h index 0804133faca4..dbfc645fa227 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H #define LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/CodeGen/MIRFormatter.h" namespace llvm { @@ -25,7 +26,7 @@ struct PerFunctionMIParsingState; class AMDGPUMIRFormatter final : public MIRFormatter { public: - AMDGPUMIRFormatter() = default; + explicit AMDGPUMIRFormatter(const MCSubtargetInfo &STI) : STI(STI) {} ~AMDGPUMIRFormatter() override = default; /// Implement target specific printing for machine operand immediate value, so @@ -48,9 +49,17 @@ public: ErrorCallbackType ErrorCallback) const override; private: + const MCSubtargetInfo &STI; + /// Prints the string to represent s_wait_alu immediate value. 
+ void printSWaitAluImm(uint64_t Imm, raw_ostream &OS) const; /// Print the string to represent s_delay_alu immediate value void printSDelayAluImm(int64_t Imm, llvm::raw_ostream &OS) const; + /// Parse the immediate pseudo literal for s_wait_alu + bool parseSWaitAluImmMnemonic( + const unsigned int OpIdx, int64_t &Imm, StringRef &Src, + MIRFormatter::ErrorCallbackType &ErrorCallback) const; + /// Parse the immediate pseudo literal for s_delay_alu bool parseSDelayAluImmMnemonic( const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp index 7ade5c64ec3b..faef40862d71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIInstrInfo.h" #include "llvm/ADT/SetVector.h" +#include "llvm/TargetParser/TargetParser.h" using namespace llvm; @@ -182,9 +183,12 @@ public: Mask = AMDGPU::DepCtr::encodeFieldVaVdst( Mask, std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1), AMDGPU::DepCtr::decodeFieldVaVdst(Mask2))); + const AMDGPU::IsaVersion &Version = AMDGPU::getIsaVersion(ST->getCPU()); Mask = AMDGPU::DepCtr::encodeFieldHoldCnt( - Mask, std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1), - AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2))); + Mask, + std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1, Version), + AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2, Version)), + Version); Mask = AMDGPU::DepCtr::encodeFieldVaSsrc( Mask, std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1), AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2))); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index e0e0bb0c05ea..09efba485f6f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -10688,6 +10688,12 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const { 
return InstructionUniformity::Default; } +const MIRFormatter *SIInstrInfo::getMIRFormatter() const { + if (!Formatter) + Formatter = std::make_unique(ST); + return Formatter.get(); +} + InstructionUniformity SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 5bc9f9674e56..05cf804d08ff 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1673,11 +1673,7 @@ public: InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const; - const MIRFormatter *getMIRFormatter() const override { - if (!Formatter) - Formatter = std::make_unique(); - return Formatter.get(); - } + const MIRFormatter *getMIRFormatter() const override; static unsigned getDSShaderTypeValue(const MachineFunction &MF); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 34cdb86cf9ec..c9f84708d8b3 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -177,7 +177,13 @@ inline unsigned getVaSsrcBitWidth() { return 1; } inline unsigned getVaSsrcBitShift() { return 8; } /// \returns HoldCnt bit shift -inline unsigned getHoldCntWidth() { return 1; } +inline unsigned getHoldCntWidth(unsigned VersionMajor, unsigned VersionMinor) { + static constexpr const unsigned MinMajor = 10; + static constexpr const unsigned MinMinor = 3; + return std::tie(VersionMajor, VersionMinor) >= std::tie(MinMajor, MinMinor) + ? 
1 + : 0; +} /// \returns HoldCnt bit shift inline unsigned getHoldCntBitShift() { return 7; } @@ -2074,8 +2080,20 @@ int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, unsigned getVaVdstBitMask() { return (1 << getVaVdstBitWidth()) - 1; } +unsigned getVaSdstBitMask() { return (1 << getVaSdstBitWidth()) - 1; } + +unsigned getVaSsrcBitMask() { return (1 << getVaSsrcBitWidth()) - 1; } + +unsigned getHoldCntBitMask(const IsaVersion &Version) { + return (1 << getHoldCntWidth(Version.Major, Version.Minor)) - 1; +} + unsigned getVmVsrcBitMask() { return (1 << getVmVsrcBitWidth()) - 1; } +unsigned getVaVccBitMask() { return (1 << getVaVccBitWidth()) - 1; } + +unsigned getSaSdstBitMask() { return (1 << getSaSdstBitWidth()) - 1; } + unsigned decodeFieldVmVsrc(unsigned Encoded) { return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth()); } @@ -2100,8 +2118,9 @@ unsigned decodeFieldVaSsrc(unsigned Encoded) { return unpackBits(Encoded, getVaSsrcBitShift(), getVaSsrcBitWidth()); } -unsigned decodeFieldHoldCnt(unsigned Encoded) { - return unpackBits(Encoded, getHoldCntBitShift(), getHoldCntWidth()); +unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version) { + return unpackBits(Encoded, getHoldCntBitShift(), + getHoldCntWidth(Version.Major, Version.Minor)); } unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { @@ -2158,13 +2177,15 @@ unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI) { return encodeFieldVaSsrc(Encoded, VaSsrc); } -unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt) { - return packBits(HoldCnt, Encoded, getHoldCntBitShift(), getHoldCntWidth()); +unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, + const IsaVersion &Version) { + return packBits(HoldCnt, Encoded, getHoldCntBitShift(), + getHoldCntWidth(Version.Major, Version.Minor)); } unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI) { unsigned Encoded = getDefaultDepCtrEncoding(STI); 
- return encodeFieldHoldCnt(Encoded, HoldCnt); + return encodeFieldHoldCnt(Encoded, HoldCnt, getIsaVersion(STI.getCPU())); } } // namespace DepCtr diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 97dfdabc1369..0ecec79d08a3 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1311,9 +1311,24 @@ bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, /// \returns Maximum VaVdst value that can be encoded. unsigned getVaVdstBitMask(); +/// \returns Maximum VaSdst value that can be encoded. +unsigned getVaSdstBitMask(); + +/// \returns Maximum VaSsrc value that can be encoded. +unsigned getVaSsrcBitMask(); + +/// \returns Maximum HoldCnt value that can be encoded. +unsigned getHoldCntBitMask(const IsaVersion &Version); + /// \returns Maximum VmVsrc value that can be encoded. unsigned getVmVsrcBitMask(); +/// \returns Maximum VaVcc value that can be encoded. +unsigned getVaVccBitMask(); + +/// \returns Maximum SaSdst value that can be encoded. +unsigned getSaSdstBitMask(); + /// \returns Decoded VaVdst from given immediate \p Encoded. unsigned decodeFieldVaVdst(unsigned Encoded); @@ -1333,7 +1348,7 @@ unsigned decodeFieldVaVcc(unsigned Encoded); unsigned decodeFieldVaSsrc(unsigned Encoded); /// \returns Decoded HoldCnt from given immediate \p Encoded. -unsigned decodeFieldHoldCnt(unsigned Encoded); +unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version); /// \returns \p VmVsrc as an encoded Depctr immediate. unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI); @@ -1369,7 +1384,8 @@ unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc); unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI); /// \returns \p Encoded combined with encoded \p HoldCnt. 
-unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt); +unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, + const IsaVersion &Version); /// \returns \p VaSsrc as an encoded Depctr immediate. unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI); diff --git a/llvm/test/CodeGen/AMDGPU/expert_scheduling_gfx12.mir b/llvm/test/CodeGen/AMDGPU/expert_scheduling_gfx12.mir index 932bd21ad4af..95ba7fce9683 100644 --- a/llvm/test/CodeGen/AMDGPU/expert_scheduling_gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/expert_scheduling_gfx12.mir @@ -21,7 +21,7 @@ body: | ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 1073741824, implicit $exec ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 1056964608, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: EXP_DONE 15, $vgpr3, $vgpr2, $vgpr1, $vgpr0, 0, 0, 1, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = V_MOV_B32_e32 1082130432, implicit $exec @@ -54,11 +54,11 @@ body: | ; GCN-NEXT: $vgpr3 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr3, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, 
$vgpr3 $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec @@ -88,15 +88,15 @@ body: | ; GCN-NEXT: S_WAIT_BVHCNT 0 ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = DS_READ_B128_gfx9 $vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 3) ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 $vgpr5, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 $vgpr6, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 $vgpr7, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr3 = nofpexcept V_ADD_F32_e32 $vgpr8, $vgpr3, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3 $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) @@ -128,9 +128,9 @@ body: | ; GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $exec ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = DS_READ_B128_gfx9 $vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 3) ; GCN-NEXT: S_WAIT_DSCNT 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: DS_WRITE_B128_gfx9 $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (store (s128), addrspace 3) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = 
IMAGE_LOAD_V4_V1_gfx12 $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode @@ -166,9 +166,9 @@ body: | ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr7, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr3 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr8, implicit $mode, implicit $exec ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr5_vgpr6_vgpr7_vgpr8, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3 $vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) @@ -198,16 +198,16 @@ body: | ; GCN-NEXT: S_WAIT_BVHCNT 0 ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_LOAD_V4_V1_gfx12 $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $vgpr3 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr3, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr1, implicit 
$mode, implicit $exec ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3 $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec @@ -237,15 +237,15 @@ body: | ; GCN-NEXT: S_WAIT_BVHCNT 0 ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $vgpr3 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr7, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr6, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr5, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 1065353216, $vgpr4, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1, $vgpr2, $vgpr3 $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec @@ -272,7 +272,7 @@ body: | ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec ; GCN-NEXT: $vgpr2 = 
V_MOV_B32_e32 $vgpr0, implicit $exec ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 8095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_1 ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_UBYTE_SVS $vgpr2, $sgpr0, 0, 0, implicit $exec, implicit $flat_scr $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec @@ -293,10 +293,9 @@ body: | ; GCN-NEXT: S_WAIT_BVHCNT 0 ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65535 ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - S_WAITCNT_DEPCTR 8167 + S_WAITCNT_DEPCTR .VmVsrc_1_VaVdst_1 $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec ... @@ -317,7 +316,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec ; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode @@ -341,7 +340,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr0_vgpr1 = V_FRACT_F64_e32 $vgpr0_vgpr1, implicit $exec, implicit $mode ; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode - ; 
GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0_vgpr1 = V_FRACT_F64_e32 $vgpr0_vgpr1, implicit $exec, implicit $mode $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode @@ -365,7 +364,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr0 = V_SQRT_F32_e32 $vgpr0, implicit $exec, implicit $mode ; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0 = V_SQRT_F32_e32 $vgpr0, implicit $exec, implicit $mode $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode @@ -389,7 +388,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr0 = V_ADD_F32_e32 244, $vgpr0, implicit $exec, implicit $mode ; GCN-NEXT: $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode - ; GCN-NEXT: S_WAITCNT_DEPCTR 8095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_1 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0 = V_ADD_F32_e32 244, $vgpr0, implicit $exec, implicit $mode $vgpr8 = V_ADD_F32_e32 244, $vgpr11, implicit $exec, implicit $mode @@ -413,7 +412,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr0_vgpr1 = V_FRACT_F64_e32 $vgpr0_vgpr1, implicit $exec, implicit $mode ; GCN-NEXT: $vgpr4_vgpr5 = V_TRUNC_F64_e32 $vgpr2_vgpr3, implicit $exec, implicit $mode - ; GCN-NEXT: S_WAITCNT_DEPCTR 8095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_1 ; GCN-NEXT: 
IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0_vgpr1 = V_FRACT_F64_e32 $vgpr0_vgpr1, implicit $exec, implicit $mode $vgpr4_vgpr5 = V_TRUNC_F64_e32 $vgpr2_vgpr3, implicit $exec, implicit $mode @@ -437,7 +436,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr0 = V_SQRT_F32_e32 $vgpr0, implicit $exec, implicit $mode ; GCN-NEXT: $vgpr8 = V_LOG_F32_e32 $vgpr11, implicit $exec, implicit $mode - ; GCN-NEXT: S_WAITCNT_DEPCTR 8095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_1 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0 = V_SQRT_F32_e32 $vgpr0, implicit $exec, implicit $mode $vgpr8 = V_LOG_F32_e32 $vgpr11, implicit $exec, implicit $mode @@ -461,7 +460,7 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, $vgpr10_vgpr11_vgpr12_vgpr13, 8, $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec ; GCN-NEXT: early-clobber $vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26, 0, 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 8095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_1 ; GCN-NEXT: IMAGE_STORE_V4_V1_gfx12 $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8) $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, $vgpr10_vgpr11_vgpr12_vgpr13, 8, $vgpr14_vgpr15_vgpr16_vgpr17, 
8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, implicit $exec $vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26 = V_WMMA_F32_16X16X16_F16_w32_twoaddr 8, killed $vgpr10_vgpr11_vgpr12_vgpr13, 8, killed $vgpr14_vgpr15_vgpr16_vgpr17, 8, killed $vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26, 0, 0, implicit $exec @@ -485,12 +484,12 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) ; GCN-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_LOAD_V4_V1_gfx12 $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65415 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_1 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: S_WAIT_LOADCNT 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 $vgpr8_vgpr9_vgpr10_vgpr11 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) @@ -518,12 +517,12 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: S_WAITCNT_DEPCTR 65415 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_1 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec - ; GCN-NEXT: 
S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 $vgpr2, $vgpr3, implicit $mode, implicit $exec ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr6, $vgpr7 $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr @@ -551,12 +550,12 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = DS_READ_B128_gfx9 $vgpr0, 0, 0, implicit $exec :: (load (s128), addrspace 3) ; GCN-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = DS_READ_B128_gfx9 $vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 3) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65415 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_1 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: S_WAIT_DSCNT 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 $vgpr8_vgpr9_vgpr10_vgpr11 = DS_READ_B128_gfx9 $vgpr0, 0, 0, implicit $exec :: (load (s128), addrspace 3) @@ -584,11 +583,11 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) ; GCN-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = DS_READ_B128_gfx9 $vgpr1, 0, 0, implicit $exec :: (load (s128), addrspace 3) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0 = nofpexcept V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec ; GCN-NEXT: 
$vgpr1 = nofpexcept V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0, $vgpr1 $vgpr8_vgpr9_vgpr10_vgpr11 = IMAGE_LOAD_V4_V1_gfx12 $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) @@ -616,11 +615,11 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = DS_READ_B128_gfx9 $vgpr2, 0, 0, implicit $exec :: (load (s128), addrspace 3) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr1, $vgpr2 $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr @@ -648,11 +647,11 @@ body: | ; GCN-NEXT: S_WAIT_KMCNT 0 ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_LOAD_V4_V1_gfx12 $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 8) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = nofpexcept V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = nofpexcept V_ADD_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: S_WAIT_LOADCNT_DSCNT 
0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_SETREG_IMM32_B32 0, 2074, implicit-def $mode, implicit $mode ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr1, $vgpr2 $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr @@ -679,7 +678,7 @@ body: | ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir b/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir index 213fba9eb115..911cb571ac64 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard-getreg-waitalu.mir @@ -15,7 +15,7 @@ name: s_getreg_status body: | bb.0: ; GCN-LABEL: name: s_getreg_status - ; GCN: S_WAITCNT_DEPCTR 0 + ; GCN: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode $sgpr0 = S_GETREG_B32 2, implicit $mode ... @@ -25,7 +25,7 @@ name: s_getreg_status_masked body: | bb.0: ; GCN-LABEL: name: s_getreg_status_masked - ; GCN: S_WAITCNT_DEPCTR 0 + ; GCN: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 66, implicit $mode $sgpr0 = S_GETREG_B32 66, implicit $mode ... @@ -35,7 +35,7 @@ name: s_getreg_state_priv body: | bb.0: ; GCN-LABEL: name: s_getreg_state_priv - ; GCN: S_WAITCNT_DEPCTR 0 + ; GCN: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 4, implicit $mode $sgpr0 = S_GETREG_B32 4, implicit $mode ... 
@@ -45,7 +45,7 @@ name: s_getreg_excp_flag_priv body: | bb.0: ; GCN-LABEL: name: s_getreg_excp_flag_priv - ; GCN: S_WAITCNT_DEPCTR 0 + ; GCN: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 17, implicit $mode $sgpr0 = S_GETREG_B32 17, implicit $mode ... @@ -55,7 +55,7 @@ name: s_getreg_excp_flag_user body: | bb.0: ; GCN-LABEL: name: s_getreg_excp_flag_user - ; GCN: S_WAITCNT_DEPCTR 0 + ; GCN: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 18, implicit $mode $sgpr0 = S_GETREG_B32 18, implicit $mode ... @@ -67,7 +67,7 @@ body: | ; GCN-LABEL: name: s_getreg_status_in_bundle ; GCN: BUNDLE { ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode ; GCN-NEXT: } BUNDLE { @@ -82,7 +82,7 @@ body: | bb.0: ; GCN-LABEL: name: s_getreg_status_top_of_bundle ; GCN: BUNDLE { - ; GCN-NEXT: S_WAITCNT_DEPCTR 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: $sgpr0 = S_GETREG_B32 2, implicit $mode ; GCN-NEXT: } BUNDLE { diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir index 4a5814081362..4e9656e063fd 100644 --- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir +++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir @@ -10,9 +10,9 @@ body: | ; GCN-LABEL: name: ds_atomic_async_barrier_arrive_b64 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, 
implicit $exec ... @@ -26,7 +26,7 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec @@ -42,7 +42,7 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec $sgpr103 = S_MOV_B32 0 $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec @@ -55,7 +55,7 @@ body: | bb.0: ; GCN-LABEL: name: write_s102_read_flat_scr_base ; GCN: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base $sgpr102 = S_MOV_B32 0 $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base @@ -68,7 +68,7 @@ body: | bb.0: ; GCN-LABEL: name: write_s103_read_flat_scr_base ; GCN: $sgpr103 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base $sgpr103 = S_MOV_B32 0 $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base @@ -81,7 +81,7 @@ body: | bb.0: ; GCN-LABEL: name: write_s102_s103_read_flat_scr_base ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base $sgpr102_sgpr103 = S_MOV_B64 0 $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base @@ -97,7 +97,7 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $sgpr1 = S_GETREG_B32 20, 
implicit $mode $sgpr102 = S_MOV_B32 0 $sgpr1 = S_GETREG_B32 20, implicit $mode @@ -113,7 +113,7 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode $sgpr103 = S_MOV_B32 0 $sgpr1 = S_GETREG_B32 21, implicit $mode @@ -126,7 +126,7 @@ body: | bb.0: ; GCN-LABEL: name: write_s102_s103_getreg_flat_scr_base_hi ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode $sgpr102_sgpr103 = S_MOV_B64 0 $sgpr1 = S_GETREG_B32 21, implicit $mode @@ -154,7 +154,7 @@ body: | ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 $sgpr0 = S_MOV_B32 0 @@ -241,7 +241,7 @@ body: | ; GCN-NEXT: $sgpr6 = S_MOV_B32 0 ; GCN-NEXT: $sgpr7 = S_MOV_B32 0 ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec $sgpr103 = S_MOV_B32 0 $sgpr0 = S_MOV_B32 0 @@ -331,7 +331,7 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 0 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 @@ -350,7 +350,7 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 
61950 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 @@ -369,9 +369,9 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61951 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 S_WAITCNT_DEPCTR 61951 @@ -389,9 +389,9 @@ body: | ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 S_WAITCNT_DEPCTR 65534 @@ -410,7 +410,7 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr102 = S_MOV_B32 0 ; GCN-NEXT: $sgpr103 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec ; GCN-NEXT: $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec $sgpr102 = S_MOV_B32 0 @@ -459,7 +459,7 @@ body: | ; GCN-NEXT: bb.2: ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: S_WAITCNT_DEPCTR 61854 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec bb.0: liveins: $vgpr0, $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir index 5b9c3eaf21c3..18b66a3db40e 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir +++ 
b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx11.mir @@ -183,7 +183,7 @@ body: | ; GCN-LABEL: name: lds_param_load_valu_war_trans ; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 3999 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec @@ -298,7 +298,7 @@ body: | bb.0: ; GCN-LABEL: name: lds_param_load_vmem_war ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) @@ -357,7 +357,7 @@ body: | bb.0: ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) @@ -372,8 +372,8 @@ body: | bb.0: ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2 ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 
implicit $exec :: (load (s32)) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65535 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65411 + ; GCN-NEXT: S_WAITCNT_DEPCTR + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) diff --git a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir index 1543b688ec23..df6b4372dacb 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/lds-direct-hazards-gfx12.mir @@ -340,7 +340,7 @@ body: | bb.0: ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65507 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 1, implicit $m0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) @@ -355,7 +355,7 @@ body: | bb.0: ; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2 ; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32)) - ; GCN-NEXT: S_WAITCNT_DEPCTR 65535 + ; GCN-NEXT: S_WAITCNT_DEPCTR ; GCN-NEXT: $vgpr1 = DS_PARAM_LOAD 0, 0, 15, 0, implicit $m0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4) diff --git a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir 
b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir index 10b64b03efe8..5bbb757251c8 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-consecutive-wait-alus.mir @@ -12,7 +12,7 @@ body: | ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo - ; CHECK-NEXT: S_WAITCNT_DEPCTR 61850 + ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_VmVsrc_6_SaSdst_0 ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc S_WAITCNT_DEPCTR 65530 @@ -27,12 +27,12 @@ body: | ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo - ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530 + ; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_6_SaSdst_0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_WAITCNT_DEPCTR 61855 + ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo bb.0: liveins: $vgpr0 @@ -52,9 +52,9 @@ body: | bb.0: ; CHECK-LABEL: name: meta_instructions ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo - ; CHECK-NEXT: S_WAITCNT_DEPCTR 65530 + ; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_6_SaSdst_0 ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: S_WAITCNT_DEPCTR 61855 + ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def 
$vcc S_WAITCNT_DEPCTR 65530 @@ -68,7 +68,7 @@ body: | bb.0: ; CHECK-LABEL: name: debug_instruction ; CHECK: renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc_lo - ; CHECK-NEXT: S_WAITCNT_DEPCTR 61850 + ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_VmVsrc_6_SaSdst_0 ; CHECK-NEXT: DBG_VALUE $sgpr0 ; CHECK-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, killed $vgpr0, killed $sgpr0, implicit $exec, implicit-def $vcc_lo renamable $sgpr0 = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec, implicit-def $vcc_lo, implicit-def $vcc diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir index 052459a44d34..08cb03a3e686 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir @@ -10,7 +10,7 @@ body: | ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $exec = S_MOV_B64 -1 ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -46,7 +46,7 @@ body: | ; GFX11-NEXT: $sgpr8 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr9 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr10 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -99,7 +99,7 @@ body: | ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -235,7 +235,7 @@ body: | ; GFX11-NEXT: $vgpr13 = 
V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -309,7 +309,7 @@ body: | ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -396,7 +396,7 @@ body: | ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -474,7 +474,7 @@ body: | ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; diff --git a/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir index 6c7e70f5455a..e763ced42359 100644 --- a/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir +++ b/llvm/test/CodeGen/AMDGPU/trans-forwarding-hazards.mir @@ -9,7 +9,7 @@ body: | ; GFX11-LABEL: name: trans_use_1_hazard ; GFX11: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: 
S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -30,7 +30,7 @@ body: | bb.0: ; GCN-LABEL: name: trans_use_1_no_hazard_1 ; GCN: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GCN-NEXT: S_ENDPGM 0 @@ -59,7 +59,7 @@ body: | ; GFX11-NEXT: $sgpr8 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr9 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr10 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -107,7 +107,7 @@ body: | ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -206,7 +206,7 @@ body: | ; GFX11-LABEL: name: trans_use_4_one_depctr_1 ; GFX11: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr4, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr7 = V_ADD_F32_e32 $vgpr3, $vgpr6, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 @@ -231,7 +231,7 @@ body: | ; GFX11-LABEL: name: trans_use_4_one_depctr_2 ; GFX11: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr3 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec - ; 
GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr5 = V_ADD_F32_e32 $vgpr3, $vgpr4, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr7 = V_ADD_F32_e32 $vgpr1, $vgpr6, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 @@ -257,7 +257,7 @@ body: | ; GFX11: $vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr10 = V_SQRT_F32_e32 $vgpr11, implicit $mode, implicit $exec ; GFX11-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr3 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -296,7 +296,7 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.2: ; GFX11-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -352,12 +352,12 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: S_BRANCH %bb.2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.2: ; GFX11-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 3999 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX11-NEXT: $vgpr4 = V_ADD_F32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec ; GFX11-NEXT: S_ENDPGM 0 ; @@ -373,7 +373,7 @@ body: | ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec ; GFX1150-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec - ; GFX1150-NEXT: S_WAITCNT_DEPCTR 4095 + ; GFX1150-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GFX1150-NEXT: S_BRANCH %bb.2 ; GFX1150-NEXT: {{ $}} ; GFX1150-NEXT: bb.2: @@ -402,7 +402,7 @@ body: | ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: 
$vgpr1 = V_SQRT_F32_e32 $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 4095 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard-true16.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard-true16.mir index 4d74ccebacab..c7102e656b0d 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard-true16.mir +++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard-true16.mir @@ -8,7 +8,7 @@ body: | ; GCN-LABEL: name: mask_hazard_cndmask_t16_dpp4 ; GCN: $vgpr0_lo16 = V_CNDMASK_B16_t16_e64_dpp $vgpr0_lo16, 0, $vgpr1_lo16, 0, $vgpr2_lo16, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec ; GCN-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: S_ENDPGM 0 $vgpr0_lo16 = V_CNDMASK_B16_t16_e64_dpp $vgpr0_lo16, 0, $vgpr1_lo16, 0, $vgpr2_lo16, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir index f659587da7e2..4ddb5f027ac4 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/valu-mask-write-hazard.mir @@ -66,7 +66,7 @@ body: | ; GCN-LABEL: name: mask_hazard_getpc1 ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec @@ -83,7 +83,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 { ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + 
; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 8, implicit-def $scc ; GCN-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 16, implicit-def $scc, implicit $scc ; GCN-NEXT: } @@ -104,7 +104,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_vcc1 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_vcc1 @@ -123,7 +123,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_vcc2 ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_vcc2 @@ -142,7 +142,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_cndmask_dpp1 ; GFX11: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_cndmask_dpp1 @@ -161,7 +161,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_cndmask_dpp2 ; GFX11: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_cndmask_dpp2 @@ -180,7 +180,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_cndmask_dpp3 ; GFX11: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec ; GFX11-NEXT: 
$sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_cndmask_dpp3 @@ -199,7 +199,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_addc1 ; GFX11: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_addc1 @@ -218,7 +218,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_addc2 ; GFX11: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_addc2 @@ -237,7 +237,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_addc3 ; GFX11: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_addc3 @@ -256,7 +256,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_addc4 ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_addc4 @@ -275,7 +275,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subb1 ; GFX11: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: 
S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subb1 @@ -294,7 +294,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subb2 ; GFX11: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subb2 @@ -313,7 +313,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subb3 ; GFX11: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subb3 @@ -332,7 +332,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subb4 ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subb4 @@ -351,7 +351,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subbrev1 ; GFX11: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subbrev1 @@ -370,7 +370,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subbrev2 ; GFX11: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subbrev2 @@ 
-389,7 +389,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subbrev3 ; GFX11: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subbrev3 @@ -408,7 +408,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subbrev4 ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subbrev4 @@ -427,7 +427,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_div_fmas_f32 ; GFX11: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_div_fmas_f32 @@ -446,7 +446,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_div_fmas_f64 ; GFX11: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_div_fmas_f64 @@ -466,7 +466,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subreg1 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subreg1 @@ -486,7 
+486,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_subreg2 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subreg2 @@ -507,7 +507,7 @@ body: | ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_subreg3 @@ -529,7 +529,7 @@ body: | ; GCN-LABEL: name: mask_hazard_subreg4 ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GCN-NEXT: $vcc_lo = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec @@ -546,7 +546,7 @@ body: | ; GCN-LABEL: name: mask_hazard_subreg5 ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GCN-NEXT: $vcc_hi = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_hi ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec @@ -564,7 +564,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: S_WAITCNT 0 ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec @@ -584,7 +584,7 @@ body: | ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit 
$exec ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec @@ -604,7 +604,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec @@ -623,7 +623,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2 ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec @@ -683,7 +683,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GCN-NEXT: $vcc_lo = S_MOV_B32 0 ; GCN-NEXT: $vcc_hi = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo ; GCN-NEXT: $vcc = S_MOV_B64 1 ; GCN-NEXT: S_ENDPGM 0 @@ -702,7 +702,7 @@ body: | ; GCN-LABEL: name: mask_hazard_cancel_hazard2 ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GCN-NEXT: $vcc = S_MOV_B64 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo ; GCN-NEXT: $vcc = S_MOV_B64 1 ; GCN-NEXT: S_ENDPGM 0 @@ -720,7 +720,7 @@ body: | ; GCN-LABEL: name: mask_hazard_cancel_hazard3 ; GCN: $vgpr1 = 
V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0 ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 ; GCN-NEXT: S_ENDPGM 0 @@ -739,7 +739,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_MOV_B32 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr4 = S_MOV_B32 $sgpr0 ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 ; GCN-NEXT: S_ENDPGM 0 @@ -758,16 +758,16 @@ body: | ; GFX11-LABEL: name: mask_hazard_partial_cancel1 ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc_lo = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo ; GFX11-NEXT: $vcc = S_MOV_B64 1 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_partial_cancel1 ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GFX12-NEXT: $vcc_lo = S_MOV_B32 0 - ; GFX12-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo ; GFX12-NEXT: $vcc = S_MOV_B64 1 ; GFX12-NEXT: S_ENDPGM 0 @@ -785,16 +785,16 @@ body: | ; GFX11-LABEL: name: mask_hazard_partial_cancel2 ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GFX11-NEXT: $vcc_hi = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo ; GFX11-NEXT: $vcc = S_MOV_B64 1 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_partial_cancel2 ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 
$vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GFX12-NEXT: $vcc_hi = S_MOV_B32 0 - ; GFX12-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX12-NEXT: $sgpr0 = S_MOV_B32 $vcc_lo ; GFX12-NEXT: $vcc = S_MOV_B64 1 ; GFX12-NEXT: S_ENDPGM 0 @@ -812,16 +812,16 @@ body: | ; GFX11-LABEL: name: mask_hazard_partial_cancel3 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr0 ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_partial_cancel3 ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0 - ; GFX12-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr0 ; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 ; GFX12-NEXT: S_ENDPGM 0 @@ -839,16 +839,16 @@ body: | ; GFX11-LABEL: name: mask_hazard_partial_cancel4 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: $sgpr3 = S_MOV_B32 $sgpr1 ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_partial_cancel4 ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0 - ; GFX12-NEXT: S_WAITCNT_DEPCTR 65438 + ; GFX12-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GFX12-NEXT: $sgpr3 = S_MOV_B32 $sgpr1 ; GFX12-NEXT: $sgpr0_sgpr1 = S_MOV_B64 1 ; GFX12-NEXT: S_ENDPGM 0 @@ -866,7 +866,7 @@ body: | ; GFX11-LABEL: name: 
mask_hazard_valu_readlane1 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_valu_readlane1 @@ -885,7 +885,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_valu_readlane2 ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_valu_readlane2 @@ -905,7 +905,7 @@ body: | ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2 = V_READLANE_B32 $vgpr3, 0 ; GFX11-NEXT: $sgpr3 = V_READLANE_B32 $vgpr3, 1 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_valu_readlane3 @@ -926,7 +926,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_valu_readfirstlane ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: $sgpr2 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_valu_readfirstlane @@ -945,7 +945,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_valu_vcmp_vcc ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65437 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_valu_vcmp_vcc @@ -964,7 +964,7 @@ body: | ; GFX11-LABEL: name: mask_hazard_valu_vcmp_sgpr ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec 
; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_valu_vcmp_sgpr @@ -988,7 +988,7 @@ body: | ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0 ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61852 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_VaVcc_0_SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_combine1 @@ -1020,10 +1020,10 @@ body: | ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0 - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65436 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0_SaSdst_0 ; GFX11-NEXT: $sgpr1 = S_MOV_B32 $sgpr4 ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $vgpr5, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61854 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0_SaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_combine2 @@ -1054,11 +1054,11 @@ body: | ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65437 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0 ; GFX11-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_combine3 @@ -1087,12 +1087,12 @@ body: | ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 
0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65437 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0 ; GFX11-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $vcc ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_combine4 @@ -1123,12 +1123,12 @@ body: | ; GFX11-NEXT: $vgpr4 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec ; GFX11-NEXT: $vgpr5 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec ; GFX11-NEXT: V_CMP_NE_U32_e32 0, $vgpr5, implicit-def $vcc, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 65437 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaVcc_0 ; GFX11-NEXT: $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 2, $sgpr10, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: $sgpr5 = S_MOV_B32 $sgpr1 ; GFX11-NEXT: $sgpr2_sgpr3 = V_CMP_EQ_U32_e64 3, $sgpr10, implicit $exec - ; GFX11-NEXT: S_WAITCNT_DEPCTR 61855 + ; GFX11-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GFX11-NEXT: S_ENDPGM 0 ; ; GFX12-LABEL: name: mask_hazard_combine5 diff --git a/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir b/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir index 3d241b0037fd..bdff78c99485 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir +++ b/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard-attrs.mir @@ -44,7 +44,7 @@ body: | ; GCN-LABEL: name: hazard_enable ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: 
S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec @@ -83,7 +83,7 @@ body: | ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: S_SETPC_B64 $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: @@ -92,7 +92,7 @@ body: | ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: S_SETPC_B64_return $sgpr0_sgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: @@ -104,7 +104,7 @@ body: | ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3 ; GCN-NEXT: $sgpr4 = S_ADD_U32 $sgpr4, 0, implicit-def $scc ; GCN-NEXT: {{ $}} @@ -172,7 +172,7 @@ body: | ; GCN-LABEL: name: hazard_callee2 ; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec ; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec ; GCN-NEXT: DS_NOP implicit $m0, implicit $exec @@ -292,7 +292,7 @@ body: | ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def 
$scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -335,7 +335,7 @@ body: | ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard.mir b/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard.mir index babc64a67bc1..c62543305de2 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/valu-read-sgpr-hazard.mir @@ -54,7 +54,7 @@ body: | ; GCN-LABEL: name: hazard_getpc1 ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec @@ -70,7 +70,7 @@ body: | ; GCN-LABEL: name: hazard_getpc2 ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr1, implicit $exec ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr1, implicit $exec @@ -87,7 +87,7 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec ; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 { ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 8, implicit-def $scc ; GCN-NEXT: $sgpr1 = 
S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-hi) @mem + 16, implicit-def $scc, implicit $scc ; GCN-NEXT: } @@ -109,10 +109,10 @@ body: | ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0, implicit $exec ; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 { ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr1 = S_SEXT_I32_I16 $sgpr1 ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 12, implicit-def $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-hi) @mem + 24, implicit-def $scc, implicit $scc ; GCN-NEXT: } ; GCN-NEXT: S_ENDPGM 0 @@ -133,7 +133,7 @@ body: | ; GCN-LABEL: name: hazard_vcc1 ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2, implicit $exec ; GCN-NEXT: $sgpr3 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr4 = S_ADD_U32 $sgpr3, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2, implicit $exec @@ -149,7 +149,7 @@ body: | ; GCN-LABEL: name: hazard_vcc2 ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc_lo, implicit $exec ; GCN-NEXT: $vcc_lo = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr4 = S_ADD_U32 $vcc_lo, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec @@ -165,7 +165,7 @@ body: | ; GCN-LABEL: name: hazard_vcc3 ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc_lo, implicit $exec ; GCN-NEXT: $vcc_lo = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $vgpr3 = V_CNDMASK_B32_e32 $vgpr4, $vgpr5, implicit $vcc_lo, implicit $exec ; GCN-NEXT: S_ENDPGM 0 
$vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec @@ -198,7 +198,7 @@ body: | ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: - ; GCN-NEXT: S_WAITCNT_DEPCTR 65436 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVcc_0_SaSdst_0 ; GCN-NEXT: $vgpr3 = V_CNDMASK_B32_e32 $vgpr4, $vgpr5, implicit $vcc_lo, implicit $exec ; GCN-NEXT: S_ENDPGM 0 bb.0: @@ -223,7 +223,7 @@ body: | ; GCN-LABEL: name: hazard_addc1 ; GCN: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec ; GCN-NEXT: $sgpr0 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -239,7 +239,7 @@ body: | ; GCN-LABEL: name: hazard_addc2 ; GCN: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr1, 0, implicit $exec ; GCN-NEXT: $sgpr0 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr1, 0, implicit $exec @@ -255,7 +255,7 @@ body: | ; GCN-LABEL: name: hazard_addc3 ; GCN: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec ; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -271,7 +271,7 @@ body: | ; GCN-LABEL: name: hazard_addc4 ; GCN: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr3, 0, implicit $exec ; GCN-NEXT: $sgpr3 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr3, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 
$vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr3, 0, implicit $exec @@ -289,7 +289,7 @@ body: | ; GCN-NEXT: $sgpr16 = S_MOV_B32 0 ; GCN-NEXT: $sgpr32 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -312,7 +312,7 @@ body: | ; GCN-NEXT: $sgpr80 = S_MOV_B32 0 ; GCN-NEXT: $sgpr96 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -333,7 +333,7 @@ body: | ; GCN-LABEL: name: hazard_vaddc1 ; GCN: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec ; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $vgpr2, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr1, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -357,7 +357,7 @@ body: | ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -389,7 +389,7 @@ body: | ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -420,7 +420,7 @@ body: | ; GCN-NEXT: 
$sgpr12 = S_ADD_U32 $sgpr11, 0, implicit-def $scc ; GCN-NEXT: $sgpr14 = S_ADD_U32 $sgpr13, 0, implicit-def $scc ; GCN-NEXT: $sgpr16 = S_ADD_U32 $sgpr15, 0, implicit-def $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -453,7 +453,7 @@ body: | ; GCN-NEXT: $sgpr18 = S_ADD_U32 $sgpr17, 0, implicit-def $scc ; GCN-NEXT: $sgpr20 = S_ADD_U32 $sgpr19, 0, implicit-def $scc ; GCN-NEXT: $sgpr22 = S_ADD_U32 $sgpr21, 0, implicit-def $scc - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -539,7 +539,7 @@ body: | ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: @@ -591,12 +591,12 @@ body: | ; NOBC-NEXT: {{ $}} ; NOBC-NEXT: bb.2: ; NOBC-NEXT: $sgpr16 = S_MOV_B32 0 - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: S_SETPC_B64 $sgpr0_sgpr1 ; NOBC-NEXT: {{ $}} ; NOBC-NEXT: bb.3: ; NOBC-NEXT: $sgpr18 = S_MOV_B32 0 - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: S_SETPC_B64_return $sgpr0_sgpr1 ; NOBC-NEXT: {{ $}} ; NOBC-NEXT: bb.4: @@ -604,19 +604,19 @@ body: | ; NOBC-NEXT: {{ $}} ; NOBC-NEXT: $vcc_lo = S_MOV_B32 0 ; NOBC-NEXT: $sgpr20 = S_MOV_B32 0 - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: $sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3 - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: $sgpr4 = S_ADD_U32 $sgpr4, 0, 
implicit-def $scc ; NOBC-NEXT: {{ $}} ; NOBC-NEXT: bb.5: ; NOBC-NEXT: successors: %bb.6(0x80000000) ; NOBC-NEXT: {{ $}} - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: $sgpr8_sgpr9 = S_CALL_B64 0 ; NOBC-NEXT: {{ $}} ; NOBC-NEXT: bb.6: - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: $sgpr22 = S_MOV_B32 $sgpr8 ; NOBC-NEXT: S_ENDPGM 0 ; @@ -645,7 +645,7 @@ body: | ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec - ; BC-NEXT: S_WAITCNT_DEPCTR 65438 + ; BC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; BC-NEXT: S_SETPC_B64 $sgpr0_sgpr1 ; BC-NEXT: {{ $}} ; BC-NEXT: bb.3: @@ -654,7 +654,7 @@ body: | ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec - ; BC-NEXT: S_WAITCNT_DEPCTR 65438 + ; BC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; BC-NEXT: S_SETPC_B64_return $sgpr0_sgpr1 ; BC-NEXT: {{ $}} ; BC-NEXT: bb.4: @@ -666,7 +666,7 @@ body: | ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec - ; BC-NEXT: S_WAITCNT_DEPCTR 65438 + ; BC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; BC-NEXT: $sgpr4_sgpr5 = S_SWAPPC_B64 $sgpr2_sgpr3 ; BC-NEXT: $sgpr4 = S_ADD_U32 $sgpr4, 0, implicit-def $scc ; BC-NEXT: {{ $}} @@ -720,9 +720,9 @@ body: | bb.0: ; NOBC-LABEL: name: hazard_callee1 ; NOBC: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 ; ; BC-LABEL: name: hazard_callee1 @@ -741,15 +741,15 @@ body: | ; NOBC-LABEL: name: hazard_callee2 ; NOBC: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit 
$exec ; NOBC-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc - ; NOBC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOBC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOBC-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 ; ; BC-LABEL: name: hazard_callee2 ; BC: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec ; BC-NEXT: $sgpr1 = S_CSELECT_B32 -1, 0, implicit $scc - ; BC-NEXT: S_WAITCNT_DEPCTR 65438 + ; BC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; BC-NEXT: $sgpr2 = S_ADD_U32 $sgpr1, 0, implicit-def $scc ; BC-NEXT: DS_NOP implicit $m0, implicit $exec ; BC-NEXT: DS_NOP implicit $m0, implicit $exec @@ -769,7 +769,7 @@ body: | ; GCN-LABEL: name: hazard_carry_vcc ; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $vcc_lo, 0, implicit $exec ; GCN-NEXT: $vgpr0 = V_ADD_CO_U32_e32 $vgpr0, $vgpr1, implicit-def $vcc_lo, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 65437 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaVcc_0 ; GCN-NEXT: $vgpr1 = V_ADDC_U32_e32 $vgpr2, $vgpr3, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $vcc_lo, 0, implicit $exec @@ -802,7 +802,7 @@ body: | ; GCN-LABEL: name: hazard_carry_sgpr ; GCN: $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec ; GCN-NEXT: $vgpr0, $sgpr0 = V_ADD_CO_U32_e64 $vgpr0, $vgpr1, 0, implicit $exec - ; GCN-NEXT: S_WAITCNT_DEPCTR 61855 + ; GCN-NEXT: S_WAITCNT_DEPCTR .VaSdst_0 ; GCN-NEXT: $vgpr1, $sgpr1 = V_ADDC_U32_e64 $vgpr2, $vgpr3, $sgpr0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -871,7 +871,7 @@ body: | ; NOMEMC-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8, 0, 0, 0, implicit $exec ; NOMEMC-NEXT: S_WAIT_LOADCNT 0 ; NOMEMC-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; NOMEMC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOMEMC-NEXT: S_WAITCNT_DEPCTR 
.SaSdst_0 ; NOMEMC-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; NOMEMC-NEXT: S_ENDPGM 0 ; @@ -903,7 +903,7 @@ body: | ; NOMEMC-NEXT: $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx12 $vgpr3, $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128)) ; NOMEMC-NEXT: S_WAIT_SAMPLECNT 0 ; NOMEMC-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; NOMEMC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOMEMC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOMEMC-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; NOMEMC-NEXT: S_ENDPGM 0 ; @@ -935,7 +935,7 @@ body: | ; NOMEMC-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7) ; NOMEMC-NEXT: S_WAIT_BVHCNT 0 ; NOMEMC-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; NOMEMC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOMEMC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOMEMC-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; NOMEMC-NEXT: S_ENDPGM 0 ; @@ -967,7 +967,7 @@ body: | ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec @@ -987,7 +987,7 @@ body: | ; NOMEMC-NEXT: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec ; NOMEMC-NEXT: S_WAIT_LOADCNT 0 ; NOMEMC-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; NOMEMC-NEXT: S_WAITCNT_DEPCTR 65438 + ; NOMEMC-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; NOMEMC-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; NOMEMC-NEXT: S_ENDPGM 0 ; @@ -1019,7 +1019,7 @@ body: | ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit 
$flat_scr ; GCN-NEXT: S_WAIT_LOADCNT 0 ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 - ; GCN-NEXT: S_WAITCNT_DEPCTR 65438 + ; GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 ; GCN-NEXT: $sgpr3 = S_ADD_U32 $sgpr0, 0, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 $vgpr1, $sgpr0 = V_ADDC_U32_e64 0, $vgpr1, $sgpr0, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir b/llvm/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir index 1794a9c555bc..ddfe86506343 100644 --- a/llvm/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vcmpx-exec-war-hazard.mir @@ -3,7 +3,7 @@ # GCN-LABEL: name: hazard_vcmpx_smov_exec_lo # GCN: $sgpr0 = S_MOV_B32 $exec_lo -# GFX10-NEXT: S_WAITCNT_DEPCTR 65310 +# GFX10-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 # GCN-NEXT: V_CMPX_LE_F32_nosdst_e32 --- name: hazard_vcmpx_smov_exec_lo @@ -21,7 +21,7 @@ body: | # GCN-LABEL: name: hazard_vcmpx_smov_exec # GCN: $sgpr0_sgpr1 = S_MOV_B64 $exec -# GFX10-NEXT: S_WAITCNT_DEPCTR 65310 +# GFX10-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 # GCN-NEXT: V_CMPX_LE_F32_nosdst_e32 --- name: hazard_vcmpx_smov_exec @@ -109,7 +109,7 @@ body: | # GCN-LABEL: name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe # GCN: $sgpr0 = S_MOV_B32 $exec_lo -# GCN-NEXT: S_WAITCNT_DEPCTR 65534 +# GCN-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 # GCN-NEXT: V_CMPX_LE_F32_nosdst_e32 --- name: no_hazard_vcmpx_smov_exec_lo_depctr_fffe @@ -118,7 +118,7 @@ body: | successors: %bb.1 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $sgpr0 = S_MOV_B32 $exec_lo - S_WAITCNT_DEPCTR 65534 + S_WAITCNT_DEPCTR .SaSdst_0 V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec S_BRANCH %bb.1 @@ -128,8 +128,8 @@ body: | # GCN-LABEL: name: hazard_vcmpx_smov_exec_lo_depctr_ffff # GCN: $sgpr0 = S_MOV_B32 $exec_lo -# GCN-NEXT: S_WAITCNT_DEPCTR 65535 -# GFX10-NEXT: S_WAITCNT_DEPCTR 65310 +# GCN-NEXT: S_WAITCNT_DEPCTR .AllOff +# GFX10-NEXT: S_WAITCNT_DEPCTR .SaSdst_0 # GCN-NEXT: V_CMPX_LE_F32_nosdst_e32 --- name: 
hazard_vcmpx_smov_exec_lo_depctr_ffff @@ -138,7 +138,7 @@ body: | successors: %bb.1 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $sgpr0 = S_MOV_B32 $exec_lo - S_WAITCNT_DEPCTR 65535 + S_WAITCNT_DEPCTR .AllOff V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec S_BRANCH %bb.1 @@ -148,7 +148,7 @@ body: | # GCN-LABEL: name: hazard_vcmpx_smov_exec_lo_depctr_effe # GCN: $sgpr0 = S_MOV_B32 $exec_lo -# GCN-NEXT: S_WAITCNT_DEPCTR 61438 +# GCN-NEXT: S_WAITCNT_DEPCTR .VaVdst_14_SaSdst_0 # GCN-NEXT: V_CMPX_LE_F32_nosdst_e32 --- name: hazard_vcmpx_smov_exec_lo_depctr_effe @@ -157,7 +157,7 @@ body: | successors: %bb.1 $vgpr0 = V_MOV_B32_e32 0, implicit $exec $sgpr0 = S_MOV_B32 $exec_lo - S_WAITCNT_DEPCTR 61438 + S_WAITCNT_DEPCTR .VaVdst_14_SaSdst_0 V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir index 84ecc5929b9f..04c0ba724263 100644 --- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir @@ -3,7 +3,7 @@ # GCN-LABEL: name: vmem_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr @@ -17,7 +17,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec # GCN: BUFFER_STORE_DWORD_OFFEN_exact -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_exec @@ -36,7 +36,7 @@ body: | # GCN-NEXT: S_MOV_B32 # GCN-NEXT: S_MOV_B32 # GCN-NEXT: S_MOV_B32 -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_chain @@ -55,7 +55,7 @@ body: | ... 
# GCN-LABEL: name: vmem_smem_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_LOAD_DWORD_IMM --- name: vmem_smem_write_sgpr @@ -70,7 +70,7 @@ body: | # GCN-LABEL: name: vmem_snop_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_NOP -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_snop_write_sgpr @@ -116,7 +116,7 @@ body: | # GCN-LABEL: name: vmem_swait_any_write_sgpr # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_WAITCNT -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_swait_any_write_sgpr @@ -131,7 +131,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec_impread # GCN: BUFFER_LOAD_DWORD_OFFEN -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B64 --- name: vmem_write_exec_impread @@ -145,7 +145,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec_expread # GCN: BUFFER_LOAD_DWORD_OFFEN -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B64 --- name: vmem_write_exec_expread @@ -158,7 +158,7 @@ body: | ... 
# GCN-LABEL: name: ds_write_m0 # GCN: DS_READ_B32 -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: ds_write_m0 @@ -173,7 +173,7 @@ body: | # GCN-LABEL: name: vmem_write_sgpr_fall_through # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN: bb.1: -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_fall_through @@ -192,7 +192,7 @@ body: | # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_BRANCH # GCN: bb.1: -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_branch @@ -212,7 +212,7 @@ body: | # GCN: BUFFER_LOAD_DWORD_OFFEN # GCN-NEXT: S_BRANCH # GCN: bb.2: -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_branch_around @@ -240,7 +240,7 @@ body: | # GCN: S_WAITCNT # GCN: V_ADD_CO_U32 # GCN: bb.2: -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_cbranch_around @@ -265,7 +265,7 @@ body: | ... # GCN-LABEL: name: vmem_write_sgpr_branch_backedge # GCN: $vgpr0 = IMPLICIT_DEF -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_write_sgpr_branch_backedge @@ -283,7 +283,7 @@ body: | ... # GCN-LABEL: name: ds_write_exec # GCN: DS_WRITE_B32_gfx9 -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: ds_write_exec @@ -296,7 +296,7 @@ body: | ... # GCN-LABEL: name: vmem_scratch_exec # GCN: SCRATCH_LOAD_DWORD -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_scratch_exec @@ -308,7 +308,7 @@ body: | ... 
# GCN-LABEL: name: vmem_flat_exec # GCN: FLAT_LOAD_DWORD -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_flat_exec @@ -321,7 +321,7 @@ body: | ... # GCN-LABEL: name: vmem_global_exec # GCN: GLOBAL_LOAD_DWORD -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_global_exec @@ -334,7 +334,7 @@ body: | ... # GCN-LABEL: name: vmem_global_atomic_exec # GCN: GLOBAL_ATOMIC_ADD_RTN -# GFX10-NEXT: S_WAITCNT_DEPCTR 65283 +# GFX10-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 # GCN-NEXT: S_MOV_B32 --- name: vmem_global_atomic_exec diff --git a/llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu-errors.mir b/llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu-errors.mir new file mode 100644 index 000000000000..8e375753acd7 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu-errors.mir @@ -0,0 +1,71 @@ +# RUN: split-file %s %t + +;--- bad-expression.mir +# RUN: not llc %t/bad-expression.mir -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null 2>&1 | FileCheck %t/bad-expression.mir --strict-whitespace --match-full-lines +--- +# CHECK:error: {{.*}} expected _ +# CHECK-NEXT: S_WAITCNT_DEPCTR .BadExpression +# CHECK-NEXT: ^ +name: BadExpression +body: | + bb.0: + S_WAITCNT_DEPCTR .BadExpression +... + +;--- counter-too-large.mir +# RUN: not llc %t/counter-too-large.mir -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null 2>&1 | FileCheck %t/counter-too-large.mir --strict-whitespace --match-full-lines +--- +# CHECK:error: {{.*}} counter value too large +# CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_99999 +# CHECK-NEXT: ^ +name: CounterTooLarge +body: | + bb.0: + S_WAITCNT_DEPCTR .VaVdst_99999 +... + +;--- expected-prefix.mir +# RUN: not llc %t/expected-prefix.mir -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null 2>&1 | FileCheck %t/expected-prefix.mir +--- +# CHECK: error: {{.*}} +name: MissingDotPrefix +body: | + bb.0: + S_WAITCNT_DEPCTR MissingDotPrefix +... 
+ +;--- invalid-counter-name.mir +# RUN: not llc %t/invalid-counter-name.mir -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null 2>&1 | FileCheck %t/invalid-counter-name.mir --strict-whitespace --match-full-lines +--- +# CHECK:error: {{.*}} invalid counter name +# CHECK-NEXT: S_WAITCNT_DEPCTR .InvalidCounterName_1 +# CHECK-NEXT: ^ +name: InvalidCounterName +body: | + bb.0: + S_WAITCNT_DEPCTR .InvalidCounterName_1 +... + +;--- non-integer-counter.mir +# RUN: not llc %t/non-integer-counter.mir -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null 2>&1 | FileCheck %t/non-integer-counter.mir --strict-whitespace --match-full-lines +--- +# CHECK:error: {{.*}} expected non-negative integer counter number +# CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_BadCnt +# CHECK-NEXT: ^ +name: NonIntegerCounter +body: | + bb.0: + S_WAITCNT_DEPCTR .VaVdst_BadCnt +... + +;--- non-negative-integer-counter.mir +# RUN: not llc %t/non-negative-integer-counter.mir -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null 2>&1 | FileCheck %t/non-negative-integer-counter.mir --strict-whitespace --match-full-lines +--- +# CHECK:error: {{.*}} expected non-negative integer counter number +# CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_-1 +# CHECK-NEXT: ^ +name: NegativeCounter +body: | + bb.0: + S_WAITCNT_DEPCTR .VaVdst_-1 +... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu.mir b/llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu.mir new file mode 100644 index 000000000000..e93a27ae51b8 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/s_wait_alu.mir @@ -0,0 +1,67 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none %s -o - | FileCheck %s + +--- +name: va_vdst_0 +body: | + bb.0: + ; CHECK-LABEL: name: va_vdst_0 + ; CHECK: S_WAITCNT_DEPCTR .VaVdst_0 + S_WAITCNT_DEPCTR .VaVdst_0 +... 
+--- +name: va_vdst_1 +body: | + bb.0: + ; CHECK-LABEL: name: va_vdst_1 + ; CHECK: S_WAITCNT_DEPCTR .VaVdst_1 + S_WAITCNT_DEPCTR .VaVdst_1 +... +--- +name: va_vdst_max-1 +body: | + bb.0: + ; CHECK-LABEL: name: va_vdst_max-1 + ; CHECK: S_WAITCNT_DEPCTR .VaVdst_14 + S_WAITCNT_DEPCTR .VaVdst_14 +... +--- +name: vm_vsrc_0_va_vdst_1 +body: | + bb.0: + ; CHECK-LABEL: name: vm_vsrc_0_va_vdst_1 + ; CHECK: S_WAITCNT_DEPCTR .VaSdst_1_VmVsrc_0 + S_WAITCNT_DEPCTR .VmVsrc_0_VaSdst_1 +... +--- +name: all-zero +body: | + bb.0: + ; CHECK-LABEL: name: all-zero + ; CHECK: S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 + S_WAITCNT_DEPCTR .VaVdst_0_VaSdst_0_VaSsrc_0_HoldCnt_0_VmVsrc_0_VaVcc_0_SaSdst_0 +... +--- +name: all-ones +body: | + bb.0: + ; CHECK-LABEL: name: all-ones + ; CHECK: S_WAITCNT_DEPCTR .VaVdst_1_VaSdst_1_VmVsrc_1 + S_WAITCNT_DEPCTR .VaVdst_1_VaSdst_1_VmVsrc_1 +... +--- +name: all-off +body: | + bb.0: + ; CHECK-LABEL: name: all-off + ; CHECK: S_WAITCNT_DEPCTR .AllOff + S_WAITCNT_DEPCTR .AllOff +... +--- +name: all-off-number +body: | + bb.0: + ; CHECK-LABEL: name: all-off-number + ; CHECK: S_WAITCNT_DEPCTR .AllOff + S_WAITCNT_DEPCTR 65535 +...