[GlobalISel] convergent intrinsics

Introduced the convergent equivalent of the existing G_INTRINSIC opcodes:

- G_INTRINSIC_CONVERGENT
- G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS

Out of the targets that currently have some support for GlobalISel, the patch
assumes that the convergent intrinsics are only relevant to SPIRV and AMDGPU.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D154766
This commit is contained in:
Sameer Sahasrabuddhe 2023-07-31 12:14:34 +05:30
parent f2e44238ee
commit d9847cde48
79 changed files with 876 additions and 714 deletions

View File

@ -856,13 +856,25 @@ it during passes like legalization. This is needed because calls to exception
throw routines do not return, so no code that must be on an executable path must
be placed after throwing.
G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
G_INTRINSIC, G_INTRINSIC_CONVERGENT
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Call an intrinsic
Call an intrinsic that has no side-effects.
The _W_SIDE_EFFECTS version is considered to have unknown side-effects and
as such cannot be reordered across other side-effecting instructions.
The _CONVERGENT variant corresponds to an LLVM IR intrinsic marked `convergent`.
.. note::
Unlike SelectionDAG, there is no _VOID variant. Both of these are permitted
to have zero, one, or multiple results.
G_INTRINSIC_W_SIDE_EFFECTS, G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Call an intrinsic that is considered to have unknown side-effects and as such
cannot be reordered across other side-effecting instructions.
The _CONVERGENT variant corresponds to an LLVM IR intrinsic marked `convergent`.
.. note::

View File

@ -366,14 +366,33 @@ public:
}
bool is(Intrinsic::ID ID) const { return getIntrinsicID() == ID; }
bool hasSideEffects() const {
return getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
switch (getOpcode()) {
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return true;
default:
return false;
}
}
/// Returns true when this instruction uses one of the two convergent
/// intrinsic opcodes (with or without side effects).
bool isConvergent() const {
  const unsigned Opc = getOpcode();
  return Opc == TargetOpcode::G_INTRINSIC_CONVERGENT ||
         Opc == TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
}
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return true;
default:
return false;

View File

@ -1108,20 +1108,25 @@ public:
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src,
const SrcOp &Op, unsigned Index);
/// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
/// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
/// result register definition unless \p Reg is NoReg (== 0). The second
/// operand will be the intrinsic's ID.
/// Build and insert a G_INTRINSIC instruction.
///
/// Callers are expected to add the required definitions and uses afterwards.
/// There are four different opcodes based on combinations of whether the
/// intrinsic has side effects and whether it is convergent. These properties
/// can be specified as explicit parameters, or else they are derived from
/// the attributes of the intrinsic's declaration.
///
/// The parameter \p Res provides the Registers or MOs that will be defined by
/// this instruction.
///
/// \pre setBasicBlock or setMI must have been called.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res,
bool HasSideEffects);
bool HasSideEffects, bool isConvergent);
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res);
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res,
bool HasSideEffects);
bool HasSideEffects, bool isConvergent);
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res);
/// Build and insert \p Res = G_FPTRUNC \p Op
///

View File

@ -426,6 +426,12 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC)
/// Generic intrinsic use (with side effects).
HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS)
/// Generic convergent intrinsic use (without side effects).
HANDLE_TARGET_OPCODE(G_INTRINSIC_CONVERGENT)
/// Generic convergent intrinsic use (with side effects).
HANDLE_TARGET_OPCODE(G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS)
/// Generic extension allowing rubbish in high bits.
HANDLE_TARGET_OPCODE(G_ANYEXT)

View File

@ -1228,10 +1228,6 @@ def G_INTRINSIC : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$intrin, variable_ops);
let hasSideEffects = false;
// Conservatively assume this is convergent. If there turnes out to
// be a need, there should be separate convergent intrinsic opcodes.
let isConvergent = 1;
}
// Intrinsic with side effects.
@ -1241,9 +1237,23 @@ def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction {
let hasSideEffects = true;
let mayLoad = true;
let mayStore = true;
}
// Conservatively assume this is convergent. If there turnes out to
// be a need, there should be separate convergent intrinsic opcodes.
// Convergent intrinsic without side effects.
//
// Same operand layout as G_INTRINSIC: no fixed outputs are declared, and the
// inputs are the intrinsic ID ($intrin) followed by a variable number of
// operands. The only property that differs from G_INTRINSIC is isConvergent.
def G_INTRINSIC_CONVERGENT : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$intrin, variable_ops);
let hasSideEffects = false;
let isConvergent = true;
}
// Convergent intrinsic with side effects.
//
// Same operand layout as G_INTRINSIC_W_SIDE_EFFECTS: no fixed outputs are
// declared, and the inputs are the intrinsic ID ($intrin) followed by a
// variable number of operands. It is marked as having side effects and as
// possibly loading and storing, and additionally as convergent.
def G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS : GenericInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$intrin, variable_ops);
let hasSideEffects = true;
let mayLoad = true;
let mayStore = true;
let isConvergent = true;
}

View File

@ -39,6 +39,10 @@ class GINodeEquiv<Instruction i, SDNode node> {
// SelectionDAG has one setcc for all compares. This differentiates
// for G_ICMP and G_FCMP.
Instruction IfFloatingPoint = ?;
// SelectionDAG does not differentiate between convergent and non-convergent
// intrinsics. This specifies an alternate opcode for a convergent intrinsic.
Instruction IfConvergent = ?;
}
// These are defined in the same order as the G_* instructions.
@ -106,10 +110,17 @@ def : GINodeEquiv<G_FLOG2, flog2>;
def : GINodeEquiv<G_FLDEXP, fldexp>;
def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
def : GINodeEquiv<G_IS_FPCLASS, is_fpclass>;
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain> {
let IfConvergent = G_INTRINSIC_CONVERGENT;
}
// ISD::INTRINSIC_VOID can also be handled with G_INTRINSIC_W_SIDE_EFFECTS.
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
let IfConvergent = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS in {
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
}
def : GINodeEquiv<G_BR, br>;
def : GINodeEquiv<G_BSWAP, bswap>;
def : GINodeEquiv<G_BITREVERSE, bitreverse>;

View File

@ -48,6 +48,8 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default:
return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
}
@ -726,6 +728,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
default: {
unsigned NumBits =
TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);

View File

@ -2533,8 +2533,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
// Ignore the callsite attributes. Backend code is most likely not expecting
// an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB =
MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
@ -2885,7 +2884,7 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuil
}
}
MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
return true;
}

View File

@ -76,6 +76,9 @@ LegacyLegalizerInfo::LegacyLegalizerInfo() {
setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT, 0, {{1, Legal}});
setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, 0,
{{1, Legal}});
setLegalizeScalarToDifferentSizeStrategy(
TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);

View File

@ -119,8 +119,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
MIRBuilder.setInstrAndDebugLoc(MI);
if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
if (isa<GIntrinsic>(MI))
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {

View File

@ -775,30 +775,55 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
}
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
ArrayRef<Register> ResultRegs,
bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
/// Select the generic intrinsic opcode for the requested properties.
///
/// \p HasSideEffects chooses the _W_SIDE_EFFECTS form and \p IsConvergent
/// chooses the _CONVERGENT form; the two flags combine independently, giving
/// four possible opcodes.
static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
  if (IsConvergent)
    return HasSideEffects ? TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
                          : TargetOpcode::G_INTRINSIC_CONVERGENT;
  return HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
                        : TargetOpcode::G_INTRINSIC;
}
/// Build and insert an intrinsic instruction for \p ID whose opcode is chosen
/// from the explicit \p HasSideEffects and \p isConvergent flags.
///
/// Every register in \p ResultRegs is added as a def, followed by the
/// intrinsic ID operand. Use operands are not added here.
///
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder
MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
                                 ArrayRef<Register> ResultRegs,
                                 bool HasSideEffects, bool isConvergent) {
  auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
  // Iterate as Register (not raw unsigned) so the typed wrapper is preserved
  // all the way into addDef. Defs go first; the intrinsic ID follows them.
  for (Register ResultReg : ResultRegs)
    MIB.addDef(ResultReg);
  MIB.addIntrinsicID(ID);
  return MIB;
}
/// Build and insert an intrinsic instruction for \p ID, deriving the
/// side-effect and convergence properties from the intrinsic's attributes:
/// it is treated as side-effecting unless its memory effects say it does not
/// access memory, and as convergent if it carries the Convergent attribute.
MachineInstrBuilder
MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
                                 ArrayRef<Register> ResultRegs) {
  const auto IntrAttrs = Intrinsic::getAttributes(getContext(), ID);
  return buildIntrinsic(
      ID, ResultRegs,
      /*HasSideEffects=*/!IntrAttrs.getMemoryEffects().doesNotAccessMemory(),
      /*isConvergent=*/IntrAttrs.hasFnAttr(Attribute::Convergent));
}
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
ArrayRef<DstOp> Results,
bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
bool HasSideEffects,
bool isConvergent) {
auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
for (DstOp Result : Results)
Result.addDefToMIB(*getMRI(), MIB);
MIB.addIntrinsicID(ID);
return MIB;
}
/// Build and insert an intrinsic instruction for \p ID with destination
/// operands \p Results, deriving the side-effect and convergence properties
/// from the intrinsic's attributes: it is treated as side-effecting unless its
/// memory effects say it does not access memory, and as convergent if it
/// carries the Convergent attribute.
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
                                                     ArrayRef<DstOp> Results) {
  const auto IntrAttrs = Intrinsic::getAttributes(getContext(), ID);
  return buildIntrinsic(
      ID, Results,
      /*HasSideEffects=*/!IntrAttrs.getMemoryEffects().doesNotAccessMemory(),
      /*isConvergent=*/IntrAttrs.hasFnAttr(Attribute::Convergent));
}
MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);

View File

@ -32,6 +32,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeCalc.h"
@ -223,7 +224,11 @@ namespace {
bool verifyAllRegOpsScalar(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
bool verifyGIntrinsicSideEffects(const MachineInstr *MI);
bool verifyGIntrinsicConvergence(const MachineInstr *MI);
void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineBundleAfter(const MachineInstr *MI);
@ -955,6 +960,55 @@ bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
return true;
}
bool MachineVerifier::verifyGIntrinsicSideEffects(const MachineInstr *MI) {
auto Opcode = MI->getOpcode();
bool NoSideEffects = Opcode == TargetOpcode::G_INTRINSIC ||
Opcode == TargetOpcode::G_INTRINSIC_CONVERGENT;
unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
AttributeList Attrs = Intrinsic::getAttributes(
MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
if (NoSideEffects && DeclHasSideEffects) {
report(Twine(TII->getName(Opcode),
" used with intrinsic that accesses memory"),
MI);
return false;
}
if (!NoSideEffects && !DeclHasSideEffects) {
report(Twine(TII->getName(Opcode), " used with readnone intrinsic"), MI);
return false;
}
}
return true;
}
bool MachineVerifier::verifyGIntrinsicConvergence(const MachineInstr *MI) {
auto Opcode = MI->getOpcode();
bool NotConvergent = Opcode == TargetOpcode::G_INTRINSIC ||
Opcode == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
AttributeList Attrs = Intrinsic::getAttributes(
MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
bool DeclIsConvergent = Attrs.hasFnAttr(Attribute::Convergent);
if (NotConvergent && DeclIsConvergent) {
report(Twine(TII->getName(Opcode), " used with a convergent intrinsic"),
MI);
return false;
}
if (!NotConvergent && !DeclIsConvergent) {
report(
Twine(TII->getName(Opcode), " used with a non-convergent intrinsic"),
MI);
return false;
}
}
return true;
}
void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (isFunctionSelected)
report("Unexpected generic instruction in a Selected function", MI);
@ -1493,7 +1547,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
// TODO: Should verify number of def and use operands, but the current
// interface requires passing in IR types for mangling.
const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
@ -1502,21 +1558,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
unsigned IntrID = IntrIDOp.getIntrinsicID();
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
AttributeList Attrs = Intrinsic::getAttributes(
MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
if (NoSideEffects && DeclHasSideEffects) {
report("G_INTRINSIC used with intrinsic that accesses memory", MI);
break;
}
if (!NoSideEffects && !DeclHasSideEffects) {
report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
break;
}
}
if (!verifyGIntrinsicSideEffects(MI))
break;
if (!verifyGIntrinsicConvergence(MI))
break;
break;
}

View File

@ -1497,8 +1497,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
llvm_unreachable("unexpected vector shape");
MachineInstrBuilder UADD;
for (LLT HTy : HAddTys) {
UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
.addUse(HSum);
UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
HSum = UADD.getReg(0);
}

View File

@ -82,9 +82,10 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
}
auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
{MRI.getType(ExtReg)}, false)
.addReg(ExtReg);
auto ToSGPR = MIRBuilder
.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
{MRI.getType(ExtReg)})
.addReg(ExtReg);
ExtReg = ToSGPR.getReg(0);
}

View File

@ -42,7 +42,8 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
return true;
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
@ -67,8 +68,7 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
return MI.getNumOperands() >
(MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}
@ -86,13 +86,15 @@ static bool hasSourceMods(const MachineInstr &MI) {
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR:
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
case AMDGPU::G_BITCAST:
case AMDGPU::G_ANYEXT:
case AMDGPU::G_BUILD_VECTOR:
case AMDGPU::G_BUILD_VECTOR_TRUNC:
case AMDGPU::G_PHI:
return false;
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_interp_p1:
@ -228,7 +230,8 @@ bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
case AMDGPU::G_FCANONICALIZE:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
return true;
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:
@ -327,7 +330,8 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
case AMDGPU::G_FPTRUNC:
NegateOperand(MatchInfo->getOperand(1));
break;
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_rcp:

View File

@ -3431,8 +3431,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_INSERT:
return selectG_INSERT(I);
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_CONVERGENT:
return selectG_INTRINSIC(I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
case TargetOpcode::G_ICMP:
if (selectG_ICMP(I))

View File

@ -2346,10 +2346,10 @@ static MachineInstrBuilder extractF64Exponent(Register Hi,
auto Const0 = B.buildConstant(S32, FractBits - 32);
auto Const1 = B.buildConstant(S32, ExpBits);
auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
.addUse(Hi)
.addUse(Const0.getReg(0))
.addUse(Const1.getReg(0));
auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32})
.addUse(Hi)
.addUse(Const0.getReg(0))
.addUse(Const1.getReg(0));
return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}
@ -2437,8 +2437,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
/*HasSideEffects=*/false)
auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
.addUse(Unmerge.getReg(1));
auto LS2 = B.buildSub(S32, LS, One);
ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
@ -2671,15 +2670,16 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
auto OneOver2Pi = B.buildFConstant(Ty, 0.5 * numbers::inv_pi);
if (ST.hasTrigReducedRange()) {
auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
.addUse(MulVal.getReg(0))
.setMIFlags(Flags).getReg(0);
TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty})
.addUse(MulVal.getReg(0))
.setMIFlags(Flags)
.getReg(0);
} else
TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);
Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg), false)
B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg))
.addUse(TrigVal)
.setMIFlags(Flags);
MI.eraseFromParent();
@ -2772,7 +2772,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
// functions that use local objects. However, if these dead functions are
// not eliminated, we don't want a compile time error. Just emit a warning
// and a trap, since there should be no callable path here.
B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
B.buildUndef(DstReg);
MI.eraseFromParent();
return true;
@ -2798,8 +2798,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
// Adjust alignment for that dynamic shared memory array.
MFI->setDynLDSAlign(MF.getFunction(), *cast<GlobalVariable>(GV));
LLT S32 = LLT::scalar(32);
auto Sz =
B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32});
B.buildIntToPtr(DstReg, Sz);
MI.eraseFromParent();
return true;
@ -3074,9 +3073,9 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
const LLT F32 = LLT::scalar(32);
// Nothing in half is a denormal when promoted to f32.
auto Ext = B.buildFPExt(F32, Src, Flags);
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32}, false)
.addUse(Ext.getReg(0))
.setMIFlags(Flags);
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32})
.addUse(Ext.getReg(0))
.setMIFlags(Flags);
B.buildFPTrunc(Dst, Log2, Flags);
MI.eraseFromParent();
return true;
@ -3086,14 +3085,14 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags);
if (!ScaledInput) {
B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false)
B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)})
.addUse(Src)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(ScaledInput)
.setMIFlags(Flags);
@ -3153,9 +3152,8 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
if (ScaledInput)
X = ScaledInput;
auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
.addUse(X)
.setMIFlags(Flags);
auto Y =
B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}).addUse(X).setMIFlags(Flags);
Register R;
if (ST.hasFastFMAF32()) {
@ -3232,7 +3230,7 @@ bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
LLT Ty = B.getMRI()->getType(Dst);
auto Log2Operand = Ty == LLT::scalar(16)
? B.buildFLog2(Ty, Src, Flags)
: B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
: B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
.addUse(Src)
.setMIFlags(Flags);
auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
@ -3255,9 +3253,9 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
if (Ty == F16) {
// Nothing in half is a denormal when promoted to f32.
auto Ext = B.buildFPExt(F32, Src, Flags);
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32}, false)
.addUse(Ext.getReg(0))
.setMIFlags(Flags);
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32})
.addUse(Ext.getReg(0))
.setMIFlags(Flags);
B.buildFPTrunc(Dst, Log2, Flags);
MI.eraseFromParent();
return true;
@ -3267,9 +3265,9 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
if (allowApproxFunc(B.getMF(), Flags) ||
!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
.addUse(Src)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
.addUse(Src)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
}
@ -3287,7 +3285,7 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
auto AddOffset = B.buildSelect(F32, NeedsScaling, SixtyFour, Zero, Flags);
auto AddInput = B.buildFAdd(F32, Src, AddOffset, Flags);
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
.addUse(AddInput.getReg(0))
.setMIFlags(Flags);
@ -3307,9 +3305,9 @@ bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
auto Mul = B.buildFMul(Ty, Src, K, Flags);
if (Ty == LLT::scalar(32)) {
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
.addUse(Mul.getReg(0))
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
.addUse(Mul.getReg(0))
.setMIFlags(Flags);
} else {
B.buildFExp2(Dst, Mul.getReg(0), Flags);
}
@ -3429,7 +3427,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
auto A = B.buildFAdd(Ty, PHSubE, PL, Flags);
auto IntE = B.buildFPTOSI(LLT::scalar(32), E);
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
.addUse(A.getReg(0))
.setMIFlags(Flags);
auto R = B.buildFLdexp(Ty, Exp2, IntE, Flags);
@ -3471,20 +3469,20 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
if (Ty == S32) {
auto Log = B.buildFLog2(S32, Src0, Flags);
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
.addUse(Log.getReg(0))
.addUse(Src1)
.setMIFlags(Flags);
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
.addUse(Log.getReg(0))
.addUse(Src1)
.setMIFlags(Flags);
B.buildFExp2(Dst, Mul, Flags);
} else if (Ty == S16) {
// There's no f16 fmul_legacy, so we need to convert for it.
auto Log = B.buildFLog2(S16, Src0, Flags);
auto Ext0 = B.buildFPExt(S32, Log, Flags);
auto Ext1 = B.buildFPExt(S32, Src1, Flags);
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
.addUse(Ext0.getReg(0))
.addUse(Ext1.getReg(0))
.setMIFlags(Flags);
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
.addUse(Ext0.getReg(0))
.addUse(Ext1.getReg(0))
.setMIFlags(Flags);
B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags);
} else
@ -3526,9 +3524,9 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
//
// Convert floor(x) to (x - fract(x))
auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false)
.addUse(OrigSrc)
.setMIFlags(Flags);
auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64})
.addUse(OrigSrc)
.setMIFlags(Flags);
// Give source modifier matching some assistance before obscuring a foldable
// pattern.
@ -4477,9 +4475,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
// 1 / x -> RCP(x)
if (CLHS->isExactlyValue(1.0)) {
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
.addUse(RHS)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
.addUse(RHS)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
@ -4490,9 +4488,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
// -1 / x -> RCP( FNEG(x) )
if (CLHS->isExactlyValue(-1.0)) {
auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
.addUse(FNeg.getReg(0))
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
.addUse(FNeg.getReg(0))
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
@ -4506,9 +4504,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
return false;
// x / y -> x * (1.0 / y)
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
.addUse(RHS)
.setMIFlags(Flags);
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
.addUse(RHS)
.setMIFlags(Flags);
B.buildFMul(Res, LHS, RCP, Flags);
MI.eraseFromParent();
@ -4534,9 +4532,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI,
auto NegY = B.buildFNeg(ResTy, Y);
auto One = B.buildFConstant(ResTy, 1.0);
auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
.addUse(Y)
.setMIFlags(Flags);
auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
.addUse(Y)
.setMIFlags(Flags);
auto Tmp0 = B.buildFMA(ResTy, NegY, R, One);
R = B.buildFMA(ResTy, Tmp0, R, R);
@ -4570,18 +4568,18 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
auto LHSExt = B.buildFPExt(S32, LHS, Flags);
auto RHSExt = B.buildFPExt(S32, RHS, Flags);
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
.addUse(RHSExt.getReg(0))
.setMIFlags(Flags);
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
.addUse(RHSExt.getReg(0))
.setMIFlags(Flags);
auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
.addUse(RDst.getReg(0))
.addUse(RHS)
.addUse(LHS)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
.addUse(RDst.getReg(0))
.addUse(RHS)
.addUse(LHS)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
@ -4636,21 +4634,21 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto One = B.buildFConstant(S32, 1.0f);
auto DenominatorScaled =
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
.addUse(LHS)
.addUse(RHS)
.addImm(0)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
.addUse(LHS)
.addUse(RHS)
.addImm(0)
.setMIFlags(Flags);
auto NumeratorScaled =
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
.addUse(LHS)
.addUse(RHS)
.addImm(1)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
.addUse(LHS)
.addUse(RHS)
.addImm(1)
.setMIFlags(Flags);
auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
.addUse(DenominatorScaled.getReg(0))
.setMIFlags(Flags);
auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
.addUse(DenominatorScaled.getReg(0))
.setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
@ -4670,18 +4668,18 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
if (Mode.FP32Denormals != DenormalMode::getIEEE())
toggleSPDenormMode(false, B, ST, Mode);
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
.addUse(Fma4.getReg(0))
.addUse(Fma1.getReg(0))
.addUse(Fma3.getReg(0))
.addUse(NumeratorScaled.getReg(1))
.setMIFlags(Flags);
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
.addUse(Fma4.getReg(0))
.addUse(Fma1.getReg(0))
.addUse(Fma3.getReg(0))
.addUse(NumeratorScaled.getReg(1))
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
.addUse(Fmas.getReg(0))
.addUse(RHS)
.addUse(LHS)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
.addUse(Fmas.getReg(0))
.addUse(RHS)
.addUse(LHS)
.setMIFlags(Flags);
MI.eraseFromParent();
return true;
@ -4704,27 +4702,27 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
auto One = B.buildFConstant(S64, 1.0);
auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
.addUse(LHS)
.addUse(RHS)
.addImm(0)
.setMIFlags(Flags);
auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
.addUse(LHS)
.addUse(RHS)
.addImm(0)
.setMIFlags(Flags);
auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
.addUse(DivScale0.getReg(0))
.setMIFlags(Flags);
auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64})
.addUse(DivScale0.getReg(0))
.setMIFlags(Flags);
auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
.addUse(LHS)
.addUse(RHS)
.addImm(1)
.setMIFlags(Flags);
auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
.addUse(LHS)
.addUse(RHS)
.addImm(1)
.setMIFlags(Flags);
auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
@ -4751,14 +4749,14 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
Scale = DivScale1.getReg(1);
}
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
.addUse(Fma4.getReg(0))
.addUse(Fma3.getReg(0))
.addUse(Mul.getReg(0))
.addUse(Scale)
.setMIFlags(Flags);
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64})
.addUse(Fma4.getReg(0))
.addUse(Fma3.getReg(0))
.addUse(Mul.getReg(0))
.addUse(Scale)
.setMIFlags(Flags);
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res), false)
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res))
.addUse(Fmas.getReg(0))
.addUse(RHS)
.addUse(LHS)
@ -4779,10 +4777,10 @@ bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI,
LLT Ty = MRI.getType(Res0);
LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32);
auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}, false)
auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty})
.addUse(Val)
.setMIFlags(Flags);
auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy}, false)
auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy})
.addUse(Val)
.setMIFlags(Flags);
@ -4826,9 +4824,9 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
.addUse(Mul0.getReg(0))
.setMIFlags(Flags);
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
.addUse(Mul0.getReg(0))
.setMIFlags(Flags);
auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
@ -4881,8 +4879,8 @@ bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}, false)
.addReg(SqrtX.getReg(0));
auto SqrtY =
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX.getReg(0));
auto Half = B.buildFConstant(F64, 0.5);
auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
@ -4948,9 +4946,9 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
else
return false;
auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty}, false)
.addUse(Src)
.setMIFlags(Flags);
auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty})
.addUse(Src)
.setMIFlags(Flags);
// We don't need to concern ourselves with the snan handling difference, since
// the rsq quieted (or not) so use the one which will directly select.

View File

@ -288,7 +288,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
// rcp(sqrt(x))
if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
.addUse(SqrtSrcMI->getOperand(0).getReg())
.setMIFlags(MI.getFlags());
};
@ -298,7 +298,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
// sqrt(rcp(x))
if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
.addUse(RcpSrcMI->getOperand(0).getReg())
.setMIFlags(MI.getFlags());
};

View File

@ -633,8 +633,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
return AltMappings;
}
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT:
return getInstrAlternativeMappingsIntrinsic(MI, MRI);
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
default:
break;
@ -923,8 +925,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// The ballot becomes a no-op during instruction selection.
CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
{LLT::scalar(Subtarget.isWave32() ? 32 : 64)},
false)
{LLT::scalar(Subtarget.isWave32() ? 32 : 64)})
.addReg(CondReg)
.getReg(0);
MRI.setRegClass(CondReg, WaveRC);
@ -1452,7 +1453,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
const LLT S32 = LLT::scalar(32);
unsigned FirstOpnd = MI.getOpcode() == AMDGPU::G_INTRINSIC ? 2 : 1;
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
Register SrcReg = MI.getOperand(FirstOpnd).getReg();
Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
@ -2949,7 +2950,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyMappingSBufferLoad(OpdMapper);
return;
}
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::amdgcn_readlane: {
substituteSimpleCopyRegs(OpdMapper, 2);
@ -3035,7 +3037,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
executeInWaterfallLoop(MI, MRI, { N });
return;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
@ -4198,7 +4201,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
break;
}
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
@ -4560,7 +4564,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_s_getreg:

View File

@ -11328,6 +11328,7 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
return false;
return true;
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT:
switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fmad_ftz:
@ -13761,7 +13762,8 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
const MachineRegisterInfo &MRI, unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
case AMDGPU::G_INTRINSIC: {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
case Intrinsic::amdgcn_workitem_id_x:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
@ -13835,7 +13837,6 @@ Align SITargetLowering::computeKnownAlignForTargetInstr(
AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
if (MaybeAlign RetAlign = Attrs.getRetAlignment())
return *RetAlign;
return Align(1);
}
return Align(1);
}

View File

@ -976,7 +976,7 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
// Use Intrinsic::spv_extractelt so dynamic vs static extraction is
// handled later: extr = spv_extractelt LoadedVector, IndexRegister.
MachineInstrBuilder ExtractInst = MIRBuilder.buildIntrinsic(
Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true);
Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true, false);
ExtractInst.addUse(LoadedVector).addUse(IndexRegister);
// If the index is dynamic, need check if it's < 3, and then use a select.
@ -1644,8 +1644,8 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
Register Reg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
MRI->setType(Reg, LLType);
GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
auto GEPInst = MIRBuilder.buildIntrinsic(Intrinsic::spv_gep,
ArrayRef<Register>{Reg}, true);
auto GEPInst = MIRBuilder.buildIntrinsic(
Intrinsic::spv_gep, ArrayRef<Register>{Reg}, true, false);
GEPInst
.addImm(GepMI->getOperand(2).getImm()) // In bound.
.addUse(ArrayMI->getOperand(0).getReg()) // Alloca.

View File

@ -276,6 +276,7 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
return selectOpUndef(ResVReg, ResType, I);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
return selectIntrinsic(ResVReg, ResType, I);
case TargetOpcode::G_BITREVERSE:
return selectBitreverse(ResVReg, ResType, I);
@ -591,15 +592,16 @@ static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) {
bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I) const {
unsigned OpOffset =
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register Ptr = I.getOperand(1 + OpOffset).getReg();
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.addUse(Ptr);
if (!I.getNumMemOperands()) {
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
I.getOpcode() ==
TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
} else {
addMemoryOperands(*I.memoperands_begin(), MIB);
@ -608,8 +610,7 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
}
bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
unsigned OpOffset =
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
Register StoreVal = I.getOperand(0 + OpOffset).getReg();
Register Ptr = I.getOperand(1 + OpOffset).getReg();
MachineBasicBlock &BB = *I.getParent();
@ -617,7 +618,9 @@ bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
.addUse(Ptr)
.addUse(StoreVal);
if (!I.getNumMemOperands()) {
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
I.getOpcode() ==
TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
} else {
addMemoryOperands(*I.memoperands_begin(), MIB);
@ -719,7 +722,7 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
Register MemSemEqReg;
Register MemSemNeqReg;
Register Ptr = I.getOperand(2).getReg();
if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) {
if (!isa<GIntrinsic>(I)) {
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));

View File

@ -219,7 +219,7 @@ MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
ConstReg = ConstInstr->getOperand(1).getReg();
return MRI->getVRegDef(ConstReg);
}
return ConstInstr;
return MRI->getVRegDef(ConstReg);
}
uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {

View File

@ -10,7 +10,7 @@ body: |
; CHECK-NOT: DIVERGENT: {{.*}}llvm.amdgcn.readfirstlane
%6:_(p1) = G_IMPLICIT_DEF
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
%5:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
%5:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
G_STORE %5(s32), %6(p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0
...
@ -29,7 +29,7 @@ body: |
%9:_(s64) = G_CONSTANT i64 8
%10:_(p4) = G_PTR_ADD %7, %9(s64)
%11:_(p1) = G_LOAD %10(p4) :: (dereferenceable invariant load (p1), addrspace 4)
%12:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %8(s32), %13(s32), 33
%12:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %8(s32), %13(s32), 33
G_STORE %12(s64), %11(p1) :: (volatile store (s64) , addrspace 1)
S_ENDPGM 0
@ -52,7 +52,7 @@ body: |
%11:_(s32) = G_EXTRACT_VECTOR_ELT %8(<2 x s32>), %12(s32)
%13:_(s64) = G_CONSTANT i64 4
%14:_(p4) = G_PTR_ADD %7, %13(s64)
%15:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %9(s32), %11(s32), 33
%15:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %9(s32), %11(s32), 33
G_STORE %15(s64), %16(p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0
@ -70,7 +70,7 @@ body: |
%6:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
%7:_(s32) = G_LOAD %6(p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
%8:_(s1) = G_TRUNC %7(s32)
%9:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %8(s1)
%9:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %8(s1)
G_STORE %9(s64), %10(p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0

View File

@ -4,16 +4,16 @@
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
# CHECK: DIVERGENT: G_BR %bb.2
# CHECK-LABEL: BLOCK bb.1
# CHECK-LABEL: BLOCK bb.2
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.1, %{{[0-9]*}}:_(s32), %bb.0
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI %{{[0-9]*}}:_(s1), %bb.1, %{{[0-9]*}}:_(s1), %bb.0
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
# CHECK: DIVERGENT: G_BR %bb.4
# CHECK-LABEL: BLOCK bb.3
@ -44,7 +44,7 @@ body: |
%14:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
%16:_(s1) = G_ICMP intpred(slt), %14(s32), %15
%18:_(s1) = G_XOR %16, %17
%19:_(s1), %20:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
%19:_(s1), %20:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
G_BRCOND %19(s1), %bb.2
G_BR %bb.3
@ -60,8 +60,8 @@ body: |
%25:_(s32) = G_PHI %22(s32), %bb.2, %33(s32), %bb.1
%26:_(s1) = G_PHI %24(s1), %bb.2, %18(s1), %bb.1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
%27:_(s1), %28:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
%27:_(s1), %28:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
G_BRCOND %27(s1), %bb.4
G_BR %bb.5
@ -72,7 +72,7 @@ body: |
bb.5:
%31:_(s32) = G_PHI %25(s32), %bb.3, %29(s32), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
G_STORE %31(s32), %32(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
S_ENDPGM 0

View File

@ -27,7 +27,7 @@ body: |
%11:_(s64) = G_PHI %12(s64), %bb.2, %15(s64), %bb.1
%18:_(s1) = G_CONSTANT i1 false
%12:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
%12:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %12(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
@ -35,7 +35,7 @@ body: |
bb.3:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%14:_(s64) = G_PHI %12(s64), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
S_ENDPGM 0
...
@ -82,7 +82,7 @@ body: |
successors: %bb.5, %bb.4
%15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
@ -90,7 +90,7 @@ body: |
bb.5:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_BR %bb.3
bb.6:
@ -140,7 +140,7 @@ body: |
successors: %bb.5, %bb.4
%15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.5
@ -148,7 +148,7 @@ body: |
bb.5:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_BR %bb.3
bb.6:
@ -191,7 +191,7 @@ body: |
%15:_(s64) = G_PHI %25(s64), %bb.2, %16(s64), %bb.3
%24:_(s1) = G_CONSTANT i1 false
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
@ -201,7 +201,7 @@ body: |
successors: %bb.5, %bb.2
%18:_(s64) = G_PHI %16(s64), %bb.3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_BRCOND %13(s1), %bb.2
G_BR %bb.5
@ -241,7 +241,7 @@ body: |
bb.2:
%15:_(s64) = G_PHI %16(s64), %bb.4, %19(s64), %bb.1
%24:_(s1) = G_CONSTANT i1 true
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
bb.3:
successors: %bb.4, %bb.3
@ -259,7 +259,7 @@ body: |
bb.5:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
S_ENDPGM 0
...
@ -291,7 +291,7 @@ body: |
%10:_(s64) = G_PHI %11(s64), %bb.2, %19(s64), %bb.1
%24:_(s1) = G_CONSTANT i1 false
%11:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
%11:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %11(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.3
@ -300,7 +300,7 @@ body: |
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
; CHECK-NOT: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%13:_(s64) = G_PHI %11(s64), %bb.2
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
%14:_(p4) = COPY %3(p4)
%15:_(s64) = G_CONSTANT i64 40
%16:_(p4) = G_PTR_ADD %14, %15(s64)
@ -354,7 +354,7 @@ body: |
%15:_(s64) = G_PHI %23(s64), %bb.2, %16(s64), %bb.3
%25:_(s1) = G_CONSTANT i1 false
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
; CHECK: DIVERGENT: SI_LOOP
SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.4
@ -362,7 +362,7 @@ body: |
bb.4:
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
%18:_(s64) = G_PHI %16(s64), %bb.3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
bb.5:

View File

@ -43,18 +43,18 @@ body: |
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break)
; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break)
%19:_(s32) = G_PHI %18(s32), %bb.7, %25(s32), %bb.4
%20:_(s32) = G_PHI %6(s32), %bb.7, %25(s32), %bb.4
%21:_(s1) = G_PHI %34(s1), %bb.7, %33(s1), %bb.4
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
%22:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
%22:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
SI_LOOP %22(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
G_BR %bb.6
bb.6:
%24:_(s32) = G_PHI %22(s32), %bb.5
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
SI_RETURN
bb.7:

View File

@ -273,6 +273,12 @@
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_INTRINSIC_CONVERGENT (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_ANYEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected

View File

@ -222,9 +222,9 @@ body: |
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%1:_(s32) = COPY $sgpr0
@ -236,9 +236,9 @@ body: |
%7:_(s44) = G_SSHLSAT %6, %5(s44)
%8:_(s64) = G_ANYEXT %7(s44)
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64)
%11:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
%11:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
$sgpr0 = COPY %11(s32)
%12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
%12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
$sgpr1 = COPY %12(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
@ -261,9 +261,9 @@ body: |
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
%1:_(s32) = COPY $vgpr0
@ -276,9 +276,9 @@ body: |
%8:_(s55) = G_SSHLSAT %6, %7(s55)
%9:_(s64) = G_ANYEXT %8(s55)
%10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64)
%12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
%12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
$vgpr0 = COPY %12(s32)
%13:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %11(s32)
%13:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %11(s32)
$vgpr1 = COPY %13(s32)
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1

View File

@ -37,7 +37,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:_(s32) = COPY $sgpr0
@ -49,7 +49,7 @@ body: |
%5:_(s32) = G_SSHLSAT %3, %4(s32)
%7:_(s32) = G_SSHLSAT %5, %6(s32)
%9:_(s32) = G_SSHLSAT %7, %8(s32)
%10:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
%10:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
$sgpr0 = COPY %10(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0

View File

@ -19,7 +19,7 @@ body: |
; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 0, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
%1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
S_ENDPGM 0, implicit %1
...
@ -42,7 +42,7 @@ body: |
; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 65535, 0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535
%1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535
S_ENDPGM 0, implicit %1
...

View File

@ -23,7 +23,7 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@ -49,7 +49,7 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...
@ -71,7 +71,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
S_ENDPGM 0, implicit %4
...
@ -93,7 +93,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
S_ENDPGM 0, implicit %4
...
@ -119,7 +119,7 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@ -145,6 +145,6 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...

View File

@ -23,7 +23,7 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@ -49,7 +49,7 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_FPTRUNC %0
%3:vgpr(s16) = G_FPTRUNC %1
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...
@ -71,7 +71,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
S_ENDPGM 0, implicit %4
...
@ -93,7 +93,7 @@ body: |
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
S_ENDPGM 0, implicit %4
...
@ -119,7 +119,7 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
S_ENDPGM 0, implicit %4
...
@ -145,6 +145,6 @@ body: |
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s64) = G_FPEXT %0
%3:vgpr(s64) = G_FPEXT %1
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
S_ENDPGM 0, implicit %4
...

View File

@ -2,7 +2,7 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2> %t | FileCheck -check-prefix=GCN %s
# RUN: FileCheck -check-prefix=ERR %s < %t
# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s)
# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s)
---
name: readfirstlane_v
@ -20,7 +20,7 @@ body: |
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
; GCN-NEXT: S_ENDPGM 0, implicit [[V_READFIRSTLANE_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
%1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
S_ENDPGM 0, implicit %1
...
@ -39,7 +39,7 @@ body: |
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[COPY]]
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
%0:vgpr(s32) = G_CONSTANT i32 123
%1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
%1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
S_ENDPGM 0, implicit %1
...
@ -57,9 +57,9 @@ body: |
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; GCN-NEXT: S_ENDPGM 0, implicit [[INT]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
%1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
S_ENDPGM 0, implicit %1
...

View File

@ -13,7 +13,7 @@ body: |
bb.0:
; GCN-LABEL: name: s_barrier
; GCN: S_BARRIER
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.barrier)
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.barrier)
...

View File

@ -51,8 +51,8 @@ define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) {
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret i32 %vgpr
}
@ -66,10 +66,10 @@ define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
ret i64 %vgpr
}
@ -83,10 +83,10 @@ define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
ret <2 x i32> %vgpr
}
@ -99,10 +99,10 @@ define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
%value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
@ -116,8 +116,8 @@ define amdgpu_ps ptr addrspace(3) @sgpr_return_p3i8(ptr addrspace(3) %vgpr) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret ptr addrspace(3) %vgpr
}
@ -131,10 +131,10 @@ define amdgpu_ps ptr addrspace(1) @sgpr_return_p1i8(ptr addrspace(1) %vgpr) {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
ret ptr addrspace(1) %vgpr
}
@ -146,8 +146,8 @@ define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret <2 x i16> %vgpr
}

View File

@ -82,10 +82,10 @@ define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
main_body:
%tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0
@ -97,8 +97,8 @@ define amdgpu_vs i32 @non_void_ret() {
; CHECK-LABEL: name: non_void_ret
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
ret i32 0
}

View File

@ -34,15 +34,15 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) {
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2
; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64)
; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INTRINSIC_CONVERGENT]](s64)
; CHECK-NEXT: G_BRCOND [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.atomicrmw.end:
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32), %bb.2
; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INTRINSIC_CONVERGENT]](s64), %bb.2
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0
%oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst

View File

@ -97,8 +97,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
; CHECK-NEXT: G_BRCOND [[INT]](s1), %bb.2
; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:_(s1), [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS1:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
; CHECK-NEXT: G_BRCOND [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS]](s1), %bb.2
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.bb1:
@ -108,7 +108,7 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb2:
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS1]](s64)
; CHECK-NEXT: SI_RETURN
bb:
br i1 %arg, label %bb2, label %bb1

View File

@ -2,12 +2,12 @@
# Make sure incorrect usage of control flow intrinsics fails to legalize in case some transform separated the intrinsic from its branch.
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
---
@ -19,7 +19,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
bb.1:
G_BRCOND %3, %bb.1
@ -34,7 +34,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s32) = G_SELECT %3, %0, %1
S_ENDPGM 0, implicit %5
@ -48,7 +48,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s32) = G_SELECT %3, %0, %1
G_BRCOND %3, %bb.1
@ -67,7 +67,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 false
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
@ -93,7 +93,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_OR %3, %5
G_BRCOND %6, %bb.2
@ -118,7 +118,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
S_NOP 0, implicit %6

View File

@ -2,7 +2,7 @@
# Make sure there's no crash if there is somehow no successor block.
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
---
name: brcond_si_if_no_succ_block
@ -16,6 +16,6 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
G_BRCOND %3, %bb.1
...

View File

@ -150,7 +150,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
G_BRCOND %3, %bb.1
bb.1:
@ -189,7 +189,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
G_BRCOND %3, %bb.1
bb.1:
@ -244,7 +244,7 @@ body: |
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
G_BRCOND %3, %bb.2
G_BR %bb.1
@ -303,7 +303,7 @@ body: |
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
G_BRCOND %3, %bb.1
G_BR %bb.2
@ -360,7 +360,7 @@ body: |
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
G_BRCOND %3, %bb.1
bb.2:
@ -405,7 +405,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s32) = COPY $vgpr2
G_BRCOND %3, %bb.1
@ -466,7 +466,7 @@ body: |
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
S_NOP 0
S_NOP 0
G_BRCOND %3, %bb.2
@ -521,7 +521,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
@ -588,7 +588,7 @@ body: |
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
G_BRCOND %6, %bb.2
@ -653,7 +653,7 @@ body: |
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%4:_(s1) = G_CONSTANT i1 true
%5:_(s1) = G_XOR %3, %4
G_BRCOND %5, %bb.1
@ -711,7 +711,7 @@ body: |
bb.1:
successors: %bb.1, %bb.2
S_NOP 0
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
%4:_(s1) = G_CONSTANT i1 true
%5:_(s1) = G_XOR %3, %4
G_BRCOND %5, %bb.2

View File

@ -304,7 +304,7 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
@ -313,7 +313,7 @@ body: |
%5:sgpr(s32) = G_CONSTANT i32 17
%6:sgpr(s32) = G_SMIN %4, %5
%8:vgpr(s32) = COPY %6(s32)
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
%7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
...

View File

@ -304,7 +304,7 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
@ -313,7 +313,7 @@ body: |
%5:sgpr(s32) = G_CONSTANT i32 17
%6:sgpr(s32) = G_UMIN %4, %5
%8:vgpr(s32) = COPY %6(s32)
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
%7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0

View File

@ -79,8 +79,8 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@ -135,8 +135,8 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)

View File

@ -15,11 +15,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1
S_ENDPGM 0, implicit %2
...
@ -36,11 +36,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1
S_ENDPGM 0, implicit %2
...
@ -57,11 +57,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %2
%3:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %2
S_ENDPGM 0, implicit %3
...

View File

@ -13,9 +13,9 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
...
@ -31,8 +31,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
...

View File

@ -17,9 +17,9 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
...

View File

@ -13,9 +13,9 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
...
@ -31,8 +31,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
...

View File

@ -16,10 +16,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
---
@ -37,10 +37,10 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
---
@ -56,10 +56,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...
---
@ -76,8 +76,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
...

View File

@ -14,9 +14,9 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32)
%0:_(s32) = COPY $sgpr0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
...
---
@ -32,8 +32,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
...

View File

@ -17,9 +17,9 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), %0, %1
...

View File

@ -15,8 +15,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
...

View File

@ -15,8 +15,8 @@ body: |
; CHECK: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s1), %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
%1:_(s1), %2:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
...

View File

@ -12,8 +12,8 @@ body: |
; CHECK: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64)
%0:_(s64) = COPY $sgpr0_sgpr1
%1:_(s1), %2:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
%1:_(s1), %2:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
...

View File

@ -16,10 +16,10 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
---
@ -35,10 +35,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
---
@ -54,10 +54,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...
---
@ -72,8 +72,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
...

View File

@ -16,10 +16,10 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
---
@ -35,10 +35,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
---
@ -54,10 +54,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...
---
@ -72,8 +72,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
...

View File

@ -138,8 +138,8 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -199,8 +199,8 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -268,8 +268,8 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -330,8 +330,8 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -159,8 +159,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -224,8 +224,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -288,8 +288,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -345,8 +345,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -429,8 +429,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]]
; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]]
; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -506,8 +506,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]]
; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]]
; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -16,7 +16,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
@ -24,12 +24,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -47,7 +47,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@ -55,12 +55,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -78,7 +78,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@ -86,12 +86,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -109,7 +109,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
@ -117,12 +117,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -140,7 +140,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_f32_16x16x16bf16_1k_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@ -148,12 +148,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -171,7 +171,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>)
; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
@ -179,12 +179,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
%3:_(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0
%3:_(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
...
@ -202,7 +202,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>)
; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1
@ -210,11 +210,11 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<2 x s32>) = COPY $agpr0_agpr1
%3:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0
%3:_(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1 = COPY %3
...

View File

@ -16,7 +16,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@ -24,12 +24,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -47,7 +47,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@ -55,12 +55,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -78,7 +78,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
@ -86,12 +86,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -109,7 +109,7 @@ body: |
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
@ -117,12 +117,12 @@ body: |
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -141,7 +141,7 @@ body: |
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
@ -150,13 +150,13 @@ body: |
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(s32) = COPY $vgpr20
%4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0
%4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...
@ -175,7 +175,7 @@ body: |
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
@ -184,13 +184,13 @@ body: |
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(s32) = COPY $vgpr20
%4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0
%4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
...
@ -209,7 +209,7 @@ body: |
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
@ -218,13 +218,13 @@ body: |
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(s32) = COPY $vgpr20
%4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0
%4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...
@ -243,7 +243,7 @@ body: |
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
@ -252,13 +252,13 @@ body: |
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(s32) = COPY $vgpr20
%4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0
%4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
...
@ -277,7 +277,7 @@ body: |
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
@ -286,13 +286,13 @@ body: |
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(s32) = COPY $vgpr20
%4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0
%4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
...
@ -311,7 +311,7 @@ body: |
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
@ -320,12 +320,12 @@ body: |
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(s32) = COPY $vgpr20
%4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0
%4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
...

View File

@ -16,12 +16,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -42,12 +42,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -65,12 +65,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -91,12 +91,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -114,12 +114,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -140,12 +140,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -163,12 +163,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -189,12 +189,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -212,12 +212,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -238,12 +238,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -261,12 +261,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -287,12 +287,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -310,12 +310,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -336,12 +336,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -359,12 +359,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -385,12 +385,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -408,12 +408,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -434,12 +434,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -457,12 +457,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -483,12 +483,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -506,12 +506,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -532,12 +532,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -555,12 +555,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -581,12 +581,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -604,12 +604,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -630,12 +630,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -653,12 +653,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -679,12 +679,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -702,12 +702,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -728,12 +728,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(s32) = COPY $sgpr32
%1:_(s32) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -751,12 +751,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -777,12 +777,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
...
@ -800,12 +800,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -826,12 +826,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -849,12 +849,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -875,12 +875,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -898,12 +898,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -924,12 +924,12 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
...
@ -947,12 +947,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr2
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...
@ -973,11 +973,11 @@ body: |
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
%0:_(<2 x s16>) = COPY $sgpr32
%1:_(<2 x s16>) = COPY $sgpr33
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
...

View File

@ -81,8 +81,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -128,8 +128,8 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -187,8 +187,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -81,8 +81,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -128,8 +128,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -187,8 +187,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -14,9 +14,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
...
---
@ -30,7 +30,7 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
...

View File

@ -15,10 +15,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@ -33,10 +33,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@ -52,10 +52,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@ -72,10 +72,10 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@ -93,11 +93,11 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s32)
%0:_(s32) = COPY $agpr0
%1:_(s32) = COPY $agpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
S_ENDPGM 0, implicit %2
...
@ -114,10 +114,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
%0:_(s32) = COPY $agpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@ -135,10 +135,10 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $agpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...
---
@ -155,8 +155,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $agpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
...

View File

@ -16,8 +16,8 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32))
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32)
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret i32 %val
@ -37,11 +37,11 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
%val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <2 x i32> %val
@ -61,14 +61,14 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
%val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <3 x i32> %val
@ -88,29 +88,29 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
; GFX7-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
; GFX7-NEXT: $sgpr3 = COPY [[INT3]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
; GFX7-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
; GFX7-NEXT: $sgpr4 = COPY [[INT4]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
; GFX7-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
; GFX7-NEXT: $sgpr5 = COPY [[INT5]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
; GFX7-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
; GFX7-NEXT: $sgpr6 = COPY [[INT6]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
; GFX7-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
; GFX7-NEXT: $sgpr7 = COPY [[INT7]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
%val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <8 x i32> %val
@ -130,53 +130,53 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4)
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>)
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
; GFX7-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
; GFX7-NEXT: $sgpr3 = COPY [[INT3]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
; GFX7-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
; GFX7-NEXT: $sgpr4 = COPY [[INT4]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
; GFX7-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
; GFX7-NEXT: $sgpr5 = COPY [[INT5]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
; GFX7-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
; GFX7-NEXT: $sgpr6 = COPY [[INT6]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
; GFX7-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
; GFX7-NEXT: $sgpr7 = COPY [[INT7]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32)
; GFX7-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
; GFX7-NEXT: $sgpr8 = COPY [[INT8]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
; GFX7-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32)
; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32)
; GFX7-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
; GFX7-NEXT: $sgpr9 = COPY [[INT9]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
; GFX7-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32)
; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32)
; GFX7-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
; GFX7-NEXT: $sgpr10 = COPY [[INT10]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
; GFX7-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32)
; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32)
; GFX7-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
; GFX7-NEXT: $sgpr11 = COPY [[INT11]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
; GFX7-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32)
; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32)
; GFX7-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
; GFX7-NEXT: $sgpr12 = COPY [[INT12]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
; GFX7-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32)
; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32)
; GFX7-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
; GFX7-NEXT: $sgpr13 = COPY [[INT13]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
; GFX7-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32)
; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32)
; GFX7-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
; GFX7-NEXT: $sgpr14 = COPY [[INT14]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
; GFX7-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32)
; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32)
; GFX7-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
; GFX7-NEXT: $sgpr15 = COPY [[INT15]](s32)
; GFX7-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
; GFX7-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32)
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
%val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <16 x i32> %val
@ -887,8 +887,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -939,8 +939,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -993,8 +993,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1045,8 +1045,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1096,8 +1096,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1149,8 +1149,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1214,8 +1214,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1277,8 +1277,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1339,8 +1339,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1401,8 +1401,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1463,8 +1463,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2
@ -1524,8 +1524,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX7-NEXT: successors: %bb.4, %bb.2

View File

@ -79,8 +79,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -125,8 +125,8 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -183,8 +183,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -81,8 +81,8 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -127,8 +127,8 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -185,8 +185,8 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -79,8 +79,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -125,8 +125,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -183,8 +183,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -81,8 +81,8 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -127,8 +127,8 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
@ -185,8 +185,8 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)

View File

@ -16,10 +16,10 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
@ -37,10 +37,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
@ -58,10 +58,10 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...
@ -78,9 +78,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
...

View File

@ -17,11 +17,11 @@ body: |
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2
...
---
@ -37,11 +37,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(ne), %0, %1
%3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %2
%3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2
...
---
@ -57,8 +57,8 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1)
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1)
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %1
%2:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %1
...

View File

@ -16,11 +16,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@ -36,11 +36,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@ -57,11 +57,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $vgpr1
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@ -79,11 +79,11 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
---
@ -100,9 +100,9 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...

View File

@ -14,9 +14,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0
...
---
@ -30,7 +30,7 @@ body: |
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0
...

View File

@ -33,8 +33,8 @@ body: |
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@ -101,8 +101,8 @@ body: |
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)

View File

@ -13,7 +13,7 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s
; GFX6ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.gws.sema.release.all
; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0)
; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0)
; GCN-LABEL: {{^}}gws_sema_release_all_offset0:
; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}

View File

@ -165,14 +165,12 @@ TEST_F(AArch64GISelMITest, BuildIntrinsic) {
collectCopies(Copies, MF);
// Make sure DstOp version works. sqrt is just a placeholder intrinsic.
B.buildIntrinsic(Intrinsic::sqrt, {S64}, false)
.addUse(Copies[0]);
B.buildIntrinsic(Intrinsic::sqrt, {S64}).addUse(Copies[0]);
// Make sure register version works
SmallVector<Register, 1> Results;
Results.push_back(MRI->createGenericVirtualRegister(S64));
B.buildIntrinsic(Intrinsic::sqrt, Results, false)
.addUse(Copies[1]);
B.buildIntrinsic(Intrinsic::sqrt, Results).addUse(Copies[1]);
auto CheckStr = R"(
; CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY $x0

View File

@ -498,6 +498,10 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint"));
}
if (!Equiv.isValueUnset("IfConvergent") &&
N->getIntrinsicInfo(CGP)->isConvergent)
return &Target.getInstruction(Equiv.getValueAsDef("IfConvergent"));
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
const TreePredicateFn &Predicate = Call.Fn;
if (!Equiv.isValueUnset("IfSignExtend") &&
@ -863,7 +867,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
// Match the used operands (i.e. the children of the operator).
bool IsIntrinsic =
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS";
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS" ||
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_CONVERGENT" ||
SrcGIOrNull->TheDef->getName() ==
"G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS";
const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP);
if (IsIntrinsic && !II)
return failedImport("Expected IntInit containing intrinsic ID)");