[GlobalISel] convergent intrinsics
Introduced the convergent equivalent of the existing G_INTRINSIC opcodes: - G_INTRINSIC_CONVERGENT - G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS Out of the targets that currently have some support for GlobalISel, the patch assumes that the convergent intrinsics only relevant to SPIRV and AMDGPU. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D154766
This commit is contained in:
parent
f2e44238ee
commit
d9847cde48
@ -856,13 +856,25 @@ it during passes like legalization. This is needed because calls to exception
|
||||
throw routines do not return, so no code that must be on an executable path must
|
||||
be placed after throwing.
|
||||
|
||||
G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
G_INTRINSIC, G_INTRINSIC_CONVERGENT
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Call an intrinsic
|
||||
Call an intrinsic that has no side-effects.
|
||||
|
||||
The _W_SIDE_EFFECTS version is considered to have unknown side-effects and
|
||||
as such cannot be reordered across other side-effecting instructions.
|
||||
The _CONVERGENT variant corresponds to an LLVM IR intrinsic marked `convergent`.
|
||||
|
||||
.. note::
|
||||
|
||||
Unlike SelectionDAG, there is no _VOID variant. Both of these are permitted
|
||||
to have zero, one, or multiple results.
|
||||
|
||||
G_INTRINSIC_W_SIDE_EFFECTS, G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Call an intrinsic that is considered to have unknown side-effects and as such
|
||||
cannot be reordered across other side-effecting instructions.
|
||||
|
||||
The _CONVERGENT variant corresponds to an LLVM IR intrinsic marked `convergent`.
|
||||
|
||||
.. note::
|
||||
|
||||
|
||||
@ -366,14 +366,33 @@ public:
|
||||
}
|
||||
|
||||
bool is(Intrinsic::ID ID) const { return getIntrinsicID() == ID; }
|
||||
|
||||
bool hasSideEffects() const {
|
||||
return getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
|
||||
switch (getOpcode()) {
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool isConvergent() const {
|
||||
switch (getOpcode()) {
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool classof(const MachineInstr *MI) {
|
||||
switch (MI->getOpcode()) {
|
||||
case TargetOpcode::G_INTRINSIC:
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
||||
@ -1108,20 +1108,25 @@ public:
|
||||
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src,
|
||||
const SrcOp &Op, unsigned Index);
|
||||
|
||||
/// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
|
||||
/// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
|
||||
/// result register definition unless \p Reg is NoReg (== 0). The second
|
||||
/// operand will be the intrinsic's ID.
|
||||
/// Build and insert a G_INTRINSIC instruction.
|
||||
///
|
||||
/// Callers are expected to add the required definitions and uses afterwards.
|
||||
/// There are four different opcodes based on combinations of whether the
|
||||
/// intrinsic has side effects and whether it is convergent. These properties
|
||||
/// can be specified as explicit parameters, or else they are retrieved from
|
||||
/// the MCID for the intrinsic.
|
||||
///
|
||||
/// The parameter \p Res provides the Registers or MOs that will be defined by
|
||||
/// this instruction.
|
||||
///
|
||||
/// \pre setBasicBlock or setMI must have been called.
|
||||
///
|
||||
/// \return a MachineInstrBuilder for the newly created instruction.
|
||||
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res,
|
||||
bool HasSideEffects);
|
||||
bool HasSideEffects, bool isConvergent);
|
||||
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<Register> Res);
|
||||
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res,
|
||||
bool HasSideEffects);
|
||||
bool HasSideEffects, bool isConvergent);
|
||||
MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, ArrayRef<DstOp> Res);
|
||||
|
||||
/// Build and insert \p Res = G_FPTRUNC \p Op
|
||||
///
|
||||
|
||||
@ -426,6 +426,12 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC)
|
||||
/// Generic intrinsic use (with side effects).
|
||||
HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS)
|
||||
|
||||
/// Generic intrinsic use (without side effects).
|
||||
HANDLE_TARGET_OPCODE(G_INTRINSIC_CONVERGENT)
|
||||
|
||||
/// Generic intrinsic use (with side effects).
|
||||
HANDLE_TARGET_OPCODE(G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS)
|
||||
|
||||
/// Generic extension allowing rubbish in high bits.
|
||||
HANDLE_TARGET_OPCODE(G_ANYEXT)
|
||||
|
||||
|
||||
@ -1228,10 +1228,6 @@ def G_INTRINSIC : GenericInstruction {
|
||||
let OutOperandList = (outs);
|
||||
let InOperandList = (ins unknown:$intrin, variable_ops);
|
||||
let hasSideEffects = false;
|
||||
|
||||
// Conservatively assume this is convergent. If there turnes out to
|
||||
// be a need, there should be separate convergent intrinsic opcodes.
|
||||
let isConvergent = 1;
|
||||
}
|
||||
|
||||
// Intrinsic with side effects.
|
||||
@ -1241,9 +1237,23 @@ def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction {
|
||||
let hasSideEffects = true;
|
||||
let mayLoad = true;
|
||||
let mayStore = true;
|
||||
}
|
||||
|
||||
// Conservatively assume this is convergent. If there turnes out to
|
||||
// be a need, there should be separate convergent intrinsic opcodes.
|
||||
// Convergent intrinsic without side effects.
|
||||
def G_INTRINSIC_CONVERGENT : GenericInstruction {
|
||||
let OutOperandList = (outs);
|
||||
let InOperandList = (ins unknown:$intrin, variable_ops);
|
||||
let hasSideEffects = false;
|
||||
let isConvergent = true;
|
||||
}
|
||||
|
||||
// Convergent intrinsic with side effects.
|
||||
def G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS : GenericInstruction {
|
||||
let OutOperandList = (outs);
|
||||
let InOperandList = (ins unknown:$intrin, variable_ops);
|
||||
let hasSideEffects = true;
|
||||
let mayLoad = true;
|
||||
let mayStore = true;
|
||||
let isConvergent = true;
|
||||
}
|
||||
|
||||
|
||||
@ -39,6 +39,10 @@ class GINodeEquiv<Instruction i, SDNode node> {
|
||||
// SelectionDAG has one setcc for all compares. This differentiates
|
||||
// for G_ICMP and G_FCMP.
|
||||
Instruction IfFloatingPoint = ?;
|
||||
|
||||
// SelectionDAG does not differentiate between convergent and non-convergent
|
||||
// intrinsics. This specifies an alternate opcode for a convergent intrinsic.
|
||||
Instruction IfConvergent = ?;
|
||||
}
|
||||
|
||||
// These are defined in the same order as the G_* instructions.
|
||||
@ -106,10 +110,17 @@ def : GINodeEquiv<G_FLOG2, flog2>;
|
||||
def : GINodeEquiv<G_FLDEXP, fldexp>;
|
||||
def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
|
||||
def : GINodeEquiv<G_IS_FPCLASS, is_fpclass>;
|
||||
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
|
||||
|
||||
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain> {
|
||||
let IfConvergent = G_INTRINSIC_CONVERGENT;
|
||||
}
|
||||
|
||||
// ISD::INTRINSIC_VOID can also be handled with G_INTRINSIC_W_SIDE_EFFECTS.
|
||||
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
|
||||
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
|
||||
let IfConvergent = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS in {
|
||||
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_void>;
|
||||
def : GINodeEquiv<G_INTRINSIC_W_SIDE_EFFECTS, intrinsic_w_chain>;
|
||||
}
|
||||
|
||||
def : GINodeEquiv<G_BR, br>;
|
||||
def : GINodeEquiv<G_BSWAP, bswap>;
|
||||
def : GINodeEquiv<G_BITREVERSE, bitreverse>;
|
||||
|
||||
@ -48,6 +48,8 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
|
||||
}
|
||||
case TargetOpcode::G_INTRINSIC:
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
default:
|
||||
return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
|
||||
}
|
||||
@ -726,6 +728,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
|
||||
}
|
||||
case TargetOpcode::G_INTRINSIC:
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
default: {
|
||||
unsigned NumBits =
|
||||
TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
|
||||
|
||||
@ -2533,8 +2533,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
|
||||
|
||||
// Ignore the callsite attributes. Backend code is most likely not expecting
|
||||
// an intrinsic to sometimes have side effects and sometimes not.
|
||||
MachineInstrBuilder MIB =
|
||||
MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
|
||||
MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
|
||||
if (isa<FPMathOperator>(CI))
|
||||
MIB->copyIRFlags(CI);
|
||||
|
||||
@ -2885,7 +2884,7 @@ bool IRTranslator::translateUnreachable(const User &U, MachineIRBuilder &MIRBuil
|
||||
}
|
||||
}
|
||||
|
||||
MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
|
||||
MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -76,6 +76,9 @@ LegacyLegalizerInfo::LegacyLegalizerInfo() {
|
||||
|
||||
setScalarAction(TargetOpcode::G_INTRINSIC, 0, {{1, Legal}});
|
||||
setScalarAction(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS, 0, {{1, Legal}});
|
||||
setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT, 0, {{1, Legal}});
|
||||
setScalarAction(TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS, 0,
|
||||
{{1, Legal}});
|
||||
|
||||
setLegalizeScalarToDifferentSizeStrategy(
|
||||
TargetOpcode::G_IMPLICIT_DEF, 0, narrowToSmallerAndUnsupportedIfTooSmall);
|
||||
|
||||
@ -119,8 +119,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
|
||||
|
||||
MIRBuilder.setInstrAndDebugLoc(MI);
|
||||
|
||||
if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
|
||||
MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
|
||||
if (isa<GIntrinsic>(MI))
|
||||
return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
|
||||
auto Step = LI.getAction(MI, MRI);
|
||||
switch (Step.Action) {
|
||||
|
||||
@ -775,30 +775,55 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
|
||||
return buildInstr(TargetOpcode::G_INSERT, Res, {Src, Op, uint64_t(Index)});
|
||||
}
|
||||
|
||||
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
|
||||
ArrayRef<Register> ResultRegs,
|
||||
bool HasSideEffects) {
|
||||
auto MIB =
|
||||
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
|
||||
: TargetOpcode::G_INTRINSIC);
|
||||
static unsigned getIntrinsicOpcode(bool HasSideEffects, bool IsConvergent) {
|
||||
if (HasSideEffects && IsConvergent)
|
||||
return TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS;
|
||||
if (HasSideEffects)
|
||||
return TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
|
||||
if (IsConvergent)
|
||||
return TargetOpcode::G_INTRINSIC_CONVERGENT;
|
||||
return TargetOpcode::G_INTRINSIC;
|
||||
}
|
||||
|
||||
MachineInstrBuilder
|
||||
MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
|
||||
ArrayRef<Register> ResultRegs,
|
||||
bool HasSideEffects, bool isConvergent) {
|
||||
auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
|
||||
for (unsigned ResultReg : ResultRegs)
|
||||
MIB.addDef(ResultReg);
|
||||
MIB.addIntrinsicID(ID);
|
||||
return MIB;
|
||||
}
|
||||
|
||||
MachineInstrBuilder
|
||||
MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
|
||||
ArrayRef<Register> ResultRegs) {
|
||||
auto Attrs = Intrinsic::getAttributes(getContext(), ID);
|
||||
bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
|
||||
bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
|
||||
return buildIntrinsic(ID, ResultRegs, HasSideEffects, isConvergent);
|
||||
}
|
||||
|
||||
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
|
||||
ArrayRef<DstOp> Results,
|
||||
bool HasSideEffects) {
|
||||
auto MIB =
|
||||
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
|
||||
: TargetOpcode::G_INTRINSIC);
|
||||
bool HasSideEffects,
|
||||
bool isConvergent) {
|
||||
auto MIB = buildInstr(getIntrinsicOpcode(HasSideEffects, isConvergent));
|
||||
for (DstOp Result : Results)
|
||||
Result.addDefToMIB(*getMRI(), MIB);
|
||||
MIB.addIntrinsicID(ID);
|
||||
return MIB;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
|
||||
ArrayRef<DstOp> Results) {
|
||||
auto Attrs = Intrinsic::getAttributes(getContext(), ID);
|
||||
bool HasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
|
||||
bool isConvergent = Attrs.hasFnAttr(Attribute::Convergent);
|
||||
return buildIntrinsic(ID, Results, HasSideEffects, isConvergent);
|
||||
}
|
||||
|
||||
MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
|
||||
const SrcOp &Op) {
|
||||
return buildInstr(TargetOpcode::G_TRUNC, Res, Op);
|
||||
|
||||
@ -32,6 +32,7 @@
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/CodeGen/CodeGenCommonISel.h"
|
||||
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
|
||||
#include "llvm/CodeGen/LiveInterval.h"
|
||||
#include "llvm/CodeGen/LiveIntervals.h"
|
||||
#include "llvm/CodeGen/LiveRangeCalc.h"
|
||||
@ -223,7 +224,11 @@ namespace {
|
||||
bool verifyAllRegOpsScalar(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI);
|
||||
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
|
||||
|
||||
bool verifyGIntrinsicSideEffects(const MachineInstr *MI);
|
||||
bool verifyGIntrinsicConvergence(const MachineInstr *MI);
|
||||
void verifyPreISelGenericInstruction(const MachineInstr *MI);
|
||||
|
||||
void visitMachineInstrBefore(const MachineInstr *MI);
|
||||
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
|
||||
void visitMachineBundleAfter(const MachineInstr *MI);
|
||||
@ -955,6 +960,55 @@ bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MachineVerifier::verifyGIntrinsicSideEffects(const MachineInstr *MI) {
|
||||
auto Opcode = MI->getOpcode();
|
||||
bool NoSideEffects = Opcode == TargetOpcode::G_INTRINSIC ||
|
||||
Opcode == TargetOpcode::G_INTRINSIC_CONVERGENT;
|
||||
unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
|
||||
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
|
||||
AttributeList Attrs = Intrinsic::getAttributes(
|
||||
MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
|
||||
bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
|
||||
if (NoSideEffects && DeclHasSideEffects) {
|
||||
report(Twine(TII->getName(Opcode),
|
||||
" used with intrinsic that accesses memory"),
|
||||
MI);
|
||||
return false;
|
||||
}
|
||||
if (!NoSideEffects && !DeclHasSideEffects) {
|
||||
report(Twine(TII->getName(Opcode), " used with readnone intrinsic"), MI);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MachineVerifier::verifyGIntrinsicConvergence(const MachineInstr *MI) {
|
||||
auto Opcode = MI->getOpcode();
|
||||
bool NotConvergent = Opcode == TargetOpcode::G_INTRINSIC ||
|
||||
Opcode == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
|
||||
unsigned IntrID = cast<GIntrinsic>(MI)->getIntrinsicID();
|
||||
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
|
||||
AttributeList Attrs = Intrinsic::getAttributes(
|
||||
MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
|
||||
bool DeclIsConvergent = Attrs.hasFnAttr(Attribute::Convergent);
|
||||
if (NotConvergent && DeclIsConvergent) {
|
||||
report(Twine(TII->getName(Opcode), " used with a convergent intrinsic"),
|
||||
MI);
|
||||
return false;
|
||||
}
|
||||
if (!NotConvergent && !DeclIsConvergent) {
|
||||
report(
|
||||
Twine(TII->getName(Opcode), " used with a non-convergent intrinsic"),
|
||||
MI);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
||||
if (isFunctionSelected)
|
||||
report("Unexpected generic instruction in a Selected function", MI);
|
||||
@ -1493,7 +1547,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
||||
break;
|
||||
}
|
||||
case TargetOpcode::G_INTRINSIC:
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
|
||||
// TODO: Should verify number of def and use operands, but the current
|
||||
// interface requires passing in IR types for mangling.
|
||||
const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
|
||||
@ -1502,21 +1558,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
|
||||
break;
|
||||
}
|
||||
|
||||
bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
|
||||
unsigned IntrID = IntrIDOp.getIntrinsicID();
|
||||
if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
|
||||
AttributeList Attrs = Intrinsic::getAttributes(
|
||||
MF->getFunction().getContext(), static_cast<Intrinsic::ID>(IntrID));
|
||||
bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory();
|
||||
if (NoSideEffects && DeclHasSideEffects) {
|
||||
report("G_INTRINSIC used with intrinsic that accesses memory", MI);
|
||||
break;
|
||||
}
|
||||
if (!NoSideEffects && !DeclHasSideEffects) {
|
||||
report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!verifyGIntrinsicSideEffects(MI))
|
||||
break;
|
||||
if (!verifyGIntrinsicConvergence(MI))
|
||||
break;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1497,8 +1497,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
|
||||
llvm_unreachable("unexpected vector shape");
|
||||
MachineInstrBuilder UADD;
|
||||
for (LLT HTy : HAddTys) {
|
||||
UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}, /*HasSideEffects =*/false)
|
||||
.addUse(HSum);
|
||||
UADD = MIRBuilder.buildIntrinsic(Opc, {HTy}).addUse(HSum);
|
||||
HSum = UADD.getReg(0);
|
||||
}
|
||||
|
||||
|
||||
@ -82,9 +82,10 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
|
||||
ExtReg = MIRBuilder.buildBitcast(S32, ExtReg).getReg(0);
|
||||
}
|
||||
|
||||
auto ToSGPR = MIRBuilder.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
|
||||
{MRI.getType(ExtReg)}, false)
|
||||
.addReg(ExtReg);
|
||||
auto ToSGPR = MIRBuilder
|
||||
.buildIntrinsic(Intrinsic::amdgcn_readfirstlane,
|
||||
{MRI.getType(ExtReg)})
|
||||
.addReg(ExtReg);
|
||||
ExtReg = ToSGPR.getReg(0);
|
||||
}
|
||||
|
||||
|
||||
@ -42,7 +42,8 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
|
||||
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
|
||||
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
|
||||
return true;
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_rcp:
|
||||
@ -67,8 +68,7 @@ static bool fnegFoldsIntoMI(const MachineInstr &MI) {
|
||||
LLVM_READONLY
|
||||
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
|
||||
const MachineRegisterInfo &MRI) {
|
||||
return MI.getNumOperands() >
|
||||
(MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
|
||||
return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
|
||||
MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
|
||||
}
|
||||
|
||||
@ -86,13 +86,15 @@ static bool hasSourceMods(const MachineInstr &MI) {
|
||||
case TargetOpcode::INLINEASM:
|
||||
case TargetOpcode::INLINEASM_BR:
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
case AMDGPU::G_BITCAST:
|
||||
case AMDGPU::G_ANYEXT:
|
||||
case AMDGPU::G_BUILD_VECTOR:
|
||||
case AMDGPU::G_BUILD_VECTOR_TRUNC:
|
||||
case AMDGPU::G_PHI:
|
||||
return false;
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_interp_p1:
|
||||
@ -228,7 +230,8 @@ bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
|
||||
case AMDGPU::G_FCANONICALIZE:
|
||||
case AMDGPU::G_AMDGPU_RCP_IFLAG:
|
||||
return true;
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_rcp:
|
||||
@ -327,7 +330,8 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
|
||||
case AMDGPU::G_FPTRUNC:
|
||||
NegateOperand(MatchInfo->getOperand(1));
|
||||
break;
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
unsigned IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_rcp:
|
||||
|
||||
@ -3431,8 +3431,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
|
||||
case TargetOpcode::G_INSERT:
|
||||
return selectG_INSERT(I);
|
||||
case TargetOpcode::G_INTRINSIC:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT:
|
||||
return selectG_INTRINSIC(I);
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
|
||||
case TargetOpcode::G_ICMP:
|
||||
if (selectG_ICMP(I))
|
||||
|
||||
@ -2346,10 +2346,10 @@ static MachineInstrBuilder extractF64Exponent(Register Hi,
|
||||
auto Const0 = B.buildConstant(S32, FractBits - 32);
|
||||
auto Const1 = B.buildConstant(S32, ExpBits);
|
||||
|
||||
auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
|
||||
.addUse(Hi)
|
||||
.addUse(Const0.getReg(0))
|
||||
.addUse(Const1.getReg(0));
|
||||
auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32})
|
||||
.addUse(Hi)
|
||||
.addUse(Const0.getReg(0))
|
||||
.addUse(Const1.getReg(0));
|
||||
|
||||
return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
|
||||
}
|
||||
@ -2437,8 +2437,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
|
||||
auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
|
||||
auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
|
||||
auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
|
||||
auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
|
||||
/*HasSideEffects=*/false)
|
||||
auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32})
|
||||
.addUse(Unmerge.getReg(1));
|
||||
auto LS2 = B.buildSub(S32, LS, One);
|
||||
ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
|
||||
@ -2671,15 +2670,16 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
|
||||
auto OneOver2Pi = B.buildFConstant(Ty, 0.5 * numbers::inv_pi);
|
||||
if (ST.hasTrigReducedRange()) {
|
||||
auto MulVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags);
|
||||
TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
|
||||
.addUse(MulVal.getReg(0))
|
||||
.setMIFlags(Flags).getReg(0);
|
||||
TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty})
|
||||
.addUse(MulVal.getReg(0))
|
||||
.setMIFlags(Flags)
|
||||
.getReg(0);
|
||||
} else
|
||||
TrigVal = B.buildFMul(Ty, SrcReg, OneOver2Pi, Flags).getReg(0);
|
||||
|
||||
Intrinsic::ID TrigIntrin = MI.getOpcode() == AMDGPU::G_FSIN ?
|
||||
Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
|
||||
B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg), false)
|
||||
B.buildIntrinsic(TrigIntrin, ArrayRef<Register>(DstReg))
|
||||
.addUse(TrigVal)
|
||||
.setMIFlags(Flags);
|
||||
MI.eraseFromParent();
|
||||
@ -2772,7 +2772,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
||||
// functions that use local objects. However, if these dead functions are
|
||||
// not eliminated, we don't want a compile time error. Just emit a warning
|
||||
// and a trap, since there should be no callable path here.
|
||||
B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
|
||||
B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>());
|
||||
B.buildUndef(DstReg);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -2798,8 +2798,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
||||
// Adjust alignment for that dynamic shared memory array.
|
||||
MFI->setDynLDSAlign(MF.getFunction(), *cast<GlobalVariable>(GV));
|
||||
LLT S32 = LLT::scalar(32);
|
||||
auto Sz =
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
|
||||
auto Sz = B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32});
|
||||
B.buildIntToPtr(DstReg, Sz);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -3074,9 +3073,9 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
|
||||
const LLT F32 = LLT::scalar(32);
|
||||
// Nothing in half is a denormal when promoted to f32.
|
||||
auto Ext = B.buildFPExt(F32, Src, Flags);
|
||||
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32}, false)
|
||||
.addUse(Ext.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32})
|
||||
.addUse(Ext.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
B.buildFPTrunc(Dst, Log2, Flags);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -3086,14 +3085,14 @@ bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
|
||||
|
||||
auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags);
|
||||
if (!ScaledInput) {
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false)
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)})
|
||||
.addUse(Src)
|
||||
.setMIFlags(Flags);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
|
||||
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
|
||||
.addUse(ScaledInput)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
@ -3153,9 +3152,8 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
|
||||
if (ScaledInput)
|
||||
X = ScaledInput;
|
||||
|
||||
auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
|
||||
.addUse(X)
|
||||
.setMIFlags(Flags);
|
||||
auto Y =
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}).addUse(X).setMIFlags(Flags);
|
||||
|
||||
Register R;
|
||||
if (ST.hasFastFMAF32()) {
|
||||
@ -3232,7 +3230,7 @@ bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
|
||||
LLT Ty = B.getMRI()->getType(Dst);
|
||||
auto Log2Operand = Ty == LLT::scalar(16)
|
||||
? B.buildFLog2(Ty, Src, Flags)
|
||||
: B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
|
||||
: B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty})
|
||||
.addUse(Src)
|
||||
.setMIFlags(Flags);
|
||||
auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
|
||||
@ -3255,9 +3253,9 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
|
||||
if (Ty == F16) {
|
||||
// Nothing in half is a denormal when promoted to f32.
|
||||
auto Ext = B.buildFPExt(F32, Src, Flags);
|
||||
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32}, false)
|
||||
.addUse(Ext.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32})
|
||||
.addUse(Ext.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
B.buildFPTrunc(Dst, Log2, Flags);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -3267,9 +3265,9 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
|
||||
|
||||
if (allowApproxFunc(B.getMF(), Flags) ||
|
||||
!needsDenormHandlingF32(B.getMF(), Src, Flags)) {
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
|
||||
.addUse(Src)
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
|
||||
.addUse(Src)
|
||||
.setMIFlags(Flags);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
@ -3287,7 +3285,7 @@ bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
|
||||
auto AddOffset = B.buildSelect(F32, NeedsScaling, SixtyFour, Zero, Flags);
|
||||
auto AddInput = B.buildFAdd(F32, Src, AddOffset, Flags);
|
||||
|
||||
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
|
||||
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
|
||||
.addUse(AddInput.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
@ -3307,9 +3305,9 @@ bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
|
||||
auto Mul = B.buildFMul(Ty, Src, K, Flags);
|
||||
|
||||
if (Ty == LLT::scalar(32)) {
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
|
||||
.addUse(Mul.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst})
|
||||
.addUse(Mul.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
} else {
|
||||
B.buildFExp2(Dst, Mul.getReg(0), Flags);
|
||||
}
|
||||
@ -3429,7 +3427,7 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
|
||||
auto A = B.buildFAdd(Ty, PHSubE, PL, Flags);
|
||||
auto IntE = B.buildFPTOSI(LLT::scalar(32), E);
|
||||
|
||||
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
|
||||
auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty})
|
||||
.addUse(A.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto R = B.buildFLdexp(Ty, Exp2, IntE, Flags);
|
||||
@ -3471,20 +3469,20 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI,
|
||||
|
||||
if (Ty == S32) {
|
||||
auto Log = B.buildFLog2(S32, Src0, Flags);
|
||||
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
|
||||
.addUse(Log.getReg(0))
|
||||
.addUse(Src1)
|
||||
.setMIFlags(Flags);
|
||||
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
|
||||
.addUse(Log.getReg(0))
|
||||
.addUse(Src1)
|
||||
.setMIFlags(Flags);
|
||||
B.buildFExp2(Dst, Mul, Flags);
|
||||
} else if (Ty == S16) {
|
||||
// There's no f16 fmul_legacy, so we need to convert for it.
|
||||
auto Log = B.buildFLog2(S16, Src0, Flags);
|
||||
auto Ext0 = B.buildFPExt(S32, Log, Flags);
|
||||
auto Ext1 = B.buildFPExt(S32, Src1, Flags);
|
||||
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false)
|
||||
.addUse(Ext0.getReg(0))
|
||||
.addUse(Ext1.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32})
|
||||
.addUse(Ext0.getReg(0))
|
||||
.addUse(Ext1.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags);
|
||||
} else
|
||||
@ -3526,9 +3524,9 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
|
||||
//
|
||||
// Convert floor(x) to (x - fract(x))
|
||||
|
||||
auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false)
|
||||
.addUse(OrigSrc)
|
||||
.setMIFlags(Flags);
|
||||
auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64})
|
||||
.addUse(OrigSrc)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
// Give source modifier matching some assistance before obscuring a foldable
|
||||
// pattern.
|
||||
@ -4477,9 +4475,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
|
||||
|
||||
// 1 / x -> RCP(x)
|
||||
if (CLHS->isExactlyValue(1.0)) {
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
|
||||
.addUse(RHS)
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
|
||||
.addUse(RHS)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -4490,9 +4488,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
|
||||
// -1 / x -> RCP( FNEG(x) )
|
||||
if (CLHS->isExactlyValue(-1.0)) {
|
||||
auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
|
||||
.addUse(FNeg.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res)
|
||||
.addUse(FNeg.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -4506,9 +4504,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
|
||||
return false;
|
||||
|
||||
// x / y -> x * (1.0 / y)
|
||||
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
|
||||
.addUse(RHS)
|
||||
.setMIFlags(Flags);
|
||||
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
|
||||
.addUse(RHS)
|
||||
.setMIFlags(Flags);
|
||||
B.buildFMul(Res, LHS, RCP, Flags);
|
||||
|
||||
MI.eraseFromParent();
|
||||
@ -4534,9 +4532,9 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI,
|
||||
auto NegY = B.buildFNeg(ResTy, Y);
|
||||
auto One = B.buildFConstant(ResTy, 1.0);
|
||||
|
||||
auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
|
||||
.addUse(Y)
|
||||
.setMIFlags(Flags);
|
||||
auto R = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy})
|
||||
.addUse(Y)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto Tmp0 = B.buildFMA(ResTy, NegY, R, One);
|
||||
R = B.buildFMA(ResTy, Tmp0, R, R);
|
||||
@ -4570,18 +4568,18 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
|
||||
auto LHSExt = B.buildFPExt(S32, LHS, Flags);
|
||||
auto RHSExt = B.buildFPExt(S32, RHS, Flags);
|
||||
|
||||
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
|
||||
.addUse(RHSExt.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
|
||||
.addUse(RHSExt.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
|
||||
auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
|
||||
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
|
||||
.addUse(RDst.getReg(0))
|
||||
.addUse(RHS)
|
||||
.addUse(LHS)
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
|
||||
.addUse(RDst.getReg(0))
|
||||
.addUse(RHS)
|
||||
.addUse(LHS)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -4636,21 +4634,21 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||
auto One = B.buildFConstant(S32, 1.0f);
|
||||
|
||||
auto DenominatorScaled =
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(0)
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(0)
|
||||
.setMIFlags(Flags);
|
||||
auto NumeratorScaled =
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1}, false)
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(1)
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S32, S1})
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(1)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
|
||||
.addUse(DenominatorScaled.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto ApproxRcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
|
||||
.addUse(DenominatorScaled.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
|
||||
|
||||
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
|
||||
@ -4670,18 +4668,18 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
|
||||
if (Mode.FP32Denormals != DenormalMode::getIEEE())
|
||||
toggleSPDenormMode(false, B, ST, Mode);
|
||||
|
||||
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
|
||||
.addUse(Fma4.getReg(0))
|
||||
.addUse(Fma1.getReg(0))
|
||||
.addUse(Fma3.getReg(0))
|
||||
.addUse(NumeratorScaled.getReg(1))
|
||||
.setMIFlags(Flags);
|
||||
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
|
||||
.addUse(Fma4.getReg(0))
|
||||
.addUse(Fma1.getReg(0))
|
||||
.addUse(Fma3.getReg(0))
|
||||
.addUse(NumeratorScaled.getReg(1))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res, false)
|
||||
.addUse(Fmas.getReg(0))
|
||||
.addUse(RHS)
|
||||
.addUse(LHS)
|
||||
.setMIFlags(Flags);
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, Res)
|
||||
.addUse(Fmas.getReg(0))
|
||||
.addUse(RHS)
|
||||
.addUse(LHS)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
@ -4704,27 +4702,27 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
|
||||
|
||||
auto One = B.buildFConstant(S64, 1.0);
|
||||
|
||||
auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(0)
|
||||
.setMIFlags(Flags);
|
||||
auto DivScale0 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(0)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto NegDivScale0 = B.buildFNeg(S64, DivScale0.getReg(0), Flags);
|
||||
|
||||
auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64}, false)
|
||||
.addUse(DivScale0.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S64})
|
||||
.addUse(DivScale0.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto Fma0 = B.buildFMA(S64, NegDivScale0, Rcp, One, Flags);
|
||||
auto Fma1 = B.buildFMA(S64, Rcp, Fma0, Rcp, Flags);
|
||||
auto Fma2 = B.buildFMA(S64, NegDivScale0, Fma1, One, Flags);
|
||||
|
||||
auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1}, false)
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(1)
|
||||
.setMIFlags(Flags);
|
||||
auto DivScale1 = B.buildIntrinsic(Intrinsic::amdgcn_div_scale, {S64, S1})
|
||||
.addUse(LHS)
|
||||
.addUse(RHS)
|
||||
.addImm(1)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto Fma3 = B.buildFMA(S64, Fma1, Fma2, Fma1, Flags);
|
||||
auto Mul = B.buildFMul(S64, DivScale1.getReg(0), Fma3, Flags);
|
||||
@ -4751,14 +4749,14 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
|
||||
Scale = DivScale1.getReg(1);
|
||||
}
|
||||
|
||||
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64}, false)
|
||||
.addUse(Fma4.getReg(0))
|
||||
.addUse(Fma3.getReg(0))
|
||||
.addUse(Mul.getReg(0))
|
||||
.addUse(Scale)
|
||||
.setMIFlags(Flags);
|
||||
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S64})
|
||||
.addUse(Fma4.getReg(0))
|
||||
.addUse(Fma3.getReg(0))
|
||||
.addUse(Mul.getReg(0))
|
||||
.addUse(Scale)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res), false)
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_div_fixup, ArrayRef(Res))
|
||||
.addUse(Fmas.getReg(0))
|
||||
.addUse(RHS)
|
||||
.addUse(LHS)
|
||||
@ -4779,10 +4777,10 @@ bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI,
|
||||
LLT Ty = MRI.getType(Res0);
|
||||
LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32);
|
||||
|
||||
auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}, false)
|
||||
auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty})
|
||||
.addUse(Val)
|
||||
.setMIFlags(Flags);
|
||||
auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy}, false)
|
||||
auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy})
|
||||
.addUse(Val)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
@ -4826,9 +4824,9 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
|
||||
|
||||
auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);
|
||||
|
||||
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
|
||||
.addUse(Mul0.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
|
||||
.addUse(Mul0.getReg(0))
|
||||
.setMIFlags(Flags);
|
||||
|
||||
auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);
|
||||
|
||||
@ -4881,8 +4879,8 @@ bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
|
||||
auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
|
||||
auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
|
||||
|
||||
auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}, false)
|
||||
.addReg(SqrtX.getReg(0));
|
||||
auto SqrtY =
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}).addReg(SqrtX.getReg(0));
|
||||
|
||||
auto Half = B.buildFConstant(F64, 0.5);
|
||||
auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
|
||||
@ -4948,9 +4946,9 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
|
||||
else
|
||||
return false;
|
||||
|
||||
auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty}, false)
|
||||
.addUse(Src)
|
||||
.setMIFlags(Flags);
|
||||
auto Rsq = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {Ty})
|
||||
.addUse(Src)
|
||||
.setMIFlags(Flags);
|
||||
|
||||
// We don't need to concern ourselves with the snan handling difference, since
|
||||
// the rsq quieted (or not) so use the one which will directly select.
|
||||
|
||||
@ -288,7 +288,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
|
||||
// rcp(sqrt(x))
|
||||
if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
|
||||
MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
|
||||
.addUse(SqrtSrcMI->getOperand(0).getReg())
|
||||
.setMIFlags(MI.getFlags());
|
||||
};
|
||||
@ -298,7 +298,7 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
|
||||
// sqrt(rcp(x))
|
||||
if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
|
||||
MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
|
||||
.addUse(RcpSrcMI->getOperand(0).getReg())
|
||||
.setMIFlags(MI.getFlags());
|
||||
};
|
||||
|
||||
@ -633,8 +633,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
|
||||
return AltMappings;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT:
|
||||
return getInstrAlternativeMappingsIntrinsic(MI, MRI);
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI);
|
||||
default:
|
||||
break;
|
||||
@ -923,8 +925,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||
|
||||
// The ballot becomes a no-op during instruction selection.
|
||||
CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
|
||||
{LLT::scalar(Subtarget.isWave32() ? 32 : 64)},
|
||||
false)
|
||||
{LLT::scalar(Subtarget.isWave32() ? 32 : 64)})
|
||||
.addReg(CondReg)
|
||||
.getReg(0);
|
||||
MRI.setRegClass(CondReg, WaveRC);
|
||||
@ -1452,7 +1453,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
|
||||
|
||||
const LLT S32 = LLT::scalar(32);
|
||||
|
||||
unsigned FirstOpnd = MI.getOpcode() == AMDGPU::G_INTRINSIC ? 2 : 1;
|
||||
unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
|
||||
Register SrcReg = MI.getOperand(FirstOpnd).getReg();
|
||||
Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
|
||||
Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
|
||||
@ -2949,7 +2950,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
applyMappingSBufferLoad(OpdMapper);
|
||||
return;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
|
||||
case Intrinsic::amdgcn_readlane: {
|
||||
substituteSimpleCopyRegs(OpdMapper, 2);
|
||||
@ -3035,7 +3037,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
executeInWaterfallLoop(MI, MRI, { N });
|
||||
return;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
|
||||
auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_ds_ordered_add:
|
||||
@ -4198,7 +4201,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
|
||||
default:
|
||||
return getInvalidInstructionMapping();
|
||||
@ -4560,7 +4564,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
|
||||
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
|
||||
auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_s_getreg:
|
||||
|
||||
@ -11328,6 +11328,7 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
|
||||
return false;
|
||||
return true;
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT:
|
||||
switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
|
||||
case Intrinsic::amdgcn_fmul_legacy:
|
||||
case Intrinsic::amdgcn_fmad_ftz:
|
||||
@ -13761,7 +13762,8 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
|
||||
const MachineRegisterInfo &MRI, unsigned Depth) const {
|
||||
const MachineInstr *MI = MRI.getVRegDef(R);
|
||||
switch (MI->getOpcode()) {
|
||||
case AMDGPU::G_INTRINSIC: {
|
||||
case AMDGPU::G_INTRINSIC:
|
||||
case AMDGPU::G_INTRINSIC_CONVERGENT: {
|
||||
switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
|
||||
case Intrinsic::amdgcn_workitem_id_x:
|
||||
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
|
||||
@ -13835,7 +13837,6 @@ Align SITargetLowering::computeKnownAlignForTargetInstr(
|
||||
AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
|
||||
if (MaybeAlign RetAlign = Attrs.getRetAlignment())
|
||||
return *RetAlign;
|
||||
return Align(1);
|
||||
}
|
||||
return Align(1);
|
||||
}
|
||||
|
||||
@ -976,7 +976,7 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
|
||||
// Use Intrinsic::spv_extractelt so dynamic vs static extraction is
|
||||
// handled later: extr = spv_extractelt LoadedVector, IndexRegister.
|
||||
MachineInstrBuilder ExtractInst = MIRBuilder.buildIntrinsic(
|
||||
Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true);
|
||||
Intrinsic::spv_extractelt, ArrayRef<Register>{Extracted}, true, false);
|
||||
ExtractInst.addUse(LoadedVector).addUse(IndexRegister);
|
||||
|
||||
// If the index is dynamic, need check if it's < 3, and then use a select.
|
||||
@ -1644,8 +1644,8 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
|
||||
Register Reg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
|
||||
MRI->setType(Reg, LLType);
|
||||
GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
|
||||
auto GEPInst = MIRBuilder.buildIntrinsic(Intrinsic::spv_gep,
|
||||
ArrayRef<Register>{Reg}, true);
|
||||
auto GEPInst = MIRBuilder.buildIntrinsic(
|
||||
Intrinsic::spv_gep, ArrayRef<Register>{Reg}, true, false);
|
||||
GEPInst
|
||||
.addImm(GepMI->getOperand(2).getImm()) // In bound.
|
||||
.addUse(ArrayMI->getOperand(0).getReg()) // Alloca.
|
||||
|
||||
@ -276,6 +276,7 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
|
||||
return selectOpUndef(ResVReg, ResType, I);
|
||||
|
||||
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
|
||||
case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
|
||||
return selectIntrinsic(ResVReg, ResType, I);
|
||||
case TargetOpcode::G_BITREVERSE:
|
||||
return selectBitreverse(ResVReg, ResType, I);
|
||||
@ -591,15 +592,16 @@ static void addMemoryOperands(uint64_t Flags, MachineInstrBuilder &MIB) {
|
||||
bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
|
||||
const SPIRVType *ResType,
|
||||
MachineInstr &I) const {
|
||||
unsigned OpOffset =
|
||||
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
|
||||
unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
|
||||
Register Ptr = I.getOperand(1 + OpOffset).getReg();
|
||||
auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpLoad))
|
||||
.addDef(ResVReg)
|
||||
.addUse(GR.getSPIRVTypeID(ResType))
|
||||
.addUse(Ptr);
|
||||
if (!I.getNumMemOperands()) {
|
||||
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
|
||||
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
|
||||
I.getOpcode() ==
|
||||
TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
|
||||
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
|
||||
} else {
|
||||
addMemoryOperands(*I.memoperands_begin(), MIB);
|
||||
@ -608,8 +610,7 @@ bool SPIRVInstructionSelector::selectLoad(Register ResVReg,
|
||||
}
|
||||
|
||||
bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
|
||||
unsigned OpOffset =
|
||||
I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ? 1 : 0;
|
||||
unsigned OpOffset = isa<GIntrinsic>(I) ? 1 : 0;
|
||||
Register StoreVal = I.getOperand(0 + OpOffset).getReg();
|
||||
Register Ptr = I.getOperand(1 + OpOffset).getReg();
|
||||
MachineBasicBlock &BB = *I.getParent();
|
||||
@ -617,7 +618,9 @@ bool SPIRVInstructionSelector::selectStore(MachineInstr &I) const {
|
||||
.addUse(Ptr)
|
||||
.addUse(StoreVal);
|
||||
if (!I.getNumMemOperands()) {
|
||||
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
|
||||
assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS ||
|
||||
I.getOpcode() ==
|
||||
TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS);
|
||||
addMemoryOperands(I.getOperand(2 + OpOffset).getImm(), MIB);
|
||||
} else {
|
||||
addMemoryOperands(*I.memoperands_begin(), MIB);
|
||||
@ -719,7 +722,7 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
|
||||
Register MemSemEqReg;
|
||||
Register MemSemNeqReg;
|
||||
Register Ptr = I.getOperand(2).getReg();
|
||||
if (I.getOpcode() != TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) {
|
||||
if (!isa<GIntrinsic>(I)) {
|
||||
assert(I.hasOneMemOperand());
|
||||
const MachineMemOperand *MemOp = *I.memoperands_begin();
|
||||
unsigned Scope = static_cast<uint32_t>(getScope(MemOp->getSyncScopeID()));
|
||||
|
||||
@ -219,7 +219,7 @@ MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
|
||||
ConstReg = ConstInstr->getOperand(1).getReg();
|
||||
return MRI->getVRegDef(ConstReg);
|
||||
}
|
||||
return ConstInstr;
|
||||
return MRI->getVRegDef(ConstReg);
|
||||
}
|
||||
|
||||
uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI) {
|
||||
|
||||
@ -10,7 +10,7 @@ body: |
|
||||
; CHECK-NOT: DIVERGENT: {{.*}}llvm.amdgcn.readfirstlane
|
||||
%6:_(p1) = G_IMPLICIT_DEF
|
||||
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
|
||||
%5:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
|
||||
%5:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %4(s32)
|
||||
G_STORE %5(s32), %6(p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
S_ENDPGM 0
|
||||
...
|
||||
@ -29,7 +29,7 @@ body: |
|
||||
%9:_(s64) = G_CONSTANT i64 8
|
||||
%10:_(p4) = G_PTR_ADD %7, %9(s64)
|
||||
%11:_(p1) = G_LOAD %10(p4) :: (dereferenceable invariant load (p1), addrspace 4)
|
||||
%12:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %8(s32), %13(s32), 33
|
||||
%12:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %8(s32), %13(s32), 33
|
||||
G_STORE %12(s64), %11(p1) :: (volatile store (s64) , addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
@ -52,7 +52,7 @@ body: |
|
||||
%11:_(s32) = G_EXTRACT_VECTOR_ELT %8(<2 x s32>), %12(s32)
|
||||
%13:_(s64) = G_CONSTANT i64 4
|
||||
%14:_(p4) = G_PTR_ADD %7, %13(s64)
|
||||
%15:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %9(s32), %11(s32), 33
|
||||
%15:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %9(s32), %11(s32), 33
|
||||
G_STORE %15(s64), %16(p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
@ -70,7 +70,7 @@ body: |
|
||||
%6:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
|
||||
%7:_(s32) = G_LOAD %6(p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
||||
%8:_(s1) = G_TRUNC %7(s32)
|
||||
%9:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %8(s1)
|
||||
%9:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %8(s1)
|
||||
G_STORE %9(s64), %10(p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
|
||||
@ -4,16 +4,16 @@
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_XOR %{{[0-9]*}}:_, %{{[0-9]*}}:_
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.1
|
||||
# CHECK: DIVERGENT: G_BR %bb.2
|
||||
# CHECK-LABEL: BLOCK bb.1
|
||||
# CHECK-LABEL: BLOCK bb.2
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.1, %{{[0-9]*}}:_(s32), %bb.0
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_PHI %{{[0-9]*}}:_(s1), %bb.1, %{{[0-9]*}}:_(s1), %bb.0
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1), %{{[0-9]*}}:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if)
|
||||
# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
|
||||
# CHECK: DIVERGENT: G_BR %bb.4
|
||||
# CHECK-LABEL: BLOCK bb.3
|
||||
@ -44,7 +44,7 @@ body: |
|
||||
%14:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
|
||||
%16:_(s1) = G_ICMP intpred(slt), %14(s32), %15
|
||||
%18:_(s1) = G_XOR %16, %17
|
||||
%19:_(s1), %20:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
|
||||
%19:_(s1), %20:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %16(s1)
|
||||
G_BRCOND %19(s1), %bb.2
|
||||
G_BR %bb.3
|
||||
|
||||
@ -60,8 +60,8 @@ body: |
|
||||
|
||||
%25:_(s32) = G_PHI %22(s32), %bb.2, %33(s32), %bb.1
|
||||
%26:_(s1) = G_PHI %24(s1), %bb.2, %18(s1), %bb.1
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
|
||||
%27:_(s1), %28:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %20(s64)
|
||||
%27:_(s1), %28:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %26(s1)
|
||||
G_BRCOND %27(s1), %bb.4
|
||||
G_BR %bb.5
|
||||
|
||||
@ -72,7 +72,7 @@ body: |
|
||||
|
||||
bb.5:
|
||||
%31:_(s32) = G_PHI %25(s32), %bb.3, %29(s32), %bb.4
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %28(s64)
|
||||
G_STORE %31(s32), %32(p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
S_ENDPGM 0
|
||||
|
||||
|
||||
@ -27,7 +27,7 @@ body: |
|
||||
|
||||
%11:_(s64) = G_PHI %12(s64), %bb.2, %15(s64), %bb.1
|
||||
%18:_(s1) = G_CONSTANT i1 false
|
||||
%12:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
|
||||
%12:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
|
||||
; CHECK: DIVERGENT: SI_LOOP
|
||||
SI_LOOP %12(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.3
|
||||
@ -35,7 +35,7 @@ body: |
|
||||
bb.3:
|
||||
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
%14:_(s64) = G_PHI %12(s64), %bb.2
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -82,7 +82,7 @@ body: |
|
||||
successors: %bb.5, %bb.4
|
||||
|
||||
%15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
|
||||
; CHECK: DIVERGENT: SI_LOOP
|
||||
SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.5
|
||||
@ -90,7 +90,7 @@ body: |
|
||||
bb.5:
|
||||
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
%18:_(s64) = G_PHI %16(s64), %bb.4
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_BR %bb.3
|
||||
|
||||
bb.6:
|
||||
@ -140,7 +140,7 @@ body: |
|
||||
successors: %bb.5, %bb.4
|
||||
|
||||
%15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
|
||||
; CHECK: DIVERGENT: SI_LOOP
|
||||
SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.5
|
||||
@ -148,7 +148,7 @@ body: |
|
||||
bb.5:
|
||||
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
%18:_(s64) = G_PHI %16(s64), %bb.4
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_BR %bb.3
|
||||
|
||||
bb.6:
|
||||
@ -191,7 +191,7 @@ body: |
|
||||
|
||||
%15:_(s64) = G_PHI %25(s64), %bb.2, %16(s64), %bb.3
|
||||
%24:_(s1) = G_CONSTANT i1 false
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
|
||||
; CHECK: DIVERGENT: SI_LOOP
|
||||
SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.4
|
||||
@ -201,7 +201,7 @@ body: |
|
||||
successors: %bb.5, %bb.2
|
||||
|
||||
%18:_(s64) = G_PHI %16(s64), %bb.3
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_BRCOND %13(s1), %bb.2
|
||||
G_BR %bb.5
|
||||
|
||||
@ -241,7 +241,7 @@ body: |
|
||||
bb.2:
|
||||
%15:_(s64) = G_PHI %16(s64), %bb.4, %19(s64), %bb.1
|
||||
%24:_(s1) = G_CONSTANT i1 true
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
|
||||
|
||||
bb.3:
|
||||
successors: %bb.4, %bb.3
|
||||
@ -259,7 +259,7 @@ body: |
|
||||
bb.5:
|
||||
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
%18:_(s64) = G_PHI %16(s64), %bb.4
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
@ -291,7 +291,7 @@ body: |
|
||||
|
||||
%10:_(s64) = G_PHI %11(s64), %bb.2, %19(s64), %bb.1
|
||||
%24:_(s1) = G_CONSTANT i1 false
|
||||
%11:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
|
||||
%11:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
|
||||
; CHECK: DIVERGENT: SI_LOOP
|
||||
SI_LOOP %11(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.3
|
||||
@ -300,7 +300,7 @@ body: |
|
||||
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
; CHECK-NOT: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
%13:_(s64) = G_PHI %11(s64), %bb.2
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
|
||||
%14:_(p4) = COPY %3(p4)
|
||||
%15:_(s64) = G_CONSTANT i64 40
|
||||
%16:_(p4) = G_PTR_ADD %14, %15(s64)
|
||||
@ -354,7 +354,7 @@ body: |
|
||||
|
||||
%15:_(s64) = G_PHI %23(s64), %bb.2, %16(s64), %bb.3
|
||||
%25:_(s1) = G_CONSTANT i1 false
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
|
||||
%16:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
|
||||
; CHECK: DIVERGENT: SI_LOOP
|
||||
SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.4
|
||||
@ -362,7 +362,7 @@ body: |
|
||||
bb.4:
|
||||
; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
|
||||
%18:_(s64) = G_PHI %16(s64), %bb.3
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
|
||||
|
||||
bb.5:
|
||||
|
||||
|
||||
@ -43,18 +43,18 @@ body: |
|
||||
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
|
||||
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
|
||||
; CHECK: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:_(s32) = G_PHI
|
||||
; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break)
|
||||
; CHECK-NOT: DIVERGENT: %{{[0-9]}}: %{{[0-9]}}:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break)
|
||||
%19:_(s32) = G_PHI %18(s32), %bb.7, %25(s32), %bb.4
|
||||
%20:_(s32) = G_PHI %6(s32), %bb.7, %25(s32), %bb.4
|
||||
%21:_(s1) = G_PHI %34(s1), %bb.7, %33(s1), %bb.4
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
|
||||
%22:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %16(s32)
|
||||
%22:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), %21(s1), %0(s32)
|
||||
SI_LOOP %22(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
G_BR %bb.6
|
||||
|
||||
bb.6:
|
||||
%24:_(s32) = G_PHI %22(s32), %bb.5
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %24(s32)
|
||||
SI_RETURN
|
||||
|
||||
bb.7:
|
||||
|
||||
@ -273,6 +273,12 @@
|
||||
# DEBUG-NEXT: G_INTRINSIC_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
|
||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
|
||||
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
|
||||
# DEBUG-NEXT: G_INTRINSIC_CONVERGENT (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
|
||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
|
||||
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
|
||||
# DEBUG-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
|
||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
|
||||
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
|
||||
# DEBUG-NEXT: G_ANYEXT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
|
||||
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
|
||||
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
|
||||
|
||||
@ -222,9 +222,9 @@ body: |
|
||||
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s44) = G_SSHLSAT [[TRUNC]], [[C]](s44)
|
||||
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s44)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
@ -236,9 +236,9 @@ body: |
|
||||
%7:_(s44) = G_SSHLSAT %6, %5(s44)
|
||||
%8:_(s64) = G_ANYEXT %7(s44)
|
||||
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64)
|
||||
%11:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
|
||||
%11:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
|
||||
$sgpr0 = COPY %11(s32)
|
||||
%12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
|
||||
%12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
|
||||
$sgpr1 = COPY %12(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
|
||||
@ -261,9 +261,9 @@ body: |
|
||||
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s55) = G_SSHLSAT [[TRUNC]], [[C]](s55)
|
||||
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[SSHLSAT]](s55)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $vgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
@ -276,9 +276,9 @@ body: |
|
||||
%8:_(s55) = G_SSHLSAT %6, %7(s55)
|
||||
%9:_(s64) = G_ANYEXT %8(s55)
|
||||
%10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64)
|
||||
%12:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
|
||||
%12:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %10(s32)
|
||||
$vgpr0 = COPY %12(s32)
|
||||
%13:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %11(s32)
|
||||
%13:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %11(s32)
|
||||
$vgpr1 = COPY %13(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
|
||||
|
||||
@ -37,7 +37,7 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
|
||||
; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[C]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SSHLSAT]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
@ -49,7 +49,7 @@ body: |
|
||||
%5:_(s32) = G_SSHLSAT %3, %4(s32)
|
||||
%7:_(s32) = G_SSHLSAT %5, %6(s32)
|
||||
%9:_(s32) = G_SSHLSAT %7, %8(s32)
|
||||
%10:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
|
||||
%10:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %9(s32)
|
||||
$sgpr0 = COPY %10(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
|
||||
|
||||
@ -19,7 +19,7 @@ body: |
|
||||
; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 0, 0, implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
|
||||
%1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
@ -42,7 +42,7 @@ body: |
|
||||
; CHECK-NEXT: [[DS_SWIZZLE_B32_:%[0-9]+]]:vgpr_32 = DS_SWIZZLE_B32 [[COPY]], 65535, 0, implicit $exec
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[DS_SWIZZLE_B32_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535
|
||||
%1:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 65535
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
@ -23,7 +23,7 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s16) = G_FPTRUNC %0
|
||||
%3:vgpr(s16) = G_FPTRUNC %1
|
||||
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -49,7 +49,7 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s16) = G_FPTRUNC %0
|
||||
%3:vgpr(s16) = G_FPTRUNC %1
|
||||
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -71,7 +71,7 @@ body: |
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
|
||||
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -93,7 +93,7 @@ body: |
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
|
||||
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -119,7 +119,7 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s64) = G_FPEXT %0
|
||||
%3:vgpr(s64) = G_FPEXT %1
|
||||
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -145,6 +145,6 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s64) = G_FPEXT %0
|
||||
%3:vgpr(s64) = G_FPEXT %1
|
||||
%4:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
%4:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -23,7 +23,7 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s16) = G_FPTRUNC %0
|
||||
%3:vgpr(s16) = G_FPTRUNC %1
|
||||
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -49,7 +49,7 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s16) = G_FPTRUNC %0
|
||||
%3:vgpr(s16) = G_FPTRUNC %1
|
||||
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -71,7 +71,7 @@ body: |
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
|
||||
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -93,7 +93,7 @@ body: |
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F32_e64_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
|
||||
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 15
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -119,7 +119,7 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s64) = G_FPEXT %0
|
||||
%3:vgpr(s64) = G_FPEXT %1
|
||||
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 0
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -145,6 +145,6 @@ body: |
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s64) = G_FPEXT %0
|
||||
%3:vgpr(s64) = G_FPEXT %1
|
||||
%4:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
%4:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %2, %3, 15
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2> %t | FileCheck -check-prefix=GCN %s
|
||||
# RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
|
||||
# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s)
|
||||
# ERR: remark: <unknown>:0:0: cannot select: %1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0:sgpr(s32) (in function: readfirstlane_s)
|
||||
|
||||
---
|
||||
name: readfirstlane_v
|
||||
@ -20,7 +20,7 @@ body: |
|
||||
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[V_READFIRSTLANE_B32_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
%1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
@ -39,7 +39,7 @@ body: |
|
||||
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 [[COPY]]
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||
%0:vgpr(s32) = G_CONSTANT i32 123
|
||||
%1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
%1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
@ -57,9 +57,9 @@ body: |
|
||||
; GCN: liveins: $sgpr0
|
||||
; GCN-NEXT: {{ $}}
|
||||
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; GCN-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; GCN-NEXT: S_ENDPGM 0, implicit [[INT]](s32)
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
%1:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
@ -13,7 +13,7 @@ body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: s_barrier
|
||||
; GCN: S_BARRIER
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.barrier)
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.barrier)
|
||||
|
||||
|
||||
...
|
||||
|
||||
@ -51,8 +51,8 @@ define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) {
|
||||
; CHECK-NEXT: liveins: $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
ret i32 %vgpr
|
||||
}
|
||||
@ -66,10 +66,10 @@ define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) {
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
ret i64 %vgpr
|
||||
}
|
||||
@ -83,10 +83,10 @@ define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) {
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
ret <2 x i32> %vgpr
|
||||
}
|
||||
@ -99,10 +99,10 @@ define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
%insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
|
||||
%value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
|
||||
@ -116,8 +116,8 @@ define amdgpu_ps ptr addrspace(3) @sgpr_return_p3i8(ptr addrspace(3) %vgpr) {
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
ret ptr addrspace(3) %vgpr
|
||||
}
|
||||
@ -131,10 +131,10 @@ define amdgpu_ps ptr addrspace(1) @sgpr_return_p1i8(ptr addrspace(1) %vgpr) {
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
ret ptr addrspace(1) %vgpr
|
||||
}
|
||||
@ -146,8 +146,8 @@ define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
ret <2 x i16> %vgpr
|
||||
}
|
||||
|
||||
@ -82,10 +82,10 @@ define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) {
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
main_body:
|
||||
%tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0
|
||||
@ -97,8 +97,8 @@ define amdgpu_vs i32 @non_void_ret() {
|
||||
; CHECK-LABEL: name: non_void_ret
|
||||
; CHECK: bb.1 (%ir-block.0):
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
@ -34,15 +34,15 @@ define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) {
|
||||
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2
|
||||
; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]]
|
||||
; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
|
||||
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64)
|
||||
; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64)
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INTRINSIC_CONVERGENT]](s64)
|
||||
; CHECK-NEXT: G_BRCOND [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS]](s1), %bb.3
|
||||
; CHECK-NEXT: G_BR %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3.atomicrmw.end:
|
||||
; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ATOMIC_CMPXCHG_WITH_SUCCESS]](s32), %bb.2
|
||||
; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
|
||||
; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INTRINSIC_CONVERGENT]](s64), %bb.2
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64)
|
||||
; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32)
|
||||
; CHECK-NEXT: SI_RETURN implicit $vgpr0
|
||||
%oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst
|
||||
|
||||
@ -97,8 +97,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
|
||||
; CHECK-NEXT: G_BRCOND [[INT]](s1), %bb.2
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:%[0-9]+]]:_(s1), [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS1:%[0-9]+]]:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
|
||||
; CHECK-NEXT: G_BRCOND [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS]](s1), %bb.2
|
||||
; CHECK-NEXT: G_BR %bb.3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2.bb1:
|
||||
@ -108,7 +108,7 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
|
||||
; CHECK-NEXT: G_BR %bb.3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3.bb2:
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INTRINSIC_CONVERGENT_W_SIDE_EFFECTS1]](s64)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
bb:
|
||||
br i1 %arg, label %bb2, label %bb1
|
||||
|
||||
@ -2,12 +2,12 @@
|
||||
|
||||
# Make sure incorrect usage of control flow intrinsics fails to select in case some transform separated the intrinsic from its branch.
|
||||
|
||||
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
|
||||
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
|
||||
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
|
||||
|
||||
|
||||
---
|
||||
@ -19,7 +19,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
|
||||
bb.1:
|
||||
G_BRCOND %3, %bb.1
|
||||
@ -34,7 +34,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s32) = G_SELECT %3, %0, %1
|
||||
S_ENDPGM 0, implicit %5
|
||||
|
||||
@ -48,7 +48,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s32) = G_SELECT %3, %0, %1
|
||||
G_BRCOND %3, %bb.1
|
||||
|
||||
@ -67,7 +67,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s1) = G_CONSTANT i1 false
|
||||
%6:_(s1) = G_XOR %3, %5
|
||||
G_BRCOND %6, %bb.2
|
||||
@ -93,7 +93,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s1) = G_CONSTANT i1 true
|
||||
%6:_(s1) = G_OR %3, %5
|
||||
G_BRCOND %6, %bb.2
|
||||
@ -118,7 +118,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s1) = G_CONSTANT i1 true
|
||||
%6:_(s1) = G_XOR %3, %5
|
||||
S_NOP 0, implicit %6
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
# Make sure there's no crash if there is somehow no successor block.
|
||||
|
||||
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
|
||||
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_no_succ_block)
|
||||
|
||||
---
|
||||
name: brcond_si_if_no_succ_block
|
||||
@ -16,6 +16,6 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
G_BRCOND %3, %bb.1
|
||||
...
|
||||
|
||||
@ -150,7 +150,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
G_BRCOND %3, %bb.1
|
||||
|
||||
bb.1:
|
||||
@ -189,7 +189,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %2
|
||||
G_BRCOND %3, %bb.1
|
||||
|
||||
bb.1:
|
||||
@ -244,7 +244,7 @@ body: |
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_NOP 0
|
||||
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
G_BRCOND %3, %bb.2
|
||||
G_BR %bb.1
|
||||
|
||||
@ -303,7 +303,7 @@ body: |
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_NOP 0
|
||||
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
G_BRCOND %3, %bb.1
|
||||
G_BR %bb.2
|
||||
|
||||
@ -360,7 +360,7 @@ body: |
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_NOP 0
|
||||
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
G_BRCOND %3, %bb.1
|
||||
|
||||
bb.2:
|
||||
@ -405,7 +405,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s32) = COPY $vgpr2
|
||||
G_BRCOND %3, %bb.1
|
||||
|
||||
@ -466,7 +466,7 @@ body: |
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_NOP 0
|
||||
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
S_NOP 0
|
||||
S_NOP 0
|
||||
G_BRCOND %3, %bb.2
|
||||
@ -521,7 +521,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s1) = G_CONSTANT i1 true
|
||||
%6:_(s1) = G_XOR %3, %5
|
||||
G_BRCOND %6, %bb.2
|
||||
@ -588,7 +588,7 @@ body: |
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%3:_(s1), %4:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
|
||||
%5:_(s1) = G_CONSTANT i1 true
|
||||
%6:_(s1) = G_XOR %3, %5
|
||||
G_BRCOND %6, %bb.2
|
||||
@ -653,7 +653,7 @@ body: |
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_NOP 0
|
||||
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%4:_(s1) = G_CONSTANT i1 true
|
||||
%5:_(s1) = G_XOR %3, %4
|
||||
G_BRCOND %5, %bb.1
|
||||
@ -711,7 +711,7 @@ body: |
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
S_NOP 0
|
||||
%3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
|
||||
%4:_(s1) = G_CONSTANT i1 true
|
||||
%5:_(s1) = G_XOR %3, %4
|
||||
G_BRCOND %5, %bb.2
|
||||
|
||||
@ -304,7 +304,7 @@ body: |
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%0:sgpr(s32) = COPY $sgpr2
|
||||
@ -313,7 +313,7 @@ body: |
|
||||
%5:sgpr(s32) = G_CONSTANT i32 17
|
||||
%6:sgpr(s32) = G_SMIN %4, %5
|
||||
%8:vgpr(s32) = COPY %6(s32)
|
||||
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
|
||||
%7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
|
||||
$sgpr0 = COPY %7(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
...
|
||||
|
||||
@ -304,7 +304,7 @@ body: |
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
|
||||
; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%0:sgpr(s32) = COPY $sgpr2
|
||||
@ -313,7 +313,7 @@ body: |
|
||||
%5:sgpr(s32) = G_CONSTANT i32 17
|
||||
%6:sgpr(s32) = G_UMIN %4, %5
|
||||
%8:vgpr(s32) = COPY %6(s32)
|
||||
%7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
|
||||
%7:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
|
||||
$sgpr0 = COPY %7(s32)
|
||||
SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
|
||||
|
||||
@ -79,8 +79,8 @@ body: |
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: .2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
|
||||
@ -135,8 +135,8 @@ body: |
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: .2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
|
||||
|
||||
@ -15,11 +15,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s1) = G_TRUNC %0
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
@ -36,11 +36,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[COPY1]](s1)
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s1) = G_TRUNC %0
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %1
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
@ -57,11 +57,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s64)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(eq), %0, %1
|
||||
%3:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), %2
|
||||
%3:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
@ -13,9 +13,9 @@ body: |
|
||||
; CHECK: liveins: $sgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -31,8 +31,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -17,9 +17,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1
|
||||
|
||||
...
|
||||
|
||||
@ -13,9 +13,9 @@ body: |
|
||||
; CHECK: liveins: $sgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -31,8 +31,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -16,10 +16,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -37,10 +37,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -56,10 +56,10 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -76,8 +76,8 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.init), %0, %1
|
||||
...
|
||||
|
||||
@ -14,9 +14,9 @@ body: |
|
||||
; CHECK: liveins: $sgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[COPY]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
|
||||
...
|
||||
|
||||
---
|
||||
@ -32,8 +32,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
|
||||
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
|
||||
G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.v), %0
|
||||
...
|
||||
|
||||
|
||||
@ -17,9 +17,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.permute), %0, %1
|
||||
|
||||
...
|
||||
|
||||
@ -15,8 +15,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -15,8 +15,8 @@ body: |
|
||||
; CHECK: liveins: $sgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s1), %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
|
||||
%1:_(s1), %2:_(s32) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
|
||||
|
||||
...
|
||||
|
||||
@ -12,8 +12,8 @@ body: |
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1), [[INT1:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), [[COPY]](s64)
|
||||
%0:_(s64) = COPY $sgpr0_sgpr1
|
||||
%1:_(s1), %2:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
|
||||
%1:_(s1), %2:_(s64) = G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.else), %0
|
||||
|
||||
...
|
||||
|
||||
@ -16,10 +16,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY3]](s32), 1
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
...
|
||||
|
||||
---
|
||||
@ -35,10 +35,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY2]](s32), [[COPY1]](s32), 1
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
...
|
||||
|
||||
---
|
||||
@ -54,10 +54,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY2]](s32), 1
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
...
|
||||
|
||||
---
|
||||
@ -72,8 +72,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), [[COPY]](s32), [[COPY1]](s32), 1
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.fcmp), %0, %1, 1
|
||||
...
|
||||
|
||||
@ -16,10 +16,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY3]](s32), 32
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
...
|
||||
|
||||
---
|
||||
@ -35,10 +35,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY2]](s32), [[COPY1]](s32), 32
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
...
|
||||
|
||||
---
|
||||
@ -54,10 +54,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY2]](s32), 32
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
...
|
||||
|
||||
---
|
||||
@ -72,8 +72,8 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), [[COPY]](s32), [[COPY1]](s32), 32
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
%2:_(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.icmp), %0, %1, 32
|
||||
...
|
||||
|
||||
@ -138,8 +138,8 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
|
||||
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: {{ $}}
|
||||
; FAST-NEXT: bb.3:
|
||||
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -199,8 +199,8 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
|
||||
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: {{ $}}
|
||||
; GREEDY-NEXT: bb.3:
|
||||
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -268,8 +268,8 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
|
||||
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: {{ $}}
|
||||
; FAST-NEXT: bb.3:
|
||||
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -330,8 +330,8 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
|
||||
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: {{ $}}
|
||||
; GREEDY-NEXT: bb.3:
|
||||
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -159,8 +159,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
|
||||
; FAST-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: {{ $}}
|
||||
; FAST-NEXT: bb.3:
|
||||
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -224,8 +224,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
|
||||
; GREEDY-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: {{ $}}
|
||||
; GREEDY-NEXT: bb.3:
|
||||
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -288,8 +288,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
|
||||
; FAST-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; FAST-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; FAST-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: {{ $}}
|
||||
; FAST-NEXT: bb.3:
|
||||
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -345,8 +345,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
|
||||
; GREEDY-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GREEDY-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GREEDY-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: {{ $}}
|
||||
; GREEDY-NEXT: bb.3:
|
||||
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -429,8 +429,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
|
||||
; FAST-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]]
|
||||
; FAST-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]]
|
||||
; FAST-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]]
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
|
||||
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; FAST-NEXT: {{ $}}
|
||||
; FAST-NEXT: bb.3:
|
||||
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -506,8 +506,8 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
|
||||
; GREEDY-NEXT: [[AND3:%[0-9]+]]:vcc(s1) = G_AND [[AND2]], [[ICMP4]]
|
||||
; GREEDY-NEXT: [[ICMP5:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV23]](s64), [[UV21]]
|
||||
; GREEDY-NEXT: [[AND4:%[0-9]+]]:vcc(s1) = G_AND [[AND3]], [[ICMP5]]
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND4]](s1)
|
||||
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GREEDY-NEXT: {{ $}}
|
||||
; GREEDY-NEXT: bb.3:
|
||||
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -16,7 +16,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_32x32x4bf16_1k_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
@ -24,12 +24,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -47,7 +47,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_16x16x4bf16_1k_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
@ -55,12 +55,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -78,7 +78,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_4x4x4bf16_1k_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
@ -86,12 +86,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -109,7 +109,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_32x32x8bf16_1k_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
@ -117,12 +117,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -140,7 +140,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_16x16x16bf16_1k_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
@ -148,12 +148,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16bf16.1k), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -171,7 +171,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f64_16x16x4f64_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
@ -179,12 +179,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<8 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INT]](<8 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
%3:_(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<8 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.16x16x4f64), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3
|
||||
...
|
||||
|
||||
@ -202,7 +202,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f64_4x4x4f64_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1
|
||||
@ -210,11 +210,11 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<2 x s32>) = COPY $agpr0_agpr1
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<2 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1 = COPY [[INT]](<2 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<2 x s32>) = COPY $agpr0_agpr1
|
||||
%3:_(<2 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<2 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f64.4x4x4f64), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1 = COPY %3
|
||||
...
|
||||
|
||||
@ -16,7 +16,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_i32_16x16x32_i8_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
@ -24,12 +24,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x32.i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -47,7 +47,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_i32_32x32x16_i8_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
@ -55,12 +55,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x16.i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -78,7 +78,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_16x16x8_xf32_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3
|
||||
@ -86,12 +86,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8.xf32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -109,7 +109,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
; GREEDY-LABEL: name: mfma_f32_32x32x4_xf32_vva
|
||||
; GREEDY: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
@ -117,12 +117,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), [[COPY]](s64), [[COPY1]](s64), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s64) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4.xf32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -141,7 +141,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: smfmac_f32_16x16x32_f16_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
|
||||
@ -150,13 +150,13 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(s32) = COPY $vgpr20
|
||||
%4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0
|
||||
%4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.f16), %0, %1, %2, %3, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
|
||||
...
|
||||
|
||||
@ -175,7 +175,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
; GREEDY-LABEL: name: smfmac_f32_32x32x16_f16_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
|
||||
@ -184,13 +184,13 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(s32) = COPY $vgpr20
|
||||
%4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0
|
||||
%4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.f16), %0, %1, %2, %3, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
|
||||
...
|
||||
|
||||
@ -209,7 +209,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: smfmac_f32_16x16x32_bf16_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
|
||||
@ -218,13 +218,13 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(s32) = COPY $vgpr20
|
||||
%4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0
|
||||
%4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.16x16x32.bf16), %0, %1, %2, %3, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
|
||||
...
|
||||
|
||||
@ -243,7 +243,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
; GREEDY-LABEL: name: smfmac_f32_32x32x16_bf16_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
|
||||
@ -252,13 +252,13 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(s32) = COPY $vgpr20
|
||||
%4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0
|
||||
%4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.f32.32x32x16.bf16), %0, %1, %2, %3, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
|
||||
...
|
||||
|
||||
@ -277,7 +277,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
; GREEDY-LABEL: name: smfmac_i32_16x16x64_i8_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, $vgpr20
|
||||
@ -286,13 +286,13 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<4 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(s32) = COPY $vgpr20
|
||||
%4:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0
|
||||
%4:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.16x16x64.i8), %0, %1, %2, %3, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
|
||||
...
|
||||
|
||||
@ -311,7 +311,7 @@ body: |
|
||||
; FAST-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; FAST-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; FAST-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; FAST-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
; GREEDY-LABEL: name: smfmac_i32_32x32x32_i8_vva
|
||||
; GREEDY: liveins: $vgpr1_vgpr2, $vgpr2_vgpr3_vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr20
|
||||
@ -320,12 +320,12 @@ body: |
|
||||
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr20
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), [[COPY]](s64), [[COPY1]](s128), [[COPY2]](<16 x s32>), [[COPY3]](s32), 0, 0
|
||||
; GREEDY-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s64) = COPY $vgpr0_vgpr1
|
||||
%1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(s32) = COPY $vgpr20
|
||||
%4:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0
|
||||
%4:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.smfmac.i32.32x32x32.i8), %0, %1, %2, %3, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %4
|
||||
...
|
||||
|
||||
@ -16,12 +16,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -42,12 +42,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -65,12 +65,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -91,12 +91,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -114,12 +114,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -140,12 +140,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -163,12 +163,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -189,12 +189,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -212,12 +212,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -238,12 +238,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -261,12 +261,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -287,12 +287,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -310,12 +310,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -336,12 +336,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -359,12 +359,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -385,12 +385,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -408,12 +408,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -434,12 +434,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -457,12 +457,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<4 x s16>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -483,12 +483,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<4 x s16>) = COPY $sgpr32_sgpr33
|
||||
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -506,12 +506,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -532,12 +532,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -555,12 +555,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -581,12 +581,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -604,12 +604,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -630,12 +630,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -653,12 +653,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -679,12 +679,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -702,12 +702,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -728,12 +728,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(s32) = COPY $sgpr32
|
||||
%1:_(s32) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -751,12 +751,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -777,12 +777,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
|
||||
%3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<32 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3
|
||||
...
|
||||
|
||||
@ -800,12 +800,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -826,12 +826,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -849,12 +849,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -875,12 +875,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -898,12 +898,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -924,12 +924,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<16 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3
|
||||
...
|
||||
|
||||
@ -947,12 +947,12 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr2
|
||||
%2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -973,11 +973,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
|
||||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0
|
||||
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $sgpr32
|
||||
%1:_(<2 x s16>) = COPY $sgpr33
|
||||
%2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
|
||||
%3:_(<4 x s32>) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
|
||||
...
|
||||
|
||||
@ -81,8 +81,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -128,8 +128,8 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -187,8 +187,8 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -81,8 +81,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -128,8 +128,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %20, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY5]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -187,8 +187,8 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -14,9 +14,9 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
...
|
||||
|
||||
---
|
||||
@ -30,7 +30,7 @@ body: |
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), %0
|
||||
...
|
||||
|
||||
@ -15,10 +15,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -33,10 +33,10 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -52,10 +52,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -72,10 +72,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -93,11 +93,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: S_ENDPGM 0, implicit [[INT]](s32)
|
||||
%0:_(s32) = COPY $agpr0
|
||||
%1:_(s32) = COPY $agpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
@ -114,10 +114,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:agpr(s32) = COPY $agpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $agpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -135,10 +135,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY3]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $agpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
---
|
||||
@ -155,8 +155,8 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr(s32) = COPY $agpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY2]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $agpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readlane), %0, %1
|
||||
...
|
||||
|
||||
@ -16,8 +16,8 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse
|
||||
; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
|
||||
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32))
|
||||
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32)
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
|
||||
%val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
|
||||
ret i32 %val
|
||||
@ -37,11 +37,11 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg
|
||||
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4)
|
||||
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>)
|
||||
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
|
||||
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
|
||||
%val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
|
||||
ret <2 x i32> %val
|
||||
@ -61,14 +61,14 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
|
||||
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
|
||||
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
|
||||
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
|
||||
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
|
||||
; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
|
||||
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
|
||||
%val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
|
||||
ret <3 x i32> %val
|
||||
@ -88,29 +88,29 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg
|
||||
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4)
|
||||
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
|
||||
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
|
||||
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
|
||||
; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
|
||||
; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
|
||||
; GFX7-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
|
||||
; GFX7-NEXT: $sgpr3 = COPY [[INT3]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
|
||||
; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
|
||||
; GFX7-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
|
||||
; GFX7-NEXT: $sgpr4 = COPY [[INT4]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
|
||||
; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
|
||||
; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
|
||||
; GFX7-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
|
||||
; GFX7-NEXT: $sgpr5 = COPY [[INT5]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
|
||||
; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
|
||||
; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
|
||||
; GFX7-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
|
||||
; GFX7-NEXT: $sgpr6 = COPY [[INT6]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
|
||||
; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
|
||||
; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
|
||||
; GFX7-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
|
||||
; GFX7-NEXT: $sgpr7 = COPY [[INT7]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
|
||||
; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
|
||||
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
|
||||
%val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
|
||||
ret <8 x i32> %val
|
||||
@ -130,53 +130,53 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr
|
||||
; GFX7-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4)
|
||||
; GFX7-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>)
|
||||
; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32)
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INT]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32)
|
||||
; GFX7-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
|
||||
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32)
|
||||
; GFX7-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INT1]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32)
|
||||
; GFX7-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
|
||||
; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32)
|
||||
; GFX7-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; GFX7-NEXT: $sgpr2 = COPY [[INT2]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32)
|
||||
; GFX7-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
|
||||
; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32)
|
||||
; GFX7-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
|
||||
; GFX7-NEXT: $sgpr3 = COPY [[INT3]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32)
|
||||
; GFX7-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
|
||||
; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32)
|
||||
; GFX7-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
|
||||
; GFX7-NEXT: $sgpr4 = COPY [[INT4]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
|
||||
; GFX7-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
|
||||
; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32)
|
||||
; GFX7-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
|
||||
; GFX7-NEXT: $sgpr5 = COPY [[INT5]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32)
|
||||
; GFX7-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
|
||||
; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32)
|
||||
; GFX7-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
|
||||
; GFX7-NEXT: $sgpr6 = COPY [[INT6]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32)
|
||||
; GFX7-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
|
||||
; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32)
|
||||
; GFX7-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
|
||||
; GFX7-NEXT: $sgpr7 = COPY [[INT7]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32)
|
||||
; GFX7-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
|
||||
; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32)
|
||||
; GFX7-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
|
||||
; GFX7-NEXT: $sgpr8 = COPY [[INT8]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32)
|
||||
; GFX7-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32)
|
||||
; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32)
|
||||
; GFX7-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
|
||||
; GFX7-NEXT: $sgpr9 = COPY [[INT9]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32)
|
||||
; GFX7-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32)
|
||||
; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32)
|
||||
; GFX7-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
|
||||
; GFX7-NEXT: $sgpr10 = COPY [[INT10]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32)
|
||||
; GFX7-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32)
|
||||
; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32)
|
||||
; GFX7-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
|
||||
; GFX7-NEXT: $sgpr11 = COPY [[INT11]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32)
|
||||
; GFX7-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32)
|
||||
; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32)
|
||||
; GFX7-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
|
||||
; GFX7-NEXT: $sgpr12 = COPY [[INT12]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32)
|
||||
; GFX7-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32)
|
||||
; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32)
|
||||
; GFX7-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
|
||||
; GFX7-NEXT: $sgpr13 = COPY [[INT13]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32)
|
||||
; GFX7-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32)
|
||||
; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32)
|
||||
; GFX7-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
|
||||
; GFX7-NEXT: $sgpr14 = COPY [[INT14]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32)
|
||||
; GFX7-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32)
|
||||
; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32)
|
||||
; GFX7-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
|
||||
; GFX7-NEXT: $sgpr15 = COPY [[INT15]](s32)
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32)
|
||||
; GFX7-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32)
|
||||
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
|
||||
%val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
|
||||
ret <16 x i32> %val
|
||||
@ -887,8 +887,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -939,8 +939,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -993,8 +993,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1045,8 +1045,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1096,8 +1096,8 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1149,8 +1149,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1214,8 +1214,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1277,8 +1277,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1339,8 +1339,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1401,8 +1401,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1463,8 +1463,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
@ -1524,8 +1524,8 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
|
||||
; GFX7-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; GFX7-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; GFX7-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; GFX7-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX7-NEXT: {{ $}}
|
||||
; GFX7-NEXT: bb.3:
|
||||
; GFX7-NEXT: successors: %bb.4, %bb.2
|
||||
|
||||
@ -79,8 +79,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -125,8 +125,8 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -183,8 +183,8 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -81,8 +81,8 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -127,8 +127,8 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -185,8 +185,8 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -79,8 +79,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -125,8 +125,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY6]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -183,8 +183,8 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -81,8 +81,8 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV6]](s64), [[UV4]]
|
||||
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV7]](s64), [[UV5]]
|
||||
; CHECK-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -127,8 +127,8 @@ define amdgpu_ps void @struct_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %19, %bb.3
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
@ -185,8 +185,8 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]]
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND1]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3:
|
||||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
@ -16,10 +16,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0, 0
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -37,10 +37,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0, 0
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -58,10 +58,10 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0, 0
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -78,9 +78,9 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0, 0
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
%2:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.update.dpp), %0, %1, 0, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
@ -17,11 +17,11 @@ body: |
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY2]](s1)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2
|
||||
...
|
||||
|
||||
---
|
||||
@ -37,11 +37,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[ICMP]](s1)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s1) = G_ICMP intpred(ne), %0, %1
|
||||
%3:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %2
|
||||
%3:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %2
|
||||
...
|
||||
|
||||
---
|
||||
@ -57,8 +57,8 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), [[COPY1]](s1)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s1) = G_TRUNC %0
|
||||
%2:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.wqm.vote), %1
|
||||
%2:_(s1) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wqm.vote), %1
|
||||
...
|
||||
|
||||
@ -16,11 +16,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = COPY $sgpr2
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
|
||||
@ -36,11 +36,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = COPY $vgpr0
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
|
||||
@ -57,11 +57,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = COPY $vgpr1
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
|
||||
@ -79,11 +79,11 @@ body: |
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
|
||||
@ -100,9 +100,9 @@ body: |
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]](s32), [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = COPY $vgpr1
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
%3:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
@ -14,9 +14,9 @@ body: |
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY1]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0
|
||||
...
|
||||
|
||||
---
|
||||
@ -30,7 +30,7 @@ body: |
|
||||
; CHECK: liveins: $vgpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32)
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), [[COPY]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.strict.wwm), %0
|
||||
%1:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.strict.wwm), %0
|
||||
...
|
||||
|
||||
@ -33,8 +33,8 @@ body: |
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.2
|
||||
; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[V_READFIRSTLANE_B32_]](s32), [[COPY1]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: .2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
|
||||
@ -101,8 +101,8 @@ body: |
|
||||
; CHECK-NEXT: [[AND1:%[0-9]+]]:vcc(s1) = G_AND [[AND]], [[ICMP2]]
|
||||
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
|
||||
; CHECK-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
|
||||
; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
|
||||
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: .2:
|
||||
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s
|
||||
|
||||
; GFX6ERR-SDAG: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.gws.sema.release.all
|
||||
; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0)
|
||||
; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store (s32) into custom "GWSResource") (in function: gws_sema_release_all_offset0)
|
||||
|
||||
; GCN-LABEL: {{^}}gws_sema_release_all_offset0:
|
||||
; NOLOOP-DAG: s_mov_b32 m0, 0{{$}}
|
||||
|
||||
@ -165,14 +165,12 @@ TEST_F(AArch64GISelMITest, BuildIntrinsic) {
|
||||
collectCopies(Copies, MF);
|
||||
|
||||
// Make sure DstOp version works. sqrt is just a placeholder intrinsic.
|
||||
B.buildIntrinsic(Intrinsic::sqrt, {S64}, false)
|
||||
.addUse(Copies[0]);
|
||||
B.buildIntrinsic(Intrinsic::sqrt, {S64}).addUse(Copies[0]);
|
||||
|
||||
// Make sure register version works
|
||||
SmallVector<Register, 1> Results;
|
||||
Results.push_back(MRI->createGenericVirtualRegister(S64));
|
||||
B.buildIntrinsic(Intrinsic::sqrt, Results, false)
|
||||
.addUse(Copies[1]);
|
||||
B.buildIntrinsic(Intrinsic::sqrt, Results).addUse(Copies[1]);
|
||||
|
||||
auto CheckStr = R"(
|
||||
; CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY $x0
|
||||
|
||||
@ -498,6 +498,10 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
|
||||
return &Target.getInstruction(Equiv.getValueAsDef("IfFloatingPoint"));
|
||||
}
|
||||
|
||||
if (!Equiv.isValueUnset("IfConvergent") &&
|
||||
N->getIntrinsicInfo(CGP)->isConvergent)
|
||||
return &Target.getInstruction(Equiv.getValueAsDef("IfConvergent"));
|
||||
|
||||
for (const TreePredicateCall &Call : N->getPredicateCalls()) {
|
||||
const TreePredicateFn &Predicate = Call.Fn;
|
||||
if (!Equiv.isValueUnset("IfSignExtend") &&
|
||||
@ -863,7 +867,10 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
|
||||
// Match the used operands (i.e. the children of the operator).
|
||||
bool IsIntrinsic =
|
||||
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC" ||
|
||||
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS";
|
||||
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_W_SIDE_EFFECTS" ||
|
||||
SrcGIOrNull->TheDef->getName() == "G_INTRINSIC_CONVERGENT" ||
|
||||
SrcGIOrNull->TheDef->getName() ==
|
||||
"G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS";
|
||||
const CodeGenIntrinsic *II = Src->getIntrinsicInfo(CGP);
|
||||
if (IsIntrinsic && !II)
|
||||
return failedImport("Expected IntInit containing intrinsic ID)");
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user