[PowerPC] Intrinsics and tests for dmr insert/extract (#135653)
Add some intrinsics and LIT tests for PPC dmr insert/extract instructions.
This commit is contained in:
parent
b278aa3197
commit
a903c7b7f5
@ -1677,6 +1677,22 @@ let TargetPrefix = "ppc" in {
|
||||
DefaultAttrsIntrinsic<[llvm_v1024i1_ty], [llvm_v1024i1_ty,
|
||||
llvm_v1024i1_ty], [IntrNoMem]>;
|
||||
|
||||
// Takes a v1024i1 value and an i32 selector and returns two v256i1 values.
// The selector is marked ImmArg so that a non-constant operand is rejected by
// the IR verifier instead of reaching instruction selection.
def int_ppc_mma_dmxxextfdmr512 :
    DefaultAttrsIntrinsic<[llvm_v256i1_ty, llvm_v256i1_ty],
                          [llvm_v1024i1_ty, llvm_i32_ty],
                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// Takes a v1024i1 value, two v256i1 values and an i32 selector and returns a
// v1024i1 value. The selector is marked ImmArg so that a non-constant operand
// is rejected by the IR verifier instead of reaching instruction selection.
def int_ppc_mma_dmxxinstdmr512 :
    DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
                          [llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v256i1_ty,
                           llvm_i32_ty],
                          [IntrNoMem, ImmArg<ArgIndex<3>>]>;
|
||||
|
||||
// Takes a v1024i1 value and an i32 selector and returns one v256i1 value.
// The selector is marked ImmArg so that a non-constant operand is rejected by
// the IR verifier instead of reaching instruction selection.
def int_ppc_mma_dmxxextfdmr256 :
    DefaultAttrsIntrinsic<[llvm_v256i1_ty],
                          [llvm_v1024i1_ty, llvm_i32_ty],
                          [IntrNoMem, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
// Takes a v1024i1 value, one v256i1 value and an i32 selector and returns a
// v1024i1 value. The selector is marked ImmArg so that a non-constant operand
// is rejected by the IR verifier instead of reaching instruction selection.
def int_ppc_mma_dmxxinstdmr256 :
    DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
                          [llvm_v1024i1_ty, llvm_v256i1_ty, llvm_i32_ty],
                          [IntrNoMem, ImmArg<ArgIndex<2>>]>;
|
||||
|
||||
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
|
||||
defm int_ppc_mma_xvi4ger8 :
|
||||
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
|
||||
|
@ -294,6 +294,10 @@ static inline bool isVFRegister(unsigned Reg) {
|
||||
static inline bool isVRRegister(unsigned Reg) {
|
||||
return Reg >= PPC::V0 && Reg <= PPC::V31;
|
||||
}
|
||||
|
||||
static inline bool isDMRROWpRegister(unsigned Reg) {
|
||||
return Reg >= PPC::DMRROWp0 && Reg <= PPC::DMRROWp31;
|
||||
}
|
||||
} // namespace PPC
|
||||
} // namespace llvm
|
||||
|
||||
|
@ -11146,6 +11146,116 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
return DAG.getMergeValues(RetOps, dl);
|
||||
}
|
||||
|
||||
case Intrinsic::ppc_mma_dmxxextfdmr512: {
  // Lower llvm.ppc.mma.dmxxextfdmr512(dmr, P): take the half of the v1024i1
  // operand selected by P as a v512i1 subregister, then emit the matching
  // extract instruction, which produces two v256i1 results.
  assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
         "Specify P of 0 or 1 for lower or upper 512 bits");
  unsigned HiLo = Idx->getSExtValue();
  // P == 0 pairs the base opcode with sub_wacc_lo; otherwise the _HI opcode
  // is paired with sub_wacc_hi.
  unsigned Opcode;
  unsigned Subx;
  if (HiLo == 0) {
    Opcode = PPC::DMXXEXTFDMR512;
    Subx = PPC::sub_wacc_lo;
  } else {
    Opcode = PPC::DMXXEXTFDMR512_HI;
    Subx = PPC::sub_wacc_hi;
  }
  SDValue Subreg(
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
                         Op.getOperand(1),
                         DAG.getTargetConstant(Subx, dl, MVT::i32)),
      0);
  EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
  return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
}
|
||||
|
||||
case Intrinsic::ppc_mma_dmxxextfdmr256: {
  // Lower llvm.ppc.mma.dmxxextfdmr256(dmr, idx): take the row pair of the
  // v1024i1 operand selected by idx as a v256i1 subregister, then emit
  // DMXXEXTFDMR256 on it with idx as the immediate.
  assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // Both bounds must hold: the switch below only assigns Subx for 0..3.
  assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
         "Specify a dmr row pair 0-3");
  unsigned IdxVal = Idx->getSExtValue();
  unsigned Subx;
  // Indices 0 and 1 map directly to sub_dmrrowp0/1; indices 2 and 3 use the
  // composed sub_wacc_hi_then_sub_dmrrowp0/1 subregister indices.
  switch (IdxVal) {
  case 0:
    Subx = PPC::sub_dmrrowp0;
    break;
  case 1:
    Subx = PPC::sub_dmrrowp1;
    break;
  case 2:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
    break;
  case 3:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
    break;
  }
  SDValue Subreg(
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
                         Op.getOperand(1),
                         DAG.getTargetConstant(Subx, dl, MVT::i32)),
      0);
  SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
  return SDValue(
      DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
      0);
}
|
||||
|
||||
case Intrinsic::ppc_mma_dmxxinstdmr512: {
  // Lower llvm.ppc.mma.dmxxinstdmr512(dmr, a, b, P): build a v512i1 value
  // from the two v256i1 operands with the insert instruction, then place it
  // into the half of the v1024i1 operand selected by P via INSERT_SUBREG.
  assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
  assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
         "Specify P of 0 or 1 for lower or upper 512 bits");
  unsigned HiLo = Idx->getSExtValue();
  // P == 0 pairs the base opcode with sub_wacc_lo; otherwise the _HI opcode
  // is paired with sub_wacc_hi.
  unsigned Opcode;
  unsigned Subx;
  if (HiLo == 0) {
    Opcode = PPC::DMXXINSTDMR512;
    Subx = PPC::sub_wacc_lo;
  } else {
    Opcode = PPC::DMXXINSTDMR512_HI;
    Subx = PPC::sub_wacc_hi;
  }
  SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
  SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
  SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
  // Spelled TargetOpcode:: to match the EXTRACT_SUBREG uses in the extract
  // lowerings; it is the same target-independent opcode.
  return SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
                                    MVT::v1024i1, Op.getOperand(1), Wacc,
                                    SubReg),
                 0);
}
|
||||
|
||||
case Intrinsic::ppc_mma_dmxxinstdmr256: {
  // Lower llvm.ppc.mma.dmxxinstdmr256(dmr, v, idx): emit DMXXINSTDMR256 on
  // the v256i1 operand with idx as the immediate, then place the result into
  // the row pair of the v1024i1 operand selected by idx via INSERT_SUBREG.
  assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
  auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // Both bounds must hold: the switch below only assigns Subx for 0..3.
  assert(Idx && (Idx->getSExtValue() >= 0 && Idx->getSExtValue() <= 3) &&
         "Specify a dmr row pair 0-3");
  unsigned IdxVal = Idx->getSExtValue();
  unsigned Subx;
  // Indices 0 and 1 map directly to sub_dmrrowp0/1; indices 2 and 3 use the
  // composed sub_wacc_hi_then_sub_dmrrowp0/1 subregister indices.
  switch (IdxVal) {
  case 0:
    Subx = PPC::sub_dmrrowp0;
    break;
  case 1:
    Subx = PPC::sub_dmrrowp1;
    break;
  case 2:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
    break;
  case 3:
    Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
    break;
  }
  SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
  SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
  SDValue Ops[] = {Op.getOperand(2), P};
  SDValue DMRRowp = SDValue(
      DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
  // Spelled TargetOpcode:: to match the EXTRACT_SUBREG uses in the extract
  // lowerings; it is the same target-independent opcode.
  return SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
                                    MVT::v1024i1, Op.getOperand(1), DMRRowp,
                                    SubReg),
                 0);
}
|
||||
|
||||
case Intrinsic::ppc_mma_xxmfacc:
|
||||
case Intrinsic::ppc_mma_xxmtacc: {
|
||||
// Allow pre-isa-future subtargets to lower as normal.
|
||||
|
@ -196,6 +196,12 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
|
||||
assert(MO.getReg() > PPC::NoRegister &&
|
||||
MO.getReg() < PPC::NUM_TARGET_REGS &&
|
||||
"Invalid register for this target!");
|
||||
// A DMR row-pair register is emitted as its containing DMR register: the
// row pairs are numbered consecutively, four per DMR, hence the divide by 4.
|
||||
if (PPC::isDMRROWpRegister(MO.getReg())) {
|
||||
OutMO =
|
||||
MCOperand::createReg(PPC::DMR0 + (MO.getReg() - PPC::DMRROWp0) / 4);
|
||||
return true;
|
||||
}
|
||||
// Ignore all implicit register operands.
|
||||
if (MO.isImplicit())
|
||||
return false;
|
||||
|
@ -129,6 +129,248 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Calls dmxxextfdmr512 on the DMR value returned by dmsetdmrz, once with
; selector 0 and once with selector 1. Element 0 of each returned pair is
; stored to %rp1 and %rp3 respectively; %vp1, %rp2 and %rp4 are unused.
; NOTE(review): element 1 of each result is never extracted or stored, so the
; second output register pair is not covered -- confirm that is intended.
define void @text512(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
|
||||
; CHECK-LABEL: text512:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-NEXT: stxv v2, 16(r4)
|
||||
; CHECK-NEXT: stxv v3, 0(r4)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxv v2, 16(r6)
|
||||
; CHECK-NEXT: stxv v3, 0(r6)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: text512:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxv v3, 16(r6)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r6)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
|
||||
%x = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 0)
|
||||
%p = extractvalue { <256 x i1>, <256 x i1 > } %x, 0
|
||||
store <256 x i1> %p, ptr %rp1, align 16
|
||||
%y = call { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1> %z, i32 1)
|
||||
%q = extractvalue { <256 x i1>, <256 x i1 > } %y, 0
|
||||
store <256 x i1> %q, ptr %rp3, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Calls dmxxextfdmr256 on the DMR value returned by dmsetdmrz with each
; selector 0 through 3 and stores the four v256i1 results to %rp1..%rp4 in
; that order. %vp1 is unused.
define void @text256(ptr %vp1, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
|
||||
; CHECK-LABEL: text256:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 0
|
||||
; CHECK-NEXT: stxv v2, 16(r4)
|
||||
; CHECK-NEXT: stxv v3, 0(r4)
|
||||
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 1
|
||||
; CHECK-NEXT: stxv v2, 16(r5)
|
||||
; CHECK-NEXT: stxv v3, 0(r5)
|
||||
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 2
|
||||
; CHECK-NEXT: stxv v2, 16(r6)
|
||||
; CHECK-NEXT: stxv v3, 0(r6)
|
||||
; CHECK-NEXT: dmxxextfdmr256 vsp34, dmr0, 3
|
||||
; CHECK-NEXT: stxv v2, 16(r7)
|
||||
; CHECK-NEXT: stxv v3, 0(r7)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: text256:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 0
|
||||
; CHECK-BE-NEXT: stxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 1
|
||||
; CHECK-BE-NEXT: stxv v3, 16(r5)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r5)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 2
|
||||
; CHECK-BE-NEXT: stxv v3, 16(r6)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r6)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmr0, 3
|
||||
; CHECK-BE-NEXT: stxv v3, 16(r7)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r7)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
|
||||
%x = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 0)
|
||||
store <256 x i1> %x, ptr %rp1, align 16
|
||||
%q = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 1)
|
||||
store <256 x i1> %q, ptr %rp2, align 16
|
||||
%w = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 2)
|
||||
store <256 x i1> %w, ptr %rp3, align 16
|
||||
%y = call <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1> %z, i32 3)
|
||||
store <256 x i1> %y, ptr %rp4, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Calls dmxxinstdmr512 twice. The first call inserts the vectors loaded from
; %vp1/%vp2 into the DMR value returned by dmsetdmrz with selector 0, and the
; full v1024i1 result is stored to %rp1. The second call inserts the vectors
; loaded from %vp3/%vp4 into that result with selector 1, and the full result
; is stored to %rp2.
define void @tins512(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2) {
|
||||
; CHECK-LABEL: tins512:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv v2, 16(r3)
|
||||
; CHECK-NEXT: lxv v3, 0(r3)
|
||||
; CHECK-NEXT: lxv v4, 16(r4)
|
||||
; CHECK-NEXT: lxv v5, 0(r4)
|
||||
; CHECK-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r7)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r7)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r7)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r7)
|
||||
; CHECK-NEXT: lxv v2, 16(r5)
|
||||
; CHECK-NEXT: lxv v4, 16(r6)
|
||||
; CHECK-NEXT: lxv v3, 0(r5)
|
||||
; CHECK-NEXT: lxv v5, 0(r6)
|
||||
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r8)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r8)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r8)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r8)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: tins512:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv v4, 0(r4)
|
||||
; CHECK-BE-NEXT: lxv v5, 16(r4)
|
||||
; CHECK-BE-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r7)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r7)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r5)
|
||||
; CHECK-BE-NEXT: lxv v4, 0(r6)
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r5)
|
||||
; CHECK-BE-NEXT: lxv v5, 16(r6)
|
||||
; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r8)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r8)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r8)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r8)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
|
||||
%l1 = load <256 x i1>, ptr %vp1, align 16
|
||||
%r1 = load <256 x i1>, ptr %vp2, align 16
|
||||
%a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %z, <256 x i1> %l1, <256 x i1> %r1, i32 0)
|
||||
store <1024 x i1> %a, ptr %rp1, align 16
|
||||
%l2 = load <256 x i1>, ptr %vp3, align 16
|
||||
%r2 = load <256 x i1>, ptr %vp4, align 16
|
||||
%b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1> %a, <256 x i1> %l2, <256 x i1> %r2, i32 1)
|
||||
store <1024 x i1> %b, ptr %rp2, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; Chains four dmxxinstdmr256 calls with selectors 0 through 3, starting from
; the DMR value returned by dmsetdmrz, and stores the full v1024i1 result of
; each call to %rp1..%rp4 in that order.
; NOTE(review): the calls with selectors 2 and 3 pass %l2, so the loads %l3
; (from %vp3) and %l4 (from %vp4) are never used, and the CHECK lines contain
; no loads from r5 or r6. This looks like a copy-paste slip; if %l3 and %l4
; were intended, fix the operands and regenerate the CHECK lines.
define void @tins256(ptr %vp1, ptr %vp2, ptr %vp3, ptr %vp4, ptr %rp1, ptr %rp2, ptr %rp3, ptr %rp4) {
|
||||
; CHECK-LABEL: tins256:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv v2, 16(r3)
|
||||
; CHECK-NEXT: lxv v3, 0(r3)
|
||||
; CHECK-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-NEXT: dmxxinstdmr256 dmr0, vsp34, 0
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r7)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r7)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r7)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r7)
|
||||
; CHECK-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-NEXT: lxv v3, 0(r4)
|
||||
; CHECK-NEXT: dmxxinstdmr256 dmr0, vsp34, 1
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
|
||||
; CHECK-NEXT: stxvp vsp36, 96(r8)
|
||||
; CHECK-NEXT: stxvp vsp32, 64(r8)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxvp vsp36, 32(r8)
|
||||
; CHECK-NEXT: stxvp vsp32, 0(r8)
|
||||
; CHECK-NEXT: dmxxinstdmr256 dmr0, vsp34, 2
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
|
||||
; CHECK-NEXT: stxvp vsp36, 96(r9)
|
||||
; CHECK-NEXT: stxvp vsp32, 64(r9)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxvp vsp36, 32(r9)
|
||||
; CHECK-NEXT: stxvp vsp32, 0(r9)
|
||||
; CHECK-NEXT: dmxxinstdmr256 dmr0, vsp34, 3
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r10)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r10)
|
||||
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r10)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r10)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: tins256:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r3)
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r3)
|
||||
; CHECK-BE-NEXT: dmsetdmrz dmr0
|
||||
; CHECK-BE-NEXT: dmxxinstdmr256 dmr0, vsp34, 0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r7)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r7)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: dmxxinstdmr256 dmr0, vsp34, 1
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp32, 96(r8)
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 64(r8)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp32, 32(r8)
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 0(r8)
|
||||
; CHECK-BE-NEXT: dmxxinstdmr256 dmr0, vsp34, 2
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp32, 96(r9)
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 64(r9)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp32, 32(r9)
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 0(r9)
|
||||
; CHECK-BE-NEXT: dmxxinstdmr256 dmr0, vsp34, 3
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r10)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r10)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r10)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r10)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%z = call <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
|
||||
%l1 = load <256 x i1>, ptr %vp1, align 16
|
||||
%a = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %z, <256 x i1> %l1, i32 0)
|
||||
store <1024 x i1> %a, ptr %rp1, align 16
|
||||
%l2 = load <256 x i1>, ptr %vp2, align 16
|
||||
%b = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %a, <256 x i1> %l2, i32 1)
|
||||
store <1024 x i1> %b, ptr %rp2, align 16
|
||||
%l3 = load <256 x i1>, ptr %vp3, align 16
|
||||
%c = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %b, <256 x i1> %l2, i32 2)
|
||||
store <1024 x i1> %c, ptr %rp3, align 16
|
||||
%l4 = load <256 x i1>, ptr %vp4, align 16
|
||||
%d = call <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1> %c, <256 x i1> %l2, i32 3)
|
||||
store <1024 x i1> %d, ptr %rp4, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmsetdmrz()
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmmr(<1024 x i1>)
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmxor(<1024 x i1>, <1024 x i1>)
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr512(<1024 x i1>, <256 x i1>, <256 x i1>, i32)
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmxxinstdmr256(<1024 x i1>, <256 x i1>, i32)
|
||||
declare { <256 x i1>, <256 x i1> } @llvm.ppc.mma.dmxxextfdmr512(<1024 x i1>, i32)
|
||||
declare <256 x i1> @llvm.ppc.mma.dmxxextfdmr256(<1024 x i1>, i32)
|
||||
|
Loading…
x
Reference in New Issue
Block a user