Compare commits
1 Commits
main
...
users/bjop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c174035432 |
@ -1885,6 +1885,12 @@ LLVM_ABI SDValue peekThroughOneUseBitcasts(SDValue V);
|
||||
/// If \p V is not an extracted subvector, it is returned as-is.
|
||||
LLVM_ABI SDValue peekThroughExtractSubvectors(SDValue V);
|
||||
|
||||
/// Recursively peek through INSERT_VECTOR_ELT nodes, returning the source
|
||||
/// vector operand of \p V, as long as \p V is an INSERT_VECTOR_ELT operation
|
||||
/// that does not insert into any of the demanded vector elts.
|
||||
LLVM_ABI SDValue peekThroughInsertVectorElt(SDValue V,
|
||||
const APInt &DemandedElts);
|
||||
|
||||
/// Return the non-truncated source operand of \p V if it exists.
|
||||
/// If \p V is not a truncation, it is returned as-is.
|
||||
LLVM_ABI SDValue peekThroughTruncates(SDValue V);
|
||||
|
||||
@ -23298,6 +23298,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
|
||||
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
|
||||
|
||||
// Insert into out-of-bounds element is undefined.
|
||||
// The code below relies on this special case being handled early.
|
||||
if (IndexC && VT.isFixedLengthVector() &&
|
||||
IndexC->getZExtValue() >= VT.getVectorNumElements())
|
||||
return DAG.getUNDEF(VT);
|
||||
@ -23308,14 +23309,28 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
|
||||
InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
|
||||
return InVec;
|
||||
|
||||
if (!IndexC) {
|
||||
// If this is variable insert to undef vector, it might be better to splat:
|
||||
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
|
||||
if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
|
||||
return DAG.getSplat(VT, DL, InVal);
|
||||
return SDValue();
|
||||
// If this is variable insert to undef vector, it might be better to splat:
|
||||
// inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
|
||||
if (!IndexC && InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
|
||||
return DAG.getSplat(VT, DL, InVal);
|
||||
|
||||
// Try to drop insert of UNDEF/POISON elements. This is also done in getNode,
|
||||
// but we also do it as a DAG combine since for example simplifications into
|
||||
// SPLAT_VECTOR/BUILD_VECTOR may turn poison elements into undef/zero etc, and
|
||||
// then suddenly the InVec is guaranteed to not be poison.
|
||||
if (InVal.isUndef()) {
|
||||
if (IndexC && VT.isFixedLengthVector()) {
|
||||
APInt EltMask = APInt::getOneBitSet(VT.getVectorNumElements(),
|
||||
IndexC->getZExtValue());
|
||||
if (DAG.isGuaranteedNotToBePoison(InVec, EltMask))
|
||||
return InVec;
|
||||
}
|
||||
return DAG.getFreeze(InVec);
|
||||
}
|
||||
|
||||
if (!IndexC)
|
||||
return SDValue();
|
||||
|
||||
if (VT.isScalableVector())
|
||||
return SDValue();
|
||||
|
||||
@ -27799,18 +27814,42 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
|
||||
SDValue N2 = N->getOperand(2);
|
||||
uint64_t InsIdx = N->getConstantOperandVal(2);
|
||||
|
||||
// If inserting an UNDEF, just return the original vector.
|
||||
if (N1.isUndef())
|
||||
return N0;
|
||||
// If inserting an UNDEF, just return the original vector (unless it makes the
|
||||
// result more poisonous).
|
||||
if (N1.isUndef()) {
|
||||
if (N1.getOpcode() == ISD::POISON)
|
||||
return N0;
|
||||
if (VT.isFixedLengthVector()) {
|
||||
unsigned SubVecNumElts = N1.getValueType().getVectorNumElements();
|
||||
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
|
||||
InsIdx + SubVecNumElts);
|
||||
if (DAG.isGuaranteedNotToBePoison(N0, EltMask))
|
||||
return N0;
|
||||
}
|
||||
return DAG.getFreeze(N0);
|
||||
}
|
||||
|
||||
// If this is an insert of an extracted vector into an undef vector, we can
|
||||
// just use the input to the extract if the types match, and can simplify
|
||||
// If this is an insert of an extracted vector into an undef/poison vector, we
|
||||
// can just use the input to the extract if the types match, and can simplify
|
||||
// in some cases even if they don't.
|
||||
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
|
||||
N1.getOperand(1) == N2) {
|
||||
EVT N1VT = N1.getValueType();
|
||||
EVT SrcVT = N1.getOperand(0).getValueType();
|
||||
if (SrcVT == VT)
|
||||
return N1.getOperand(0);
|
||||
if (SrcVT == VT) {
|
||||
// Need to ensure that result isn't more poisonous if skipping both the
|
||||
// extract+insert.
|
||||
if (N0.getOpcode() == ISD::POISON)
|
||||
return N1.getOperand(0);
|
||||
if (VT.isFixedLengthVector() && N1VT.isFixedLengthVector()) {
|
||||
unsigned SubVecNumElts = N1VT.getVectorNumElements();
|
||||
APInt EltMask = APInt::getBitsSet(VT.getVectorNumElements(), InsIdx,
|
||||
InsIdx + SubVecNumElts);
|
||||
if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0), ~EltMask))
|
||||
return N1.getOperand(0);
|
||||
} else if (DAG.isGuaranteedNotToBePoison(N1.getOperand(0)))
|
||||
return N1.getOperand(0);
|
||||
}
|
||||
// TODO: To remove the zero check, need to adjust the offset to
|
||||
// a multiple of the new src type.
|
||||
if (isNullConstant(N2)) {
|
||||
|
||||
@ -5513,8 +5513,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
|
||||
APInt InVecDemandedElts = DemandedElts;
|
||||
InVecDemandedElts.clearBit(IndexC->getZExtValue());
|
||||
if (!!InVecDemandedElts &&
|
||||
!isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
|
||||
PoisonOnly, Depth + 1))
|
||||
!isGuaranteedNotToBeUndefOrPoison(
|
||||
peekThroughInsertVectorElt(InVec, InVecDemandedElts),
|
||||
InVecDemandedElts, PoisonOnly, Depth + 1))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -8215,23 +8216,42 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
|
||||
// for scalable vectors where we will generate appropriate code to
|
||||
// deal with out-of-bounds cases correctly.
|
||||
if (N3C && N1.getValueType().isFixedLengthVector() &&
|
||||
N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
|
||||
if (N3C && VT.isFixedLengthVector() &&
|
||||
N3C->getZExtValue() >= VT.getVectorNumElements())
|
||||
return getUNDEF(VT);
|
||||
|
||||
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
|
||||
if (N3.isUndef())
|
||||
return getUNDEF(VT);
|
||||
|
||||
// If the inserted element is an UNDEF, just use the input vector.
|
||||
if (N2.isUndef())
|
||||
// If inserting poison, just use the input vector.
|
||||
if (N2.getOpcode() == ISD::POISON)
|
||||
return N1;
|
||||
|
||||
// Inserting undef into undef/poison is still undef.
|
||||
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
|
||||
return getUNDEF(VT);
|
||||
|
||||
// If the inserted element is an UNDEF, just use the input vector.
|
||||
// But not if skipping the insert could make the result more poisonous.
|
||||
if (N2.isUndef()) {
|
||||
if (N3C && VT.isFixedLengthVector()) {
|
||||
APInt EltMask =
|
||||
APInt::getOneBitSet(VT.getVectorNumElements(), N3C->getZExtValue());
|
||||
if (isGuaranteedNotToBePoison(N1, EltMask))
|
||||
return N1;
|
||||
} else if (isGuaranteedNotToBePoison(N1))
|
||||
return N1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::INSERT_SUBVECTOR: {
|
||||
// Inserting undef into undef is still undef.
|
||||
if (N1.isUndef() && N2.isUndef())
|
||||
// If inserting poison, just use the input vector.
|
||||
if (N2.getOpcode() == ISD::POISON)
|
||||
return N1;
|
||||
|
||||
// Inserting undef into undef/poison is still undef.
|
||||
if (N2.getOpcode() == ISD::UNDEF && N1.isUndef())
|
||||
return getUNDEF(VT);
|
||||
|
||||
EVT N2VT = N2.getValueType();
|
||||
@ -8260,11 +8280,37 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
if (VT == N2VT)
|
||||
return N2;
|
||||
|
||||
// If this is an insert of an extracted vector into an undef vector, we
|
||||
// can just use the input to the extract.
|
||||
// If this is an insert of an extracted vector into an undef/poison vector,
|
||||
// we can just use the input to the extract. But not if skipping the
|
||||
// extract+insert could make the result more poisonous.
|
||||
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
|
||||
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
|
||||
return N2.getOperand(0);
|
||||
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) {
|
||||
if (N1.getOpcode() == ISD::POISON)
|
||||
return N2.getOperand(0);
|
||||
if (VT.isFixedLengthVector() && N2VT.isFixedLengthVector()) {
|
||||
unsigned LoBit = N3->getAsZExtVal();
|
||||
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
|
||||
APInt EltMask =
|
||||
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
|
||||
if (isGuaranteedNotToBePoison(N2.getOperand(0), ~EltMask))
|
||||
return N2.getOperand(0);
|
||||
} else if (isGuaranteedNotToBePoison(N2.getOperand(0)))
|
||||
return N2.getOperand(0);
|
||||
}
|
||||
|
||||
// If the inserted subvector is UNDEF, just use the input vector.
|
||||
// But not if skipping the insert could make the result more poisonous.
|
||||
if (N2.isUndef()) {
|
||||
if (VT.isFixedLengthVector()) {
|
||||
unsigned LoBit = N3->getAsZExtVal();
|
||||
unsigned HiBit = LoBit + N2VT.getVectorNumElements();
|
||||
APInt EltMask =
|
||||
APInt::getBitsSet(VT.getVectorNumElements(), LoBit, HiBit);
|
||||
if (isGuaranteedNotToBePoison(N1, EltMask))
|
||||
return N1;
|
||||
} else if (isGuaranteedNotToBePoison(N1))
|
||||
return N1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::BITCAST:
|
||||
@ -12729,6 +12775,23 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
|
||||
return V;
|
||||
}
|
||||
|
||||
/// Walk up a chain of INSERT_VECTOR_ELT nodes starting at \p V, skipping every
/// insert that does not write one of the lanes set in \p DemandedElts.
///
/// \param V            Value to start from; returned unchanged if it is not
///                     an INSERT_VECTOR_ELT.
/// \param DemandedElts Lane mask, indexed by the constant insert index.
/// \returns The first value in the chain that is either not an
///          INSERT_VECTOR_ELT or is an insert that cannot be skipped.
SDValue llvm::peekThroughInsertVectorElt(SDValue V, const APInt &DemandedElts) {
  for (;;) {
    if (V.getOpcode() != ISD::INSERT_VECTOR_ELT)
      return V;

    // Operand 0 is the vector being inserted into, operand 2 the lane index.
    SDValue SrcVec = V.getOperand(0);
    EVT SrcVT = SrcVec.getValueType();
    auto *ConstIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));

    // Only a constant index into a fixed-length vector can be matched against
    // the demanded-lane mask.
    if (!ConstIdx || !SrcVT.isFixedLengthVector())
      return V;

    // Stop at an out-of-bounds insert; this also keeps the mask lookup below
    // within the vector's element count.
    if (!ConstIdx->getAPIntValue().ult(SrcVT.getVectorNumElements()))
      return V;

    // This insert writes a demanded lane, so it must be kept.
    if (DemandedElts[ConstIdx->getZExtValue()])
      return V;

    // The written lane is not demanded: look through to the source vector.
    V = SrcVec;
  }
}
|
||||
|
||||
SDValue llvm::peekThroughTruncates(SDValue V) {
|
||||
while (V.getOpcode() == ISD::TRUNCATE)
|
||||
V = V.getOperand(0);
|
||||
|
||||
@ -3446,8 +3446,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
|
||||
break;
|
||||
}
|
||||
case ISD::INSERT_SUBVECTOR: {
|
||||
// Demand any elements from the subvector and the remainder from the src its
|
||||
// inserted into.
|
||||
// Demand any elements from the subvector and the remainder from the src it
|
||||
// is inserted into.
|
||||
SDValue Src = Op.getOperand(0);
|
||||
SDValue Sub = Op.getOperand(1);
|
||||
uint64_t Idx = Op.getConstantOperandVal(2);
|
||||
@ -3456,6 +3456,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
|
||||
APInt DemandedSrcElts = DemandedElts;
|
||||
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
|
||||
|
||||
// If none of the sub operand elements are demanded, bypass the insert.
|
||||
if (!DemandedSubElts)
|
||||
return TLO.CombineTo(Op, Src);
|
||||
|
||||
APInt SubUndef, SubZero;
|
||||
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
|
||||
Depth + 1))
|
||||
|
||||
@ -15357,7 +15357,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue V = Op.getOperand(i);
|
||||
SDValue LaneIdx = DAG.getConstant(i, DL, MVT::i64);
|
||||
if (!isIntOrFPConstant(V))
|
||||
if (!isIntOrFPConstant(V) && !V.isUndef())
|
||||
// Note that type legalization likely mucked about with the VT of the
|
||||
// source operand, so we may have to convert it here before inserting.
|
||||
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Val, V, LaneIdx);
|
||||
|
||||
@ -94,16 +94,14 @@ define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unname
|
||||
; CHECK-LABEL: combine_undef_add_8xi32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov s1, w0
|
||||
; CHECK-NEXT: movi v0.2d, #0000000000000000
|
||||
; CHECK-NEXT: dup v0.4s, w8
|
||||
; CHECK-NEXT: mov v1.s[1], w1
|
||||
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
|
||||
; CHECK-NEXT: mov v1.s[2], w2
|
||||
; CHECK-NEXT: mov v1.s[3], w3
|
||||
; CHECK-NEXT: xtn v2.4h, v1.4s
|
||||
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
|
||||
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
|
||||
; CHECK-NEXT: mov v1.d[1], v0.d[0]
|
||||
; CHECK-NEXT: uaddlv s0, v1.8h
|
||||
; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
|
||||
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
|
||||
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
|
||||
; CHECK-NEXT: uaddlv s0, v0.8h
|
||||
; CHECK-NEXT: fmov w0, s0
|
||||
; CHECK-NEXT: ret
|
||||
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0
|
||||
|
||||
@ -1198,11 +1198,15 @@ define void @masked_gather_passthru(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #
|
||||
; CHECK-NEXT: ptrue p0.s, vl32
|
||||
; CHECK-NEXT: ptrue p2.d, vl32
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
|
||||
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
|
||||
; CHECK-NEXT: ld1d { z0.d }, p2/z, [x1]
|
||||
; CHECK-NEXT: punpklo p2.h, p1.b
|
||||
; CHECK-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: ld1w { z0.d }, p2/z, [z0.d]
|
||||
; CHECK-NEXT: and z1.s, z1.s, #0x1
|
||||
; CHECK-NEXT: cmpne p1.s, p1/z, z1.s, #0
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING1
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING2
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
|
||||
; Check that the default value enables the web folding and
|
||||
@ -8,20 +8,35 @@
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
|
||||
|
||||
define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
|
||||
; NO_FOLDING-LABEL: vfwmul_v2f116_multiple_users:
|
||||
; NO_FOLDING: # %bb.0:
|
||||
; NO_FOLDING-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
|
||||
; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
|
||||
; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
|
||||
; NO_FOLDING-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
|
||||
; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
|
||||
; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
|
||||
; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
|
||||
; NO_FOLDING-NEXT: vse32.v v10, (a0)
|
||||
; NO_FOLDING-NEXT: vse32.v v11, (a1)
|
||||
; NO_FOLDING-NEXT: vse32.v v8, (a2)
|
||||
; NO_FOLDING-NEXT: ret
|
||||
; NO_FOLDING1-LABEL: vfwmul_v2f116_multiple_users:
|
||||
; NO_FOLDING1: # %bb.0:
|
||||
; NO_FOLDING1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
|
||||
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
|
||||
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
|
||||
; NO_FOLDING1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
|
||||
; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
|
||||
; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
|
||||
; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
|
||||
; NO_FOLDING1-NEXT: vse32.v v10, (a0)
|
||||
; NO_FOLDING1-NEXT: vse32.v v11, (a1)
|
||||
; NO_FOLDING1-NEXT: vse32.v v8, (a2)
|
||||
; NO_FOLDING1-NEXT: ret
|
||||
;
|
||||
; NO_FOLDING2-LABEL: vfwmul_v2f116_multiple_users:
|
||||
; NO_FOLDING2: # %bb.0:
|
||||
; NO_FOLDING2-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
|
||||
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
|
||||
; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
|
||||
; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
|
||||
; NO_FOLDING2-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
|
||||
; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
|
||||
; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
|
||||
; NO_FOLDING2-NEXT: vse32.v v9, (a0)
|
||||
; NO_FOLDING2-NEXT: vse32.v v11, (a1)
|
||||
; NO_FOLDING2-NEXT: vse32.v v8, (a2)
|
||||
; NO_FOLDING2-NEXT: ret
|
||||
;
|
||||
; ZVFH-LABEL: vfwmul_v2f116_multiple_users:
|
||||
; ZVFH: # %bb.0:
|
||||
@ -61,20 +76,35 @@ define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a,
|
||||
}
|
||||
|
||||
define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
|
||||
; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
|
||||
; NO_FOLDING: # %bb.0:
|
||||
; NO_FOLDING-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
|
||||
; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
|
||||
; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
|
||||
; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
|
||||
; NO_FOLDING-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
|
||||
; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
|
||||
; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
|
||||
; NO_FOLDING-NEXT: vse64.v v10, (a0)
|
||||
; NO_FOLDING-NEXT: vse64.v v11, (a1)
|
||||
; NO_FOLDING-NEXT: vse64.v v8, (a2)
|
||||
; NO_FOLDING-NEXT: ret
|
||||
; NO_FOLDING1-LABEL: vfwmul_v2f32_multiple_users:
|
||||
; NO_FOLDING1: # %bb.0:
|
||||
; NO_FOLDING1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
|
||||
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
|
||||
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
|
||||
; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
|
||||
; NO_FOLDING1-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
|
||||
; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
|
||||
; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
|
||||
; NO_FOLDING1-NEXT: vse64.v v10, (a0)
|
||||
; NO_FOLDING1-NEXT: vse64.v v11, (a1)
|
||||
; NO_FOLDING1-NEXT: vse64.v v8, (a2)
|
||||
; NO_FOLDING1-NEXT: ret
|
||||
;
|
||||
; NO_FOLDING2-LABEL: vfwmul_v2f32_multiple_users:
|
||||
; NO_FOLDING2: # %bb.0:
|
||||
; NO_FOLDING2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
|
||||
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
|
||||
; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
|
||||
; NO_FOLDING2-NEXT: vsetvli zero, zero, e64, m1, ta, ma
|
||||
; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
|
||||
; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
|
||||
; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
|
||||
; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
|
||||
; NO_FOLDING2-NEXT: vse64.v v9, (a0)
|
||||
; NO_FOLDING2-NEXT: vse64.v v11, (a1)
|
||||
; NO_FOLDING2-NEXT: vse64.v v8, (a2)
|
||||
; NO_FOLDING2-NEXT: ret
|
||||
;
|
||||
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
|
||||
; FOLDING: # %bb.0:
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING1
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING1
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING2
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING2
|
||||
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING
|
||||
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING
|
||||
; Check that the default value enables the web folding and
|
||||
@ -16,21 +16,38 @@
|
||||
; We need the web size to be at least 3 for the folding to happen, because
|
||||
; %c has 3 uses.
|
||||
define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
|
||||
; NO_FOLDING-LABEL: vwmul_v2i16_multiple_users:
|
||||
; NO_FOLDING: # %bb.0:
|
||||
; NO_FOLDING-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; NO_FOLDING-NEXT: vle8.v v8, (a0)
|
||||
; NO_FOLDING-NEXT: vle8.v v9, (a1)
|
||||
; NO_FOLDING-NEXT: vle8.v v10, (a2)
|
||||
; NO_FOLDING-NEXT: vsext.vf2 v11, v8
|
||||
; NO_FOLDING-NEXT: vsext.vf2 v8, v9
|
||||
; NO_FOLDING-NEXT: vsext.vf2 v9, v10
|
||||
; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
|
||||
; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
|
||||
; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
|
||||
; NO_FOLDING-NEXT: vor.vv v8, v8, v10
|
||||
; NO_FOLDING-NEXT: vor.vv v8, v8, v9
|
||||
; NO_FOLDING-NEXT: ret
|
||||
; NO_FOLDING1-LABEL: vwmul_v2i16_multiple_users:
|
||||
; NO_FOLDING1: # %bb.0:
|
||||
; NO_FOLDING1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; NO_FOLDING1-NEXT: vle8.v v8, (a0)
|
||||
; NO_FOLDING1-NEXT: vle8.v v9, (a1)
|
||||
; NO_FOLDING1-NEXT: vle8.v v10, (a2)
|
||||
; NO_FOLDING1-NEXT: vsext.vf2 v11, v8
|
||||
; NO_FOLDING1-NEXT: vsext.vf2 v8, v9
|
||||
; NO_FOLDING1-NEXT: vsext.vf2 v9, v10
|
||||
; NO_FOLDING1-NEXT: vmul.vv v8, v11, v8
|
||||
; NO_FOLDING1-NEXT: vadd.vv v10, v11, v9
|
||||
; NO_FOLDING1-NEXT: vsub.vv v9, v11, v9
|
||||
; NO_FOLDING1-NEXT: vor.vv v8, v8, v10
|
||||
; NO_FOLDING1-NEXT: vor.vv v8, v8, v9
|
||||
; NO_FOLDING1-NEXT: ret
|
||||
;
|
||||
; NO_FOLDING2-LABEL: vwmul_v2i16_multiple_users:
|
||||
; NO_FOLDING2: # %bb.0:
|
||||
; NO_FOLDING2-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
|
||||
; NO_FOLDING2-NEXT: vle8.v v8, (a0)
|
||||
; NO_FOLDING2-NEXT: vle8.v v9, (a1)
|
||||
; NO_FOLDING2-NEXT: vle8.v v10, (a2)
|
||||
; NO_FOLDING2-NEXT: vsext.vf2 v11, v8
|
||||
; NO_FOLDING2-NEXT: vsext.vf2 v8, v9
|
||||
; NO_FOLDING2-NEXT: vmul.vv v8, v11, v8
|
||||
; NO_FOLDING2-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
|
||||
; NO_FOLDING2-NEXT: vwadd.wv v9, v11, v10
|
||||
; NO_FOLDING2-NEXT: vwsub.wv v11, v11, v10
|
||||
; NO_FOLDING2-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
|
||||
; NO_FOLDING2-NEXT: vor.vv v8, v8, v9
|
||||
; NO_FOLDING2-NEXT: vor.vv v8, v8, v11
|
||||
; NO_FOLDING2-NEXT: ret
|
||||
;
|
||||
; FOLDING-LABEL: vwmul_v2i16_multiple_users:
|
||||
; FOLDING: # %bb.0:
|
||||
|
||||
@ -3572,45 +3572,53 @@ define void @SpinningCube() {
|
||||
; SSE2-LABEL: SpinningCube:
|
||||
; SSE2: # %bb.0: # %entry
|
||||
; SSE2-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
|
||||
; SSE2-NEXT: movaps {{.*#+}} xmm0 = [u,u,u,1.0E+0]
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSE2-NEXT: movapd {{.*#+}} xmm2 = [u,u,-2.0E+0,u]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSE2-NEXT: xorps %xmm3, %xmm3
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
|
||||
; SSE2-NEXT: addps %xmm3, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, (%rax)
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
|
||||
; SSE2-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSE2-NEXT: addps %xmm0, %xmm1
|
||||
; SSE2-NEXT: movaps %xmm1, (%rax)
|
||||
; SSE2-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,1]
|
||||
; SSE2-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm3 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSE2-NEXT: movapd {{.*#+}} xmm4 = [u,u,-2.0E+0,u]
|
||||
; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,0]
|
||||
; SSE2-NEXT: movq {{.*#+}} xmm3 = xmm3[0],zero
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm1[2,0]
|
||||
; SSE2-NEXT: addps %xmm0, %xmm3
|
||||
; SSE2-NEXT: movaps %xmm3, (%rax)
|
||||
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
; SSE2-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSE2-NEXT: addps %xmm2, %xmm0
|
||||
; SSE2-NEXT: movaps %xmm0, (%rax)
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSSE3-LABEL: SpinningCube:
|
||||
; SSSE3: # %bb.0: # %entry
|
||||
; SSSE3-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
|
||||
; SSSE3-NEXT: movaps {{.*#+}} xmm0 = [u,u,u,1.0E+0]
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSSE3-NEXT: movapd {{.*#+}} xmm2 = [u,u,-2.0E+0,u]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
|
||||
; SSSE3-NEXT: xorps %xmm3, %xmm3
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
|
||||
; SSSE3-NEXT: addps %xmm3, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, (%rax)
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,2]
|
||||
; SSSE3-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; SSSE3-NEXT: addps %xmm0, %xmm1
|
||||
; SSSE3-NEXT: movaps %xmm1, (%rax)
|
||||
; SSSE3-NEXT: xorps %xmm0, %xmm0
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,1]
|
||||
; SSSE3-NEXT: xorps %xmm2, %xmm2
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm3 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSSE3-NEXT: movapd {{.*#+}} xmm4 = [u,u,-2.0E+0,u]
|
||||
; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,0]
|
||||
; SSSE3-NEXT: movq {{.*#+}} xmm3 = xmm3[0],zero
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm1[2,0]
|
||||
; SSSE3-NEXT: addps %xmm0, %xmm3
|
||||
; SSSE3-NEXT: movaps %xmm3, (%rax)
|
||||
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,2]
|
||||
; SSSE3-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
|
||||
; SSSE3-NEXT: addps %xmm2, %xmm0
|
||||
; SSSE3-NEXT: movaps %xmm0, (%rax)
|
||||
; SSSE3-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: SpinningCube:
|
||||
; SSE41: # %bb.0: # %entry
|
||||
; SSE41-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [u,u,u,1.0E+0]
|
||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,mem[0]
|
||||
; SSE41-NEXT: movaps {{.*#+}} xmm1 = [0.0E+0,0.0E+0,-2.0E+0,u]
|
||||
; SSE41-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; SSE41-NEXT: movaps %xmm1, %xmm3
|
||||
@ -3629,7 +3637,7 @@ define void @SpinningCube() {
|
||||
; AVX-LABEL: SpinningCube:
|
||||
; AVX: # %bb.0: # %entry
|
||||
; AVX-NEXT: movl $1065353216, (%rax) # imm = 0x3F800000
|
||||
; AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,0.0E+0,1.0E+0]
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [0.0E+0,0.0E+0,-2.0E+0,u]
|
||||
; AVX-NEXT: vmovss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
|
||||
; AVX-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user