This commit is contained in:
Qi Zhao 2025-08-22 16:12:27 +08:00
parent b99249e3f7
commit 6e525512d0

View File

@ -2520,76 +2520,62 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (!IsConstant) { if (!IsConstant) {
// If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
// the sub-sequence of the vector and then broadcast the sub-sequence. // the sub-sequence of the vector and then broadcast the sub-sequence.
//
// TODO: If the BUILD_VECTOR contains undef elements, consider falling
// back to use INSERT_VECTOR_ELT to materialize the vector, because it
// generates worse code in some cases. This could be further optimized
// with more consideration.
SmallVector<SDValue> Sequence; SmallVector<SDValue> Sequence;
BitVector UndefElements; BitVector UndefElements;
if (Node->getRepeatedSequence(Sequence, &UndefElements)) { if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
// TODO: If the BUILD_VECTOR contains undef elements, consider falling UndefElements.count() == 0) {
// back to use INSERT_VECTOR_ELT to materialize the vector, because it SDValue Vector = DAG.getUNDEF(ResTy);
// generates worse code in some cases. This could be further optimized SDValue FillVec = Vector;
// with more consideration. EVT FillTy = ResTy;
if (UndefElements.count() == 0) {
unsigned SeqLen = Sequence.size();
SDValue Vector = DAG.getUNDEF(ResTy); // Using LSX instructions to fill the sub-sequence of 256-bits vector,
SDValue FillVec = Vector; // because the high part can be simply treated as undef.
EVT FillTy = ResTy; if (Is256Vec) {
FillTy = ResTy.getHalfNumVectorElementsVT(*DAG.getContext());
// Using LSX instructions to fill the sub-sequence of 256-bits vector, FillVec = DAG.getExtractSubvector(DL, FillTy, Vector, 0);
// because the high part can be simply treated as undef.
if (ResTy.is256BitVector()) {
MVT HalfEltTy;
if (ResTy.isFloatingPoint())
HalfEltTy = MVT::getFloatingPointVT(VT.getScalarSizeInBits());
else
HalfEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits());
EVT HalfTy = MVT::getVectorVT(HalfEltTy, NumElts / 2);
SDValue HalfVec = DAG.getExtractSubvector(DL, HalfTy, Vector, 0);
FillVec = HalfVec;
FillTy = HalfTy;
}
SDValue Op0 = Sequence[0];
if (!Op0.isUndef())
FillVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, FillTy, Op0);
for (unsigned i = 1; i < SeqLen; ++i) {
SDValue Opi = Sequence[i];
if (Opi.isUndef())
continue;
FillVec =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
if (ResTy.is256BitVector())
Vector = DAG.getInsertSubvector(DL, Vector, FillVec, 0);
else
Vector = FillVec;
unsigned SplatLen = NumElts / SeqLen;
MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
// If size of the sub-sequence is half of a 256-bits vector, bitcast the
// vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
if (SplatEltTy == MVT::i128)
SplatTy = MVT::v4i64;
SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
SDValue SplatVec;
if (SplatTy.is256BitVector()) {
SplatVec =
DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
: LoongArchISD::XVREPLVE0,
DL, SplatTy, SrcVec);
} else {
SplatVec =
DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
return DAG.getBitcast(ResTy, SplatVec);
} }
SDValue Op0 = Sequence[0];
unsigned SeqLen = Sequence.size();
if (!Op0.isUndef())
FillVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, FillTy, Op0);
for (unsigned i = 1; i < SeqLen; ++i) {
SDValue Opi = Sequence[i];
if (Opi.isUndef())
continue;
FillVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
unsigned SplatLen = NumElts / SeqLen;
MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
// If size of the sub-sequence is half of a 256-bits vector, bitcast the
// vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
if (SplatEltTy == MVT::i128)
SplatTy = MVT::v4i64;
SDValue SplatVec;
SDValue SrcVec = DAG.getBitcast(
SplatTy,
Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
if (Is256Vec) {
SplatVec =
DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
: LoongArchISD::XVREPLVE0,
DL, SplatTy, SrcVec);
} else {
SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
return DAG.getBitcast(ResTy, SplatVec);
} }
// Use INSERT_VECTOR_ELT operations rather than expand to stores. // Use INSERT_VECTOR_ELT operations rather than expand to stores.