This commit is contained in:
Qi Zhao 2025-08-22 16:12:27 +08:00
parent b99249e3f7
commit 6e525512d0

View File

@ -2520,52 +2520,38 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (!IsConstant) {
// If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
// the sub-sequence of the vector and then broadcast the sub-sequence.
SmallVector<SDValue> Sequence;
BitVector UndefElements;
if (Node->getRepeatedSequence(Sequence, &UndefElements)) {
//
// TODO: If the BUILD_VECTOR contains undef elements, consider falling
// back to use INSERT_VECTOR_ELT to materialize the vector, because it
// generates worse code in some cases. This could be further optimized
// with more consideration.
if (UndefElements.count() == 0) {
unsigned SeqLen = Sequence.size();
SmallVector<SDValue> Sequence;
BitVector UndefElements;
if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
UndefElements.count() == 0) {
SDValue Vector = DAG.getUNDEF(ResTy);
SDValue FillVec = Vector;
EVT FillTy = ResTy;
// Use LSX instructions to fill the sub-sequence of a 256-bit vector,
// because the high part can simply be treated as undef.
if (ResTy.is256BitVector()) {
MVT HalfEltTy;
if (ResTy.isFloatingPoint())
HalfEltTy = MVT::getFloatingPointVT(VT.getScalarSizeInBits());
else
HalfEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits());
EVT HalfTy = MVT::getVectorVT(HalfEltTy, NumElts / 2);
SDValue HalfVec = DAG.getExtractSubvector(DL, HalfTy, Vector, 0);
FillVec = HalfVec;
FillTy = HalfTy;
if (Is256Vec) {
FillTy = ResTy.getHalfNumVectorElementsVT(*DAG.getContext());
FillVec = DAG.getExtractSubvector(DL, FillTy, Vector, 0);
}
SDValue Op0 = Sequence[0];
unsigned SeqLen = Sequence.size();
if (!Op0.isUndef())
FillVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, FillTy, Op0);
for (unsigned i = 1; i < SeqLen; ++i) {
SDValue Opi = Sequence[i];
if (Opi.isUndef())
continue;
FillVec =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
FillVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
}
if (ResTy.is256BitVector())
Vector = DAG.getInsertSubvector(DL, Vector, FillVec, 0);
else
Vector = FillVec;
unsigned SplatLen = NumElts / SeqLen;
MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
@ -2575,22 +2561,22 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
if (SplatEltTy == MVT::i128)
SplatTy = MVT::v4i64;
SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
SDValue SplatVec;
if (SplatTy.is256BitVector()) {
SDValue SrcVec = DAG.getBitcast(
SplatTy,
Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
if (Is256Vec) {
SplatVec =
DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
: LoongArchISD::XVREPLVE0,
DL, SplatTy, SrcVec);
} else {
SplatVec =
DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
return DAG.getBitcast(ResTy, SplatVec);
}
}
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
// The resulting code is the same length as the expansion, but it doesn't