update

2025-08-22 16:12:27 +08:00 · 2025-08-22 16:12:27 +08:00 · 6e525512d0
commit 6e525512d0
parent b99249e3f7
1 changed files with 52 additions and 66 deletions
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2520,52 +2520,38 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
  if (!IsConstant) {
    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
    // the sub-sequence of the vector and then broadcast the sub-sequence.
-    SmallVector<SDValue> Sequence;
-    BitVector UndefElements;
-    if (Node->getRepeatedSequence(Sequence, &UndefElements)) {
+    //
    // TODO: When the BUILD_VECTOR contains undef elements, we currently fall
    // back to materializing the vector with INSERT_VECTOR_ELT, because the
    // broadcast approach generates worse code in some cases. This could be
    // further optimized with more consideration.
-      if (UndefElements.count() == 0) {
-        unsigned SeqLen = Sequence.size();
-
+    SmallVector<SDValue> Sequence;
+    BitVector UndefElements;
+    if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
+        UndefElements.count() == 0) {
      SDValue Vector = DAG.getUNDEF(ResTy);
      SDValue FillVec = Vector;
      EVT FillTy = ResTy;

      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
      // because the high part can be simply treated as undef.
-        if (ResTy.is256BitVector()) {
-          MVT HalfEltTy;
-          if (ResTy.isFloatingPoint())
-            HalfEltTy = MVT::getFloatingPointVT(VT.getScalarSizeInBits());
-          else
-            HalfEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits());
-          EVT HalfTy = MVT::getVectorVT(HalfEltTy, NumElts / 2);
-          SDValue HalfVec = DAG.getExtractSubvector(DL, HalfTy, Vector, 0);
-
-          FillVec = HalfVec;
-          FillTy = HalfTy;
+      if (Is256Vec) {
+        FillTy = ResTy.getHalfNumVectorElementsVT(*DAG.getContext());
+        FillVec = DAG.getExtractSubvector(DL, FillTy, Vector, 0);
      }

      SDValue Op0 = Sequence[0];
+      unsigned SeqLen = Sequence.size();
      if (!Op0.isUndef())
        FillVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, FillTy, Op0);
      for (unsigned i = 1; i < SeqLen; ++i) {
        SDValue Opi = Sequence[i];
        if (Opi.isUndef())
          continue;
-          FillVec =
-              DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
+        FillVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, FillTy, FillVec, Opi,
                              DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
      }

-        if (ResTy.is256BitVector())
-          Vector = DAG.getInsertSubvector(DL, Vector, FillVec, 0);
-        else
-          Vector = FillVec;
-
      unsigned SplatLen = NumElts / SeqLen;
      MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
      MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
@@ -2575,22 +2561,22 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
      if (SplatEltTy == MVT::i128)
        SplatTy = MVT::v4i64;

-        SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
      SDValue SplatVec;
-        if (SplatTy.is256BitVector()) {
+      SDValue SrcVec = DAG.getBitcast(
+          SplatTy,
+          Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
+      if (Is256Vec) {
        SplatVec =
            DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
                                                  : LoongArchISD::XVREPLVE0,
                        DL, SplatTy, SrcVec);
      } else {
-          SplatVec =
-              DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
+        SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
                               DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
      }

      return DAG.getBitcast(ResTy, SplatVec);
    }
-    }

    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't