From 512208b498d27e885cd9164bed516eeb910a4933 Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Tue, 12 Nov 2024 19:07:53 +0800 Subject: [PATCH] [LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence (#115803) Inspired by https://github.com/llvm/llvm-project/issues/101624. --- .../LoongArch/LoongArchISelLowering.cpp | 10 +++---- .../Target/LoongArch/LoongArchISelLowering.h | 1 + .../LoongArch/LoongArchLASXInstrInfo.td | 9 ++++++ .../Target/LoongArch/LoongArchLSXInstrInfo.td | 12 ++++++++ .../lasx/intrinsic-repl-ins-gr2vr.ll | 12 ++------ .../LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll | 30 +++---------------- 6 files changed, 33 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index afc016c84162..4d77912b9ed5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, case Intrinsic::loongarch_lasx_xvreplgr2vr_b: case Intrinsic::loongarch_lasx_xvreplgr2vr_h: case Intrinsic::loongarch_lasx_xvreplgr2vr_w: - case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { - EVT ResTy = N->getValueType(0); - SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1)); - return DAG.getBuildVector(ResTy, DL, Ops); - } + case Intrinsic::loongarch_lasx_xvreplgr2vr_d: + return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0), + DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), + N->getOperand(1))); case Intrinsic::loongarch_lsx_vreplve_b: case Intrinsic::loongarch_lsx_vreplve_h: case Intrinsic::loongarch_lsx_vreplve_w: @@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VILVH) NODE_NAME_CASE(VSHUF4I) NODE_NAME_CASE(VREPLVEI) + NODE_NAME_CASE(VREPLGR2VR) NODE_NAME_CASE(XVPERMI) NODE_NAME_CASE(VPICK_SEXT_ELT) NODE_NAME_CASE(VPICK_ZEXT_ELT) diff --git 
a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index df6a55a2b831..c10acc043c50 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -129,6 +129,7 @@ enum NodeType : unsigned { VILVH, VSHUF4I, VREPLVEI, + VREPLGR2VR, XVPERMI, // Extended vector element extraction diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 3e39e2c10a61..49ae440073f2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>; def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>; def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>; +def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>; +def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>; +def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>; +def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>; + // XVREPLVE_{B/H/W/D} def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk), (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 25e70b4e6b35..250896cbbe5f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>; +def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>; def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, 
[SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>; @@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>; def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>; def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>; +def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>; + def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>; def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>; @@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>; def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>; def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>; +def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v16i8 (VREPLGR2VR_B GRLenVT:$rj))>; +def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v8i16 (VREPLGR2VR_H GRLenVT:$rj))>; +def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v4i32 (VREPLGR2VR_W GRLenVT:$rj))>; +def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)), + (v2i64 (VREPLGR2VR_D GRLenVT:$rj))>; + // VREPLVE_{B/H/W/D} def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk), (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll index b3dcd373b60e..2e538ed66b25 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll @@ -4,14 +4,8 @@ define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) { ; CHECK-LABEL: xvrepl_ins_w: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0 ; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2 -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3 -; CHECK-NEXT: xvinsgr2vr.w $xr0, 
$a0, 4 -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5 -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6 -; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7 ; CHECK-NEXT: ret entry: %0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a) @@ -22,10 +16,8 @@ entry: define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) { ; CHECK-LABEL: xvrepl_ins_d: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0 +; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0 ; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2 -; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3 ; CHECK-NEXT: ret entry: %0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a) diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll index 3eb061490104..aee749294682 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll @@ -4,22 +4,8 @@ define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) { ; CHECK-LABEL: vrepl_ins_b: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0 +; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 ; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14 -; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15 ; CHECK-NEXT: ret entry: %0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) @@ -30,14 +16,8 @@ entry: define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) { ; CHECK-LABEL: vrepl_ins_h: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.h $vr0, 
$a0, 0 +; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 ; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6 -; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7 ; CHECK-NEXT: ret entry: %0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) @@ -48,10 +28,8 @@ entry: define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) { ; CHECK-LABEL: vrepl_ins_w: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0 +; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 ; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2 -; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3 ; CHECK-NEXT: ret entry: %0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) @@ -62,7 +40,7 @@ entry: define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) { ; CHECK-LABEL: vrepl_ins_d: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0 +; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 ; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 ; CHECK-NEXT: ret entry: