[LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence (#115803)

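Inspired by https://github.com/llvm/llvm-project/issues/101624.

The [x]vreplgr2vr intrinsics are now combined into a dedicated
LoongArchISD::VREPLGR2VR node instead of a BUILD_VECTOR, so a following
[x]vinsgr2vr is emitted as one replicate plus one insert rather than one
insert per element.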
This commit is contained in:
ZhaoQi 2024-11-12 19:07:53 +08:00 committed by GitHub
parent 3c585bdd3c
commit 512208b498
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 33 additions and 41 deletions

View File

@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
EVT ResTy = N->getValueType(0);
SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
return DAG.getBuildVector(ResTy, DL, Ops);
}
case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
N->getOperand(1)));
case Intrinsic::loongarch_lsx_vreplve_b:
case Intrinsic::loongarch_lsx_vreplve_h:
case Intrinsic::loongarch_lsx_vreplve_w:
@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VILVH)
NODE_NAME_CASE(VSHUF4I)
NODE_NAME_CASE(VREPLVEI)
NODE_NAME_CASE(VREPLGR2VR)
NODE_NAME_CASE(XVPERMI)
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)

View File

@ -129,6 +129,7 @@ enum NodeType : unsigned {
VILVH,
VSHUF4I,
VREPLVEI,
VREPLGR2VR,
XVPERMI,
// Extended vector element extraction

View File

@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
// Select the loongarch_vreplgr2vr node (LoongArchISD::VREPLGR2VR) with a
// GRLenVT operand to XVREPLGR2VR_{B/H/W/D}; the instruction variant is
// chosen by the 256-bit result vector type.
def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>;
def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>;
def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>;
def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>;
// XVREPLVE_{B/H/W/D}
def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
(XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;

View File

@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
// Type profile with one result and one operand: the result is an integer
// vector and the operand is an integer.
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
// SelectionDAG node for LoongArchISD::VREPLGR2VR, typed by
// SDT_LoongArchVreplgr2vr.
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
// Select the loongarch_vreplgr2vr node (LoongArchISD::VREPLGR2VR) with a
// GRLenVT operand to VREPLGR2VR_{B/H/W/D}; the instruction variant is
// chosen by the 128-bit result vector type.
def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v16i8 (VREPLGR2VR_B GRLenVT:$rj))>;
def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v8i16 (VREPLGR2VR_H GRLenVT:$rj))>;
def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v4i32 (VREPLGR2VR_W GRLenVT:$rj))>;
def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
(v2i64 (VREPLGR2VR_D GRLenVT:$rj))>;
// VREPLVE_{B/H/W/D}
def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
(VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;

View File

@ -4,14 +4,8 @@
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
; CHECK-LABEL: xvrepl_ins_w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
; CHECK-NEXT: ret
entry:
%0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
@ -22,10 +16,8 @@ entry:
define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
; CHECK-LABEL: xvrepl_ins_d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
; CHECK-NEXT: ret
entry:
%0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)

View File

@ -4,22 +4,8 @@
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15
; CHECK-NEXT: ret
entry:
%0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
@ -30,14 +16,8 @@ entry:
define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_h:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7
; CHECK-NEXT: ret
entry:
%0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
@ -48,10 +28,8 @@ entry:
define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_w:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
; CHECK-NEXT: ret
entry:
%0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
@ -62,7 +40,7 @@ entry:
define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
; CHECK-LABEL: vrepl_ins_d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
; CHECK-NEXT: ret
entry: