[LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence (#115803)
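Lower the LSX/LASX [x]vreplgr2vr.{b,h,w,d} intrinsics to a dedicated LoongArchISD::VREPLGR2VR node (operand any-extended to GRLen) instead of a BUILD_VECTOR of identical elements, and add selection patterns mapping that node to [X]VREPLGR2VR_{B,H,W,D}. As a result, a vreplgr2vr followed by a single vinsgr2vr is emitted as those two instructions rather than as one vinsgr2vr per vector element.
Inspired by https://github.com/llvm/llvm-project/issues/101624.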
This commit is contained in:
parent
3c585bdd3c
commit
512208b498
@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
|
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
|
||||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
|
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
|
||||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
|
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
|
||||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
|
case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
|
||||||
EVT ResTy = N->getValueType(0);
|
return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
|
||||||
SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
|
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
|
||||||
return DAG.getBuildVector(ResTy, DL, Ops);
|
N->getOperand(1)));
|
||||||
}
|
|
||||||
case Intrinsic::loongarch_lsx_vreplve_b:
|
case Intrinsic::loongarch_lsx_vreplve_b:
|
||||||
case Intrinsic::loongarch_lsx_vreplve_h:
|
case Intrinsic::loongarch_lsx_vreplve_h:
|
||||||
case Intrinsic::loongarch_lsx_vreplve_w:
|
case Intrinsic::loongarch_lsx_vreplve_w:
|
||||||
@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||||||
NODE_NAME_CASE(VILVH)
|
NODE_NAME_CASE(VILVH)
|
||||||
NODE_NAME_CASE(VSHUF4I)
|
NODE_NAME_CASE(VSHUF4I)
|
||||||
NODE_NAME_CASE(VREPLVEI)
|
NODE_NAME_CASE(VREPLVEI)
|
||||||
|
NODE_NAME_CASE(VREPLGR2VR)
|
||||||
NODE_NAME_CASE(XVPERMI)
|
NODE_NAME_CASE(XVPERMI)
|
||||||
NODE_NAME_CASE(VPICK_SEXT_ELT)
|
NODE_NAME_CASE(VPICK_SEXT_ELT)
|
||||||
NODE_NAME_CASE(VPICK_ZEXT_ELT)
|
NODE_NAME_CASE(VPICK_ZEXT_ELT)
|
||||||
|
@ -129,6 +129,7 @@ enum NodeType : unsigned {
|
|||||||
VILVH,
|
VILVH,
|
||||||
VSHUF4I,
|
VSHUF4I,
|
||||||
VREPLVEI,
|
VREPLVEI,
|
||||||
|
VREPLGR2VR,
|
||||||
XVPERMI,
|
XVPERMI,
|
||||||
|
|
||||||
// Extended vector element extraction
|
// Extended vector element extraction
|
||||||
|
@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
|
|||||||
def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
|
def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
|
||||||
def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
|
def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
|
||||||
|
|
||||||
|
def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>;
|
||||||
|
def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>;
|
||||||
|
def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>;
|
||||||
|
def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>;
|
||||||
|
|
||||||
// XVREPLVE_{B/H/W/D}
|
// XVREPLVE_{B/H/W/D}
|
||||||
def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
|
def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
|
||||||
(XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
|
(XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
|
||||||
|
@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
|
|||||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
||||||
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
|
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||||
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
|
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
|
||||||
|
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
|
||||||
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
||||||
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
||||||
|
|
||||||
@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
|
|||||||
|
|
||||||
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
|
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
|
||||||
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
|
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
|
||||||
|
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
|
||||||
|
|
||||||
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
|
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
|
||||||
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
|
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
|
||||||
|
|
||||||
@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
|
|||||||
def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
|
def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
|
||||||
def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
|
def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
|
||||||
|
|
||||||
|
def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v16i8 (VREPLGR2VR_B GRLenVT:$rj))>;
|
||||||
|
def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v8i16 (VREPLGR2VR_H GRLenVT:$rj))>;
|
||||||
|
def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v4i32 (VREPLGR2VR_W GRLenVT:$rj))>;
|
||||||
|
def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||||
|
(v2i64 (VREPLGR2VR_D GRLenVT:$rj))>;
|
||||||
|
|
||||||
// VREPLVE_{B/H/W/D}
|
// VREPLVE_{B/H/W/D}
|
||||||
def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
|
def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
|
||||||
(VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
|
(VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
|
||||||
|
@ -4,14 +4,8 @@
|
|||||||
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
|
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
|
||||||
; CHECK-LABEL: xvrepl_ins_w:
|
; CHECK-LABEL: xvrepl_ins_w:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
|
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
|
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
|
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
|
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
|
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
|
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
|
|
||||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
|
%0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
|
||||||
@ -22,10 +16,8 @@ entry:
|
|||||||
define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
|
define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
|
||||||
; CHECK-LABEL: xvrepl_ins_d:
|
; CHECK-LABEL: xvrepl_ins_d:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
|
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
|
||||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
|
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
|
||||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
|
|
||||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
|
%0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
|
||||||
|
@ -4,22 +4,8 @@
|
|||||||
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
|
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
|
||||||
; CHECK-LABEL: vrepl_ins_b:
|
; CHECK-LABEL: vrepl_ins_b:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
|
; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
|
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14
|
|
||||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
|
%0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
|
||||||
@ -30,14 +16,8 @@ entry:
|
|||||||
define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
|
define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
|
||||||
; CHECK-LABEL: vrepl_ins_h:
|
; CHECK-LABEL: vrepl_ins_h:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
|
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2
|
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
|
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4
|
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5
|
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6
|
|
||||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
|
%0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
|
||||||
@ -48,10 +28,8 @@ entry:
|
|||||||
define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
|
define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
|
||||||
; CHECK-LABEL: vrepl_ins_w:
|
; CHECK-LABEL: vrepl_ins_w:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
|
||||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
|
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
|
||||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
|
|
||||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
|
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
%0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
|
%0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
|
||||||
@ -62,7 +40,7 @@ entry:
|
|||||||
define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
|
define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
|
||||||
; CHECK-LABEL: vrepl_ins_d:
|
; CHECK-LABEL: vrepl_ins_d:
|
||||||
; CHECK: # %bb.0: # %entry
|
; CHECK: # %bb.0: # %entry
|
||||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
|
||||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
|
; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
entry:
|
entry:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user