[LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence (#115803)
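Lower the [x]vreplgr2vr intrinsics to a dedicated LoongArchISD::VREPLGR2VR node instead of a BUILD_VECTOR, so that a vreplgr2vr followed by a vinsgr2vr is emitted as one replicate plus one insert rather than one insert per element. Inspired by https://github.com/llvm/llvm-project/issues/101624.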
This commit is contained in:
parent
3c585bdd3c
commit
512208b498
@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
|
||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
|
||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
|
||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
|
||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
|
||||
EVT ResTy = N->getValueType(0);
|
||||
SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
|
||||
return DAG.getBuildVector(ResTy, DL, Ops);
|
||||
}
|
||||
case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
|
||||
return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
|
||||
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
|
||||
N->getOperand(1)));
|
||||
case Intrinsic::loongarch_lsx_vreplve_b:
|
||||
case Intrinsic::loongarch_lsx_vreplve_h:
|
||||
case Intrinsic::loongarch_lsx_vreplve_w:
|
||||
@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(VILVH)
|
||||
NODE_NAME_CASE(VSHUF4I)
|
||||
NODE_NAME_CASE(VREPLVEI)
|
||||
NODE_NAME_CASE(VREPLGR2VR)
|
||||
NODE_NAME_CASE(XVPERMI)
|
||||
NODE_NAME_CASE(VPICK_SEXT_ELT)
|
||||
NODE_NAME_CASE(VPICK_ZEXT_ELT)
|
||||
|
@ -129,6 +129,7 @@ enum NodeType : unsigned {
|
||||
VILVH,
|
||||
VSHUF4I,
|
||||
VREPLVEI,
|
||||
VREPLGR2VR,
|
||||
XVPERMI,
|
||||
|
||||
// Extended vector element extraction
|
||||
|
@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
|
||||
def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
|
||||
def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
|
||||
|
||||
// XVREPLGR2VR_{B/H/W/D}
// Select the loongarch_vreplgr2vr node, whose operand is a GRLenVT register,
// to the instruction matching the element width of the 256-bit result type.
def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>;
|
||||
def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>;
|
||||
def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>;
|
||||
def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>;
|
||||
|
||||
// XVREPLVE_{B/H/W/D}
|
||||
def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
|
||||
(XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
|
||||
|
@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
|
||||
// One vector result and two operands: operand 1 has the same type as the
// result, operand 2 is an i64.
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
|
||||
// One integer-vector result and a single integer operand; used by
// loongarch_vreplgr2vr.
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
|
||||
// One floating-point vector result and a single operand of the same type.
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
||||
// Same constraints as SDT_LoongArchVFRECIPE.
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
||||
|
||||
@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
|
||||
|
||||
// Each def below binds a TableGen DAG operator to the LoongArchISD opcode
// named in the string, together with the type profile that constrains it.
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
|
||||
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
|
||||
// Matched by the (X)VREPLGR2VR_{B/H/W/D} selection patterns.
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
|
||||
|
||||
// Note: the opcode strings below are FRECIPE/FRSQRTE, without the leading
// "V" used in the def names.
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
|
||||
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
|
||||
|
||||
@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
|
||||
def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
|
||||
def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
|
||||
|
||||
// VREPLGR2VR_{B/H/W/D}
// Select the loongarch_vreplgr2vr node, whose operand is a GRLenVT register,
// to the instruction matching the element width of the 128-bit result type.
def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v16i8 (VREPLGR2VR_B GRLenVT:$rj))>;
|
||||
def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v8i16 (VREPLGR2VR_H GRLenVT:$rj))>;
|
||||
def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v4i32 (VREPLGR2VR_W GRLenVT:$rj))>;
|
||||
def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
|
||||
(v2i64 (VREPLGR2VR_D GRLenVT:$rj))>;
|
||||
|
||||
// VREPLVE_{B/H/W/D}
|
||||
def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
|
||||
(VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
|
||||
|
@ -4,14 +4,8 @@
|
||||
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: xvrepl_ins_w:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
|
||||
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
|
||||
@ -22,10 +16,8 @@ entry:
|
||||
define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: xvrepl_ins_d:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
|
||||
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
|
||||
|
@ -4,22 +4,8 @@
|
||||
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: vrepl_ins_b:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14
|
||||
; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
|
||||
@ -30,14 +16,8 @@ entry:
|
||||
define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: vrepl_ins_h:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6
|
||||
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
|
||||
@ -48,10 +28,8 @@ entry:
|
||||
define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: vrepl_ins_w:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
|
||||
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
|
||||
@ -62,7 +40,7 @@ entry:
|
||||
define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: vrepl_ins_d:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
|
||||
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
|
||||
; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user