[LoongArch] Optimize insertelement containing variable index using compare+select (#151131)
This commit is contained in:
parent
16d5db71b3
commit
ece7a72aa2
@ -2621,9 +2621,38 @@ LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||
/// Custom lowering for INSERT_VECTOR_ELT.
///
/// Operands of \p Op: (0) the source vector, (1) the scalar element to insert,
/// (2) the lane index.
///
/// - If the index is a compile-time constant, \p Op is returned unchanged.
/// - If either the vector type or the same-shaped integer vector type used
///   for the lane comparison is not legal, an empty SDValue is returned
///   (presumably so the generic legalizer handles the node -- confirm against
///   the LowerOperation caller).
/// - Otherwise the variable-index insert is rewritten as a lane-wise
///   compare + select:
///
///       insert vec, elt, idx
///       =>
///       select (splat(idx) == {0, 1, 2, ...}) ? splat(elt) : vec
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0); // Vector being inserted into.
  SDValue Op1 = Op.getOperand(1); // Scalar element to insert.
  SDValue Op2 = Op.getOperand(2); // Lane index.

  // Constant indices need no rewriting; hand the node back untouched.
  if (isa<ConstantSDNode>(Op2))
    return Op;

  // Integer vector with the same lane count and lane width as VT; it carries
  // both the splatted index and the comparison result.
  MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
  MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);

  if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
    return SDValue();

  SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
  SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);

  // Constant vector {0, 1, ..., NumElts - 1}.
  // NOTE(review): the lane constants are created with the GRLen type while
  // the build vector has lane type IdxTy; presumably this relies on
  // BUILD_VECTOR accepting wider integer operands -- confirm.
  SmallVector<SDValue, 32> RawIndices;
  for (unsigned i = 0; i < NumElts; ++i)
    RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
  SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);

  // insert vec, elt, idx
  // =>
  // select (splatidx == {0,1,2...}) ? splatelt : vec
  SDValue SelectCC =
      DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
  return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
}
|
||||
|
||||
SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
|
||||
|
@ -114,22 +114,15 @@ define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
|
||||
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_32xi8_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -96
|
||||
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 96
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvst $xr0, $sp, 32
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 32
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
|
||||
; CHECK-NEXT: st.b $a2, $a0, 0
|
||||
; CHECK-NEXT: xvld $xr0, $sp, 32
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
|
||||
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI8_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
|
||||
; CHECK-NEXT: xvseq.b $xr0, $xr2, $xr0
|
||||
; CHECK-NEXT: xvreplgr2vr.b $xr2, $a2
|
||||
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
|
||||
; CHECK-NEXT: xvst $xr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -96
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 96
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <32 x i8>, ptr %src
|
||||
%v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
|
||||
@ -140,22 +133,15 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
|
||||
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_16xi16_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -96
|
||||
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 96
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvst $xr0, $sp, 32
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 32
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
|
||||
; CHECK-NEXT: st.h $a2, $a0, 0
|
||||
; CHECK-NEXT: xvld $xr0, $sp, 32
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
|
||||
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI9_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
|
||||
; CHECK-NEXT: xvseq.h $xr0, $xr2, $xr0
|
||||
; CHECK-NEXT: xvreplgr2vr.h $xr2, $a2
|
||||
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
|
||||
; CHECK-NEXT: xvst $xr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -96
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 96
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <16 x i16>, ptr %src
|
||||
%v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
|
||||
@ -166,22 +152,15 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
|
||||
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_8xi32_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -96
|
||||
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 96
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvst $xr0, $sp, 32
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 32
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
|
||||
; CHECK-NEXT: st.w $a2, $a0, 0
|
||||
; CHECK-NEXT: xvld $xr0, $sp, 32
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI10_0)
|
||||
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI10_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
|
||||
; CHECK-NEXT: xvseq.w $xr0, $xr2, $xr0
|
||||
; CHECK-NEXT: xvreplgr2vr.w $xr2, $a2
|
||||
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
|
||||
; CHECK-NEXT: xvst $xr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -96
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 96
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <8 x i32>, ptr %src
|
||||
%v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
|
||||
@ -192,22 +171,15 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
|
||||
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_4xi64_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -96
|
||||
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 96
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: xvld $xr0, $a0, 0
|
||||
; CHECK-NEXT: xvst $xr0, $sp, 32
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 32
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
|
||||
; CHECK-NEXT: st.d $a2, $a0, 0
|
||||
; CHECK-NEXT: xvld $xr0, $sp, 32
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI11_0)
|
||||
; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI11_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a0
|
||||
; CHECK-NEXT: xvseq.d $xr0, $xr2, $xr0
|
||||
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
|
||||
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
|
||||
; CHECK-NEXT: xvst $xr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -96
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 96
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <4 x i64>, ptr %src
|
||||
%v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
|
||||
@ -218,22 +190,16 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
|
||||
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_8xfloat_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -96
|
||||
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 96
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: xvld $xr1, $a0, 0
|
||||
; CHECK-NEXT: xvst $xr1, $sp, 32
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 32
|
||||
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
|
||||
; CHECK-NEXT: fst.s $fa0, $a0, 0
|
||||
; CHECK-NEXT: xvld $xr0, $sp, 32
|
||||
; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
|
||||
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI12_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI12_0)
|
||||
; CHECK-NEXT: xvld $xr2, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.w $xr3, $a0
|
||||
; CHECK-NEXT: xvseq.w $xr1, $xr3, $xr1
|
||||
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
|
||||
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
|
||||
; CHECK-NEXT: xvst $xr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -96
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 96
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <8 x float>, ptr %src
|
||||
%v_new = insertelement <8 x float> %v, float %in, i32 %idx
|
||||
@ -244,22 +210,16 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
|
||||
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_4xdouble_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -96
|
||||
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 96
|
||||
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
|
||||
; CHECK-NEXT: xvld $xr1, $a0, 0
|
||||
; CHECK-NEXT: xvst $xr1, $sp, 32
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 32
|
||||
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
|
||||
; CHECK-NEXT: fst.d $fa0, $a0, 0
|
||||
; CHECK-NEXT: xvld $xr0, $sp, 32
|
||||
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
|
||||
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI13_0)
|
||||
; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI13_0)
|
||||
; CHECK-NEXT: xvld $xr2, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
|
||||
; CHECK-NEXT: xvreplgr2vr.d $xr3, $a0
|
||||
; CHECK-NEXT: xvseq.d $xr1, $xr3, $xr1
|
||||
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
|
||||
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
|
||||
; CHECK-NEXT: xvst $xr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $fp, -96
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 96
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <4 x double>, ptr %src
|
||||
%v_new = insertelement <4 x double> %v, double %in, i32 %idx
|
||||
|
@ -84,15 +84,15 @@ define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind {
|
||||
define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_16xi8_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vst $vr0, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0
|
||||
; CHECK-NEXT: st.b $a2, $a0, 0
|
||||
; CHECK-NEXT: vld $vr0, $sp, 0
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
|
||||
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
|
||||
; CHECK-NEXT: vld $vr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: vreplgr2vr.b $vr2, $a0
|
||||
; CHECK-NEXT: vseq.b $vr0, $vr2, $vr0
|
||||
; CHECK-NEXT: vreplgr2vr.b $vr2, $a2
|
||||
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
|
||||
; CHECK-NEXT: vst $vr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <16 x i8>, ptr %src
|
||||
%v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx
|
||||
@ -103,15 +103,15 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
|
||||
define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_8xi16_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vst $vr0, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1
|
||||
; CHECK-NEXT: st.h $a2, $a0, 0
|
||||
; CHECK-NEXT: vld $vr0, $sp, 0
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
|
||||
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
|
||||
; CHECK-NEXT: vld $vr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: vreplgr2vr.h $vr2, $a0
|
||||
; CHECK-NEXT: vseq.h $vr0, $vr2, $vr0
|
||||
; CHECK-NEXT: vreplgr2vr.h $vr2, $a2
|
||||
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
|
||||
; CHECK-NEXT: vst $vr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <8 x i16>, ptr %src
|
||||
%v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx
|
||||
@ -122,15 +122,15 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
|
||||
define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_4xi32_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vst $vr0, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2
|
||||
; CHECK-NEXT: st.w $a2, $a0, 0
|
||||
; CHECK-NEXT: vld $vr0, $sp, 0
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
|
||||
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
|
||||
; CHECK-NEXT: vld $vr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: vreplgr2vr.w $vr2, $a0
|
||||
; CHECK-NEXT: vseq.w $vr0, $vr2, $vr0
|
||||
; CHECK-NEXT: vreplgr2vr.w $vr2, $a2
|
||||
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
|
||||
; CHECK-NEXT: vst $vr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <4 x i32>, ptr %src
|
||||
%v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx
|
||||
@ -141,15 +141,15 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
|
||||
define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_2xi64_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: vld $vr0, $a0, 0
|
||||
; CHECK-NEXT: vst $vr0, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3
|
||||
; CHECK-NEXT: st.d $a2, $a0, 0
|
||||
; CHECK-NEXT: vld $vr0, $sp, 0
|
||||
; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
|
||||
; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI9_0)
|
||||
; CHECK-NEXT: vld $vr1, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
|
||||
; CHECK-NEXT: vreplgr2vr.d $vr2, $a0
|
||||
; CHECK-NEXT: vseq.d $vr0, $vr2, $vr0
|
||||
; CHECK-NEXT: vreplgr2vr.d $vr2, $a2
|
||||
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
|
||||
; CHECK-NEXT: vst $vr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <2 x i64>, ptr %src
|
||||
%v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx
|
||||
@ -160,15 +160,16 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
|
||||
define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_4xfloat_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: vld $vr1, $a0, 0
|
||||
; CHECK-NEXT: vst $vr1, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2
|
||||
; CHECK-NEXT: fst.s $fa0, $a0, 0
|
||||
; CHECK-NEXT: vld $vr0, $sp, 0
|
||||
; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
|
||||
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
|
||||
; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
|
||||
; CHECK-NEXT: vld $vr2, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
|
||||
; CHECK-NEXT: vreplgr2vr.w $vr3, $a0
|
||||
; CHECK-NEXT: vseq.w $vr1, $vr3, $vr1
|
||||
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
|
||||
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
|
||||
; CHECK-NEXT: vst $vr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <4 x float>, ptr %src
|
||||
%v_new = insertelement <4 x float> %v, float %ins, i32 %idx
|
||||
@ -179,15 +180,16 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
|
||||
define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind {
|
||||
; CHECK-LABEL: insert_2xdouble_idx:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: vld $vr1, $a0, 0
|
||||
; CHECK-NEXT: vst $vr1, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3
|
||||
; CHECK-NEXT: fst.d $fa0, $a0, 0
|
||||
; CHECK-NEXT: vld $vr0, $sp, 0
|
||||
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
|
||||
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI11_0)
|
||||
; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI11_0)
|
||||
; CHECK-NEXT: vld $vr2, $a0, 0
|
||||
; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
|
||||
; CHECK-NEXT: vreplgr2vr.d $vr3, $a0
|
||||
; CHECK-NEXT: vseq.d $vr1, $vr3, $vr1
|
||||
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
|
||||
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
|
||||
; CHECK-NEXT: vst $vr0, $a1, 0
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%v = load volatile <2 x double>, ptr %src
|
||||
%v_new = insertelement <2 x double> %v, double %ins, i32 %idx
|
||||
|
Loading…
x
Reference in New Issue
Block a user