[LoongArch] Optimize vector bitreverse using scalar bitrev and vshuf4i (#118054)
Custom-lower vector bitreverse to the scalar bitrev and vshuf4i instructions. Keep `v2i64` and `v4i64` bitreverse as `Expand`; it is good enough.
commit 953838dcea
parent 41ed16c3b3
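The idea behind the lowering: the scalar bitrev.d instruction reverses all 64 bits of a lane, which both bit-reverses every element packed into that lane and reverses the order of those elements, so a single vshuf4i element reorder afterwards restores the element order and leaves exactly a per-element bitreverse. Below is a minimal standalone C++ sketch of this equivalence for the i16 case; it is illustrative only and not part of the commit.

#include <cassert>
#include <cstdint>

// Reverse all 64 bits of a value, as the scalar bitrev.d instruction does.
static uint64_t bitrev64(uint64_t v) {
  uint64_t r = 0;
  for (int i = 0; i < 64; ++i)
    r |= ((v >> i) & 1) << (63 - i);
  return r;
}

// Bit-reverse each 16-bit element of a 64-bit lane individually: the
// semantics of bitreverse on the four i16 elements packed into the lane.
static uint64_t bitrevPerElt16(uint64_t v) {
  uint64_t r = 0;
  for (int e = 0; e < 4; ++e) {
    uint16_t elt = uint16_t(v >> (16 * e));
    uint16_t rev = 0;
    for (int i = 0; i < 16; ++i)
      rev |= ((elt >> i) & 1) << (15 - i);
    r |= uint64_t(rev) << (16 * e);
  }
  return r;
}

// Reverse the order of the four 16-bit elements in a 64-bit lane, which is
// what vshuf4i.h with immediate 27 (0b00011011) performs lane by lane.
static uint64_t revElts16(uint64_t v) {
  return (v >> 48) | ((v >> 16) & 0xFFFF0000u) | ((v & 0xFFFF0000u) << 16) |
         (v << 48);
}

int main() {
  uint64_t x = 0x0123456789ABCDEFull;
  // Full 64-bit reversal followed by element reordering equals the
  // per-element bitreverse that the DAG node asks for.
  assert(revElts16(bitrev64(x)) == bitrevPerElt16(x));
  return 0;
}

The same argument gives immediate 177 (0b10110001) for vshuf4i.w, which swaps the two i32 elements inside each 64-bit half. Byte vectors take the other branch of the lowering: bitrev.8b reverses the bits within each byte in place, so no shuffle is needed at all.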
@@ -270,6 +270,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
     }
+    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
+      setOperationAction(ISD::BITREVERSE, VT, Custom);
     for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
       setOperationAction(ISD::BSWAP, VT, Legal);
     for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
@@ -324,6 +326,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
     }
+    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
+      setOperationAction(ISD::BITREVERSE, VT, Custom);
     for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
       setOperationAction(ISD::BSWAP, VT, Legal);
     for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
@@ -440,10 +444,56 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
     return lowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::BITREVERSE:
+    return lowerBITREVERSE(Op, DAG);
   }
   return SDValue();
 }
 
+SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  EVT ResTy = Op->getValueType(0);
+  SDValue Src = Op->getOperand(0);
+  SDLoc DL(Op);
+
+  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+  unsigned int OrigEltNum = ResTy.getVectorNumElements();
+  unsigned int NewEltNum = NewVT.getVectorNumElements();
+
+  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
+
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned int i = 0; i < NewEltNum; i++) {
+    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
+                             DAG.getConstant(i, DL, MVT::i64));
+    SDValue RevOp = DAG.getNode((ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
+                                    ? LoongArchISD::BITREV_8B
+                                    : ISD::BITREVERSE,
+                                DL, MVT::i64, Op);
+    Ops.push_back(RevOp);
+  }
+  SDValue Res =
+      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
+
+  switch (ResTy.getSimpleVT().SimpleTy) {
+  default:
+    return SDValue();
+  case MVT::v16i8:
+  case MVT::v32i8:
+    return Res;
+  case MVT::v8i16:
+  case MVT::v16i16:
+  case MVT::v4i32:
+  case MVT::v8i32: {
+    SmallVector<int, 32> Mask;
+    for (unsigned int i = 0; i < NewEltNum; i++)
+      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
+        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
+    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
+  }
+  }
+}
+
 /// Determine whether a range fits a regular pattern of values.
 /// This function accounts for the possibility of jumping over the End iterator.
 template <typename ValType>
@@ -4685,6 +4735,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(REVB_2H)
     NODE_NAME_CASE(REVB_2W)
     NODE_NAME_CASE(BITREV_4B)
+    NODE_NAME_CASE(BITREV_8B)
     NODE_NAME_CASE(BITREV_W)
     NODE_NAME_CASE(ROTR_W)
     NODE_NAME_CASE(ROTL_W)
@@ -68,6 +68,7 @@ enum NodeType : unsigned {
   REVB_2H,
   REVB_2W,
   BITREV_4B,
+  BITREV_8B,
   BITREV_W,
 
   // Intrinsic operations start ============================================
@@ -334,6 +335,7 @@ private:
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
@@ -112,6 +112,7 @@ def loongarch_bstrpick
 def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
 def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
 def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>;
+def loongarch_bitrev_8b : SDNode<"LoongArchISD::BITREV_8B", SDTUnaryOp>;
 def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>;
 def loongarch_clzw : SDNode<"LoongArchISD::CLZ_W", SDTIntBitCountUnaryOp>;
 def loongarch_ctzw : SDNode<"LoongArchISD::CTZ_W", SDTIntBitCountUnaryOp>;
@@ -1765,6 +1766,7 @@ def : Pat<(bitreverse (bswap GPR:$rj)), (BITREV_4B GPR:$rj)>;
 let Predicates = [IsLA64] in {
 def : Pat<(loongarch_revb_2w GPR:$rj), (REVB_2W GPR:$rj)>;
 def : Pat<(bswap GPR:$rj), (REVB_D GPR:$rj)>;
+def : Pat<(loongarch_bitrev_8b GPR:$rj), (BITREV_8B GPR:$rj)>;
 def : Pat<(loongarch_bitrev_w GPR:$rj), (BITREV_W GPR:$rj)>;
 def : Pat<(bitreverse GPR:$rj), (BITREV_D GPR:$rj)>;
 def : Pat<(bswap (bitreverse GPR:$rj)), (BITREV_8B GPR:$rj)>;
@@ -7,19 +7,19 @@ declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
 define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
 ; CHECK-LABEL: test_bitreverse_v32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvslli.b $xr1, $xr0, 4
-; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 4
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvandi.b $xr1, $xr0, 51
-; CHECK-NEXT:    xvslli.b $xr1, $xr1, 2
-; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 2
-; CHECK-NEXT:    xvandi.b $xr0, $xr0, 51
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvandi.b $xr1, $xr0, 85
-; CHECK-NEXT:    xvslli.b $xr1, $xr1, 1
-; CHECK-NEXT:    xvsrli.b $xr0, $xr0, 1
-; CHECK-NEXT:    xvandi.b $xr0, $xr0, 85
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT:    bitrev.8b $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT:    bitrev.8b $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 1
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 2
+; CHECK-NEXT:    bitrev.8b $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 2
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
+; CHECK-NEXT:    bitrev.8b $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 3
+; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
   %b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
   ret <32 x i8> %b
@@ -30,25 +30,19 @@ declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
 define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
 ; CHECK-LABEL: test_bitreverse_v16i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 177
-; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 4
-; CHECK-NEXT:    xvrepli.b $xr2, 15
-; CHECK-NEXT:    xvand.v $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvslli.h $xr0, $xr0, 4
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
-; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 2
-; CHECK-NEXT:    xvrepli.b $xr2, 51
-; CHECK-NEXT:    xvand.v $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvslli.h $xr0, $xr0, 2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
-; CHECK-NEXT:    xvsrli.h $xr1, $xr0, 1
-; CHECK-NEXT:    xvrepli.b $xr2, 85
-; CHECK-NEXT:    xvand.v $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvslli.h $xr0, $xr0, 1
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 1
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 2
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 2
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 3
+; CHECK-NEXT:    xvshuf4i.h $xr0, $xr1, 27
 ; CHECK-NEXT:    ret
   %b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
   ret <16 x i16> %b
@@ -59,25 +53,19 @@ declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
 define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
 ; CHECK-LABEL: test_bitreverse_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 27
-; CHECK-NEXT:    xvsrli.w $xr1, $xr0, 4
-; CHECK-NEXT:    xvrepli.b $xr2, 15
-; CHECK-NEXT:    xvand.v $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvslli.w $xr0, $xr0, 4
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
-; CHECK-NEXT:    xvsrli.w $xr1, $xr0, 2
-; CHECK-NEXT:    xvrepli.b $xr2, 51
-; CHECK-NEXT:    xvand.v $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvslli.w $xr0, $xr0, 2
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
-; CHECK-NEXT:    xvsrli.w $xr1, $xr0, 1
-; CHECK-NEXT:    xvrepli.b $xr2, 85
-; CHECK-NEXT:    xvand.v $xr1, $xr1, $xr2
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT:    xvslli.w $xr0, $xr0, 1
-; CHECK-NEXT:    xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 1
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 2
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 2
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    xvinsgr2vr.d $xr1, $a0, 3
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr1, 177
 ; CHECK-NEXT:    ret
   %b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
   ret <8 x i32> %b
@@ -7,19 +7,13 @@ declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
 define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
 ; CHECK-LABEL: test_bitreverse_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vslli.b $vr1, $vr0, 4
-; CHECK-NEXT:    vsrli.b $vr0, $vr0, 4
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vandi.b $vr1, $vr0, 51
-; CHECK-NEXT:    vslli.b $vr1, $vr1, 2
-; CHECK-NEXT:    vsrli.b $vr0, $vr0, 2
-; CHECK-NEXT:    vandi.b $vr0, $vr0, 51
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
-; CHECK-NEXT:    vandi.b $vr1, $vr0, 85
-; CHECK-NEXT:    vslli.b $vr1, $vr1, 1
-; CHECK-NEXT:    vsrli.b $vr0, $vr0, 1
-; CHECK-NEXT:    vandi.b $vr0, $vr0, 85
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    bitrev.8b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT:    bitrev.8b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; CHECK-NEXT:    vori.b $vr0, $vr1, 0
 ; CHECK-NEXT:    ret
   %b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
   ret <16 x i8> %b
@@ -30,25 +24,13 @@ declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
 define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: test_bitreverse_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 177
-; CHECK-NEXT:    vsrli.h $vr1, $vr0, 4
-; CHECK-NEXT:    vrepli.b $vr2, 15
-; CHECK-NEXT:    vand.v $vr1, $vr1, $vr2
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT:    vslli.h $vr0, $vr0, 4
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT:    vsrli.h $vr1, $vr0, 2
-; CHECK-NEXT:    vrepli.b $vr2, 51
-; CHECK-NEXT:    vand.v $vr1, $vr1, $vr2
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT:    vslli.h $vr0, $vr0, 2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT:    vsrli.h $vr1, $vr0, 1
-; CHECK-NEXT:    vrepli.b $vr2, 85
-; CHECK-NEXT:    vand.v $vr1, $vr1, $vr2
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT:    vslli.h $vr0, $vr0, 1
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; CHECK-NEXT:    vshuf4i.h $vr0, $vr1, 27
 ; CHECK-NEXT:    ret
   %b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
   ret <8 x i16> %b
@@ -59,25 +41,13 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
 define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: test_bitreverse_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 27
-; CHECK-NEXT:    vsrli.w $vr1, $vr0, 4
-; CHECK-NEXT:    vrepli.b $vr2, 15
-; CHECK-NEXT:    vand.v $vr1, $vr1, $vr2
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 4
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT:    vsrli.w $vr1, $vr0, 2
-; CHECK-NEXT:    vrepli.b $vr2, 51
-; CHECK-NEXT:    vand.v $vr1, $vr1, $vr2
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 2
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT:    vsrli.w $vr1, $vr0, 1
-; CHECK-NEXT:    vrepli.b $vr2, 85
-; CHECK-NEXT:    vand.v $vr1, $vr1, $vr2
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 1
-; CHECK-NEXT:    vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT:    bitrev.d $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr1, 177
 ; CHECK-NEXT:    ret
   %b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
   ret <4 x i32> %b