diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 329d9d13ebdd..11fa36becaad 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -501,6 +501,17 @@ undefined.
   %2:_(s33) = G_CTLZ_ZERO_UNDEF %1
   %2:_(s33) = G_CTTZ_ZERO_UNDEF %1
 
+G_CTLS
+^^^^^^
+
+Count leading redundant sign bits. If the value is non-negative then the result
+is the number of extra leading zeros. If the value is negative then the result
+is the number of extra leading ones.
+
+.. code-block:: none
+
+  %2:_(s32) = G_CTLS %1
+
 G_ABDS, G_ABDU
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9de1a643f100..5d4347066a40 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -1046,6 +1046,10 @@ public:
   bool matchRedundantSextInReg(MachineInstr &Root, MachineInstr &Other,
                                BuildFnTy &MatchInfo) const;
 
+  // (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1) or
+  // (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x)
+  bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const;
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 004c7e6fa0b5..c04f917f1e7e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -476,6 +476,8 @@ public:
                                            LLT Ty);
   LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
                                            LLT Ty);
+  LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx,
+                                           LLT Ty);
   LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
                                             LLT Ty);
   LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 5f3f1d386569..30bf7e3a4a3d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2063,6 +2063,11 @@ public:
     return buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, {Dst}, {Src0});
   }
 
+  /// Build and insert \p Dst = G_CTLS \p Src0
+  MachineInstrBuilder buildCTLS(const DstOp &Dst, const SrcOp &Src0) {
+    return buildInstr(TargetOpcode::G_CTLS, {Dst}, {Src0});
+  }
+
   /// Build and insert \p Dst = G_BSWAP \p Src0
   MachineInstrBuilder buildBSwap(const DstOp &Dst, const SrcOp &Src0) {
     return buildInstr(TargetOpcode::G_BSWAP, {Dst}, {Src0});
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 0d92f50a09d3..3217ffafc235 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -839,6 +839,9 @@ HANDLE_TARGET_OPCODE(G_CTLZ)
 /// Same as above, undefined for zero inputs.
 HANDLE_TARGET_OPCODE(G_CTLZ_ZERO_UNDEF)
 
+/// Generic count leading redundant sign bits.
+HANDLE_TARGET_OPCODE(G_CTLS)
+
 /// Generic count bits.
HANDLE_TARGET_OPCODE(G_CTPOP) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 1b65b8b73527..b785847b53f0 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -214,6 +214,12 @@ def G_CTTZ_ZERO_UNDEF : GenericInstruction { let hasSideEffects = false; } +def G_CTLS : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = false; +} + def G_CTPOP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 0ab2d9487a29..a9b4932b2e31 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -2004,6 +2004,22 @@ class narrow_binop_opcode : GICombineRule < [{ return Helper.matchNarrowBinop(*${Trunc}, *${Binop}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${Trunc}, ${matchinfo}); }])>; +// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1). 
+// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1)) -> (ctls x)
+class ctlz_to_ctls_op<Instruction ctlzOpcode> : GICombineRule <
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (ctlzOpcode $dst, $src):$root,
+         [{ return Helper.matchCtls(*${root}, ${matchinfo}); }]),
+  (apply [{Helper.applyBuildFn(*${root}, ${matchinfo});}])>;
+
+def ctlz_to_ctls : ctlz_to_ctls_op<G_CTLZ>;
+def ctlz_zero_undef_to_ctls : ctlz_to_ctls_op<G_CTLZ_ZERO_UNDEF>;
+
+def ctls_combines : GICombineGroup<[
+  ctlz_to_ctls,
+  ctlz_zero_undef_to_ctls,
+]>;
+
 def narrow_binop_add : narrow_binop_opcode<G_ADD>;
 def narrow_binop_sub : narrow_binop_opcode<G_SUB>;
 def narrow_binop_mul : narrow_binop_opcode<G_MUL>;
@@ -2166,7 +2182,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     simplify_neg_minmax, combine_concat_vector,
     sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
     combine_use_vector_truncate, merge_combines, overflow_combines,
-    truncsat_combines, lshr_of_trunc_of_lshr]>;
+    truncsat_combines, lshr_of_trunc_of_lshr, ctls_combines]>;
 
 // A combine group used to for prelegalizer combiners at -O0. 
The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index a69e08977931..4b0c89b780c6 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -154,6 +154,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index c58969144dc2..85d5f06b9813 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DivisionByConstantInfo.h" #include "llvm/Support/ErrorHandling.h" @@ -8512,3 +8513,69 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI, return false; } + +// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1). 
+// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x) +bool CombinerHelper::matchCtls(MachineInstr &CtlzMI, + BuildFnTy &MatchInfo) const { + assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ || + CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) && + "Expected G_CTLZ variant"); + + const Register Dst = CtlzMI.getOperand(0).getReg(); + Register Src = CtlzMI.getOperand(1).getReg(); + + LLT Ty = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (!(Ty.isValid() && Ty.isScalar())) + return false; + + if (!LI) + return false; + + SmallVector QueryTypes = {Ty, SrcTy}; + LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes); + + switch (LI->getAction(Query).Action) { + default: + return false; + case LegalizeActions::Legal: + case LegalizeActions::Custom: + case LegalizeActions::WidenScalar: + break; + } + + // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False + Register V; + bool NeedAdd = true; + if (mi_match(Src, MRI, + m_OneUse(m_GOr(m_OneUse(m_GShl(m_Reg(V), m_SpecificICst(1))), + m_SpecificICst(1))))) { + NeedAdd = false; + Src = V; + } + + unsigned BitWidth = Ty.getScalarSizeInBits(); + + Register X; + if (!mi_match(Src, MRI, + m_OneUse(m_GXor(m_Reg(X), m_OneUse(m_GAShr( + m_DeferredReg(X), + m_SpecificICst(BitWidth - 1))))))) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + if (!NeedAdd) { + B.buildCTLS(Dst, X); + return; + } + + auto Ctls = B.buildCTLS(Ty, X); + auto One = B.buildConstant(Ty, 1); + + B.buildAdd(Dst, Ctls, One); + }; + + return true; +} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 783be7a9b0ca..7e6d31b126ed 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1754,6 +1754,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: + case 
TargetOpcode::G_CTLS: case TargetOpcode::G_CTPOP: if (TypeIdx == 1) switch (MI.getOpcode()) { @@ -1765,6 +1766,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return narrowScalarCTTZ(MI, TypeIdx, NarrowTy); case TargetOpcode::G_CTPOP: return narrowScalarCTPOP(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_CTLS: + return narrowScalarCTLS(MI, TypeIdx, NarrowTy); default: return UnableToLegalize; } @@ -2792,6 +2795,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTLZ_ZERO_UNDEF: + case TargetOpcode::G_CTLS: case TargetOpcode::G_CTPOP: { if (TypeIdx == 0) { Observer.changingInstr(MI); @@ -2803,10 +2807,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Register SrcReg = MI.getOperand(1).getReg(); // First extend the input. - unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ || - Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF - ? TargetOpcode::G_ANYEXT - : TargetOpcode::G_ZEXT; + unsigned ExtOpc; + switch (Opcode) { + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + ExtOpc = TargetOpcode::G_ANYEXT; + break; + case TargetOpcode::G_CTLS: + ExtOpc = TargetOpcode::G_SEXT; + break; + default: + ExtOpc = TargetOpcode::G_ZEXT; + } + auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg}); LLT CurTy = MRI.getType(SrcReg); unsigned NewOpc = Opcode; @@ -2836,7 +2849,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { // Perform the operation at the larger size. auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs - if (Opcode == TargetOpcode::G_CTLZ) { + if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) { // The correct result is NewOp - (Difference in widety and current ty). 
MIBNewOp = MIRBuilder.buildSub( WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)); @@ -4649,6 +4662,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: + case TargetOpcode::G_CTLS: return lowerBitCount(MI); case G_UADDO: { auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs(); @@ -7540,6 +7554,52 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + + if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize) + return UnableToLegalize; + + MachineIRBuilder &B = MIRBuilder; + + auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); + Register Lo = UnmergeSrc.getReg(0); + Register Hi = UnmergeSrc.getReg(1); + + auto ShAmt = B.buildConstant(NarrowTy, NarrowSize - 1); + auto Sign = B.buildAShr(NarrowTy, Hi, ShAmt); + + auto LoSign = B.buildAShr(NarrowTy, Lo, ShAmt); + auto LoSameSign = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + LoSign.getReg(0), Sign.getReg(0)); + + auto HiIsSign = + B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), Hi, Sign.getReg(0)); + + auto LoCTLS = B.buildCTLS(DstTy, Lo); + auto GNarrowSize = B.buildConstant(DstTy, NarrowSize); + auto HiIsSignCTLS = B.buildAdd(DstTy, LoCTLS, GNarrowSize); + + // If the low half flips sign, the run of redundant bits stops at the + // boundary, so use (NarrowSize - 1) instead of extending into Lo. 
+ auto GNarrowSizeMinus1 = B.buildConstant(DstTy, NarrowSize - 1); + auto HiSignResult = + B.buildSelect(DstTy, LoSameSign, HiIsSignCTLS, GNarrowSizeMinus1); + + auto HiCTLS = B.buildCTLS(DstTy, Hi); + + B.buildSelect(DstReg, HiIsSign, HiSignResult, HiCTLS); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -7762,6 +7822,23 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_CTLS: { + auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + + // ctls(x) -> ctlz(x ^ (x >> (N - 1))) - 1 + auto SignIdxC = + MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1); + auto OneC = MIRBuilder.buildConstant(DstTy, 1); + + auto Shr = MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC); + + auto Xor = MIRBuilder.buildXor(SrcTy, SrcReg, Shr); + auto Ctlz = MIRBuilder.buildCTLZ(DstTy, Xor); + + MIRBuilder.buildSub(DstReg, Ctlz, OneC); + MI.eraseFromParent(); + return Legalized; + } } } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 35e89cae9e92..658774ec3fcb 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1877,6 +1877,7 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI, return true; case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTLS: case TargetOpcode::G_ABS: case TargetOpcode::G_CTPOP: case TargetOpcode::G_BSWAP: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 2d80212aa2a6..013bc11d7b03 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3308,13 +3308,6 @@ def : Pat<(cttz GPR32:$Rn), (CLZWr (RBITWr GPR32:$Rn))>; def : Pat<(cttz GPR64:$Rn), (CLZXr (RBITXr GPR64:$Rn))>; -// FIXME: Remove these patterns 
when gisel supports ctls -def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), - (i32 1))), - (CLSWr GPR32:$Rn)>; -def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), - (i64 1))), - (CLSXr GPR64:$Rn)>; // Unlike the other one operand instructions, the instructions with the "rev" // mnemonic do *not* just different in the size bit, but actually use different diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index e067489283b2..131c72a24964 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -339,7 +339,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .moreElementsToNextPow2(0) .scalarizeIf(scalarOrEltWiderThan(0, 64), 0); - getActionDefinitionsBuilder(G_CTLZ) + getActionDefinitionsBuilder({G_CTLZ, G_CTLS}) .legalFor({{s32, s32}, {s64, s64}, {v8s8, v8s8}, diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index ae43fccf8e81..7766a4540f2d 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -233,6 +233,17 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) } CountZerosUndefActions.lower(); + auto &CountSignActions = getActionDefinitionsBuilder(G_CTLS); + if (ST.hasStdExtP()) { + CountSignActions.legalFor({{sXLen, sXLen}}) + .customFor({{s32, s32}}) + .clampScalar(0, s32, sXLen) + .widenScalarToNextPow2(0) + .scalarSameSizeAs(1, 0); + } else { + CountSignActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower(); + } + auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP); if (ST.hasStdExtZbb()) { CTPOPActions.legalFor({{sXLen, sXLen}}) @@ -1385,6 +1396,8 @@ static unsigned getRISCVWOpcode(unsigned Opcode) { return RISCV::G_CLZW; case TargetOpcode::G_CTTZ: return RISCV::G_CTZW; + case 
TargetOpcode::G_CTLS: + return RISCV::G_CLSW; case TargetOpcode::G_FPTOSI: return RISCV::G_FCVT_W_RV64; case TargetOpcode::G_FPTOUI: @@ -1497,7 +1510,8 @@ bool RISCVLegalizerInfo::legalizeCustom( return true; } case TargetOpcode::G_CTLZ: - case TargetOpcode::G_CTTZ: { + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTLS: { Helper.Observer.changingInstr(MI); Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT); Helper.widenScalarDst(MI, sXLen); diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td index bd3ecd737bed..5da2ca8d7c7b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td +++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td @@ -178,3 +178,10 @@ def G_VSLIDEUP_VL : RISCVGenericInstruction { } def : GINodeEquiv; +// Pseudo equivalent to RISCVISD::CLSW +def G_CLSW : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = false; +} +def : GINodeEquiv; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 8a0071c9ea5c..655e793f55c5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -746,6 +746,10 @@ # DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_CTLS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_CTPOP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. 
type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll index dc4983aa7386..4597c6178e2b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-clrsb.ll +++ b/llvm/test/CodeGen/AArch64/arm64-clrsb.ll @@ -22,18 +22,11 @@ entry: } define i32 @clrsb32_2(i32 %x) #2 { -; CHECK-SD-LABEL: clrsb32_2: -; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: cls w8, w0 -; CHECK-SD-NEXT: add w0, w8, #2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: clrsb32_2: -; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: eor w8, w0, w0, asr #31 -; CHECK-GI-NEXT: clz w8, w8 -; CHECK-GI-NEXT: add w0, w8, #1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: clrsb32_2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls w8, w0 +; CHECK-NEXT: add w0, w8, #2 +; CHECK-NEXT: ret entry: %shr = ashr i32 %x, 31 %xor = xor i32 %shr, %x @@ -57,18 +50,11 @@ entry: } define i64 @clrsb64_2(i64 %x) #3 { -; CHECK-SD-LABEL: clrsb64_2: -; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: cls x8, x0 -; CHECK-SD-NEXT: add x0, x8, #2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: clrsb64_2: -; CHECK-GI: ; %bb.0: ; %entry -; CHECK-GI-NEXT: eor x8, x0, x0, asr #63 -; CHECK-GI-NEXT: clz x8, x8 -; CHECK-GI-NEXT: add x0, x8, #1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: clrsb64_2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cls x8, x0 +; CHECK-NEXT: add x0, x8, #2 +; CHECK-NEXT: ret entry: %shr = ashr i64 %x, 63 %xor = xor i64 %shr, %x @@ -105,3 +91,6 @@ entry: ret i64 %0 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt index 62e07445ad12..15d4589580b7 100644 --- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt @@ -369,6 +369,7 @@ Key: G_CONCAT_VECTORS: [ 0.00 0.00 ] Key: G_CONSTANT: [ 0.00 0.00 ] Key: G_CONSTANT_FOLD_BARRIER: [ 0.00 0.00 ] Key: G_CONSTANT_POOL: [ 0.00 0.00 ] +Key: G_CTLS: [ 0.00 0.00 ] Key: G_CTLZ: [ 0.00 0.00 ] Key: G_CTLZ_ZERO_UNDEF: [ 0.00 0.00 ] Key: G_CTPOP: [ 0.00 0.00 ] diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt index 03a3fafc6b80..3cc389021656 100644 --- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt +++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt @@ -369,6 +369,7 @@ Key: G_CONCAT_VECTORS: [ 0.00 0.00 ] Key: G_CONSTANT: [ 0.00 0.00 ] Key: G_CONSTANT_FOLD_BARRIER: [ 0.00 0.00 ] Key: G_CONSTANT_POOL: [ 0.00 0.00 ] +Key: G_CTLS: [ 0.00 0.00 ] Key: G_CTLZ: [ 0.00 0.00 ] Key: G_CTLZ_ZERO_UNDEF: [ 0.00 0.00 ] Key: G_CTPOP: [ 0.00 0.00 ] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index a823a4ccf3e4..43ef36da6a11 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -736,6 +736,9 @@ # DEBUG-NEXT: G_CTLZ_ZERO_UNDEF (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_CTLS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_CTPOP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. the first uncovered type index: 2, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir new file mode 100644 index 000000000000..924415f85b09 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir @@ -0,0 +1,431 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=riscv32 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=RV32I +# RUN: llc -mtriple=riscv32 -mattr=+experimental-p -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=RV32P +# RUN: llc -mtriple=riscv32 -mattr=+zbb -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=RV32ZBB + +--- +name: cls_i8 +body: | + bb.1: + liveins: $x10 + + ; RV32I-LABEL: name: cls_i8 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; RV32I-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) + ; RV32I-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C]](s32) + ; RV32I-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR1]] + ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; RV32I-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], 
[[C3]] + ; RV32I-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32) + ; RV32I-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[LSHR]] + ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32I-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] + ; RV32I-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C4]](s32) + ; RV32I-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[LSHR1]] + ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; RV32I-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] + ; RV32I-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) + ; RV32I-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[LSHR2]] + ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C3]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C1]](s32) + ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 85 + ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C6]] + ; RV32I-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[OR2]], [[AND4]] + ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C3]] + ; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C4]](s32) + ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 + ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C7]] + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C7]] + ; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND6]], [[AND7]] + ; RV32I-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C5]](s32) + ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR5]], [[ADD]] + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C8]] + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: $x10 = COPY [[AND8]](s32) + ; RV32I-NEXT: $x11 = COPY [[C1]](s32) + ; RV32I-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV32I-NEXT: 
ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[C9]](s32) + ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR6]] + ; RV32I-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] + ; RV32I-NEXT: $x10 = COPY [[SUB2]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32P-LABEL: name: cls_i8 + ; RV32P: liveins: $x10 + ; RV32P-NEXT: {{ $}} + ; RV32P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32P-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; RV32P-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[ASHR]](s32) + ; RV32P-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLS]], [[C]] + ; RV32P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; RV32P-NEXT: $x10 = COPY [[COPY1]](s32) + ; RV32P-NEXT: PseudoRET implicit $x10 + ; + ; RV32ZBB-LABEL: name: cls_i8 + ; RV32ZBB: liveins: $x10 + ; RV32ZBB-NEXT: {{ $}} + ; RV32ZBB-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; RV32ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32ZBB-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; RV32ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; RV32ZBB-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]] + ; RV32ZBB-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; RV32ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C2]] + ; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV32ZBB-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C3]] + ; RV32ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; RV32ZBB-NEXT: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] + ; RV32ZBB-NEXT: $x10 = COPY [[SUB1]](s32) + ; RV32ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s32) + %2:_(s8) = G_CTLS %0(s8) + %3:_(s32) = G_ANYEXT %2(s8) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 +... + +--- +name: cls_i16 +body: | + bb.1: + liveins: $x10 + + ; RV32I-LABEL: name: cls_i16 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) + ; RV32I-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C2]](s32) + ; RV32I-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[C]](s32) + ; RV32I-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR1]] + ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; RV32I-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C3]] + ; RV32I-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[C1]](s32) + ; RV32I-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[LSHR]] + ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32I-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C3]] + ; RV32I-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[C4]](s32) + ; RV32I-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[LSHR1]] + ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; RV32I-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C3]] + ; RV32I-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C5]](s32) + ; RV32I-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[LSHR2]] + ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C3]] + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[C6]](s32) + ; RV32I-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[LSHR3]] + ; RV32I-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C3]] + ; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[C1]](s32) + ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 21845 + ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C7]] + ; RV32I-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[OR3]], [[AND5]] + ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C3]] + ; RV32I-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[C4]](s32) + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 13107 + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C8]] + ; RV32I-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C8]] + ; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND7]], [[AND8]] + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C5]](s32) + ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR6]], [[ADD]] + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 3855 + ; RV32I-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C9]] + ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 + ; RV32I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: $x10 = COPY [[AND9]](s32) + ; RV32I-NEXT: $x11 = COPY [[C10]](s32) + ; RV32I-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV32I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[C6]](s32) + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[LSHR7]] + ; RV32I-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] + ; RV32I-NEXT: $x10 = COPY [[SUB2]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32P-LABEL: name: cls_i16 + ; RV32P: liveins: $x10 + ; RV32P-NEXT: {{ $}} + ; RV32P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; 
RV32P-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; RV32P-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[ASHR]](s32) + ; RV32P-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLS]], [[C]] + ; RV32P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; RV32P-NEXT: $x10 = COPY [[COPY1]](s32) + ; RV32P-NEXT: PseudoRET implicit $x10 + ; + ; RV32ZBB-LABEL: name: cls_i16 + ; RV32ZBB: liveins: $x10 + ; RV32ZBB-NEXT: {{ $}} + ; RV32ZBB-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; RV32ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32ZBB-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; RV32ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; RV32ZBB-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]] + ; RV32ZBB-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; RV32ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C2]] + ; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32) + ; RV32ZBB-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C3]] + ; RV32ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32) + ; RV32ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]] + ; RV32ZBB-NEXT: $x10 = COPY [[SUB1]](s32) + ; RV32ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s32) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s32) + %2:_(s16) = G_CTLS %0(s16) + %3:_(s32) = G_ANYEXT %2(s16) + $x10 = COPY %3(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: cls_i32 +body: | + bb.1: + liveins: $x10 + + ; RV32I-LABEL: name: cls_i32 + ; RV32I: liveins: $x10 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; RV32I-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]] + ; RV32I-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[XOR]], [[C1]](s32) + ; RV32I-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[LSHR]] + ; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32I-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; RV32I-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[LSHR1]] + ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; RV32I-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) + ; RV32I-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[LSHR2]] + ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C4]](s32) + ; RV32I-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[LSHR3]] + ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C5]](s32) + ; RV32I-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[LSHR4]] + ; RV32I-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[C1]](s32) + ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1431655765 + ; RV32I-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C6]] + ; RV32I-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[OR4]], [[AND]] + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C2]](s32) + ; RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 858993459 + ; RV32I-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C7]] + ; RV32I-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C7]] + ; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], 
[[C3]](s32) + ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[ADD]] + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 + ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C8]] + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[SHL]] + ; RV32I-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C5]](s32) + ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[SHL1]] + ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C9]](s32) + ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR8]] + ; RV32I-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] + ; RV32I-NEXT: $x10 = COPY [[SUB2]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10 + ; + ; RV32P-LABEL: name: cls_i32 + ; RV32P: liveins: $x10 + ; RV32P-NEXT: {{ $}} + ; RV32P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[COPY]](s32) + ; RV32P-NEXT: $x10 = COPY [[CTLS]](s32) + ; RV32P-NEXT: PseudoRET implicit $x10 + ; + ; RV32ZBB-LABEL: name: cls_i32 + ; RV32ZBB: liveins: $x10 + ; RV32ZBB-NEXT: {{ $}} + ; RV32ZBB-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; RV32ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; RV32ZBB-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR]] + ; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[XOR]](s32) + ; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV32ZBB-NEXT: $x10 = COPY [[SUB]](s32) + ; RV32ZBB-NEXT: PseudoRET implicit $x10 + %0:_(s32) = COPY $x10 + %1:_(s32) = G_CTLS %0(s32) + $x10 = COPY %1(s32) + PseudoRET implicit $x10 +... 
+ +--- +name: cls_i64 +body: | + bb.1: + liveins: $x10, $x11 + + ; RV32I-LABEL: name: cls_i64 + ; RV32I: liveins: $x10, $x11 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; RV32I-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; RV32I-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR1]](s32), [[ASHR]] + ; RV32I-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]] + ; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32I-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR1]] + ; RV32I-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[XOR]], [[C1]](s32) + ; RV32I-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[LSHR]] + ; RV32I-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; RV32I-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) + ; RV32I-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[LSHR1]] + ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; RV32I-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) + ; RV32I-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[LSHR2]] + ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; RV32I-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[OR2]], [[C4]](s32) + ; RV32I-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[LSHR3]] + ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; RV32I-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[OR3]], [[C5]](s32) + ; RV32I-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[LSHR4]] + ; RV32I-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[C1]](s32) + ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1431655765 + ; RV32I-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C6]] + ; RV32I-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[OR4]], [[AND]] + ; RV32I-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C2]](s32) + ; 
RV32I-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 858993459 + ; RV32I-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C7]] + ; RV32I-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C7]] + ; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND1]], [[AND2]] + ; RV32I-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C3]](s32) + ; RV32I-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR7]], [[ADD]] + ; RV32I-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 252645135 + ; RV32I-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C8]] + ; RV32I-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; RV32I-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; RV32I-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND3]], [[SHL]] + ; RV32I-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C5]](s32) + ; RV32I-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[SHL1]] + ; RV32I-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C9]](s32) + ; RV32I-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; RV32I-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR8]] + ; RV32I-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C1]] + ; RV32I-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[C10]] + ; RV32I-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD4]], [[C]] + ; RV32I-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[ASHR]] + ; RV32I-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[XOR1]], [[C1]](s32) + ; RV32I-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[XOR1]], [[LSHR9]] + ; RV32I-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[OR5]], [[C2]](s32) + ; RV32I-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[LSHR10]] + ; RV32I-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[OR6]], [[C3]](s32) + ; RV32I-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[LSHR11]] + ; RV32I-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[OR7]], [[C4]](s32) + ; RV32I-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[LSHR12]] + ; RV32I-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[OR8]], [[C5]](s32) + ; RV32I-NEXT: [[OR9:%[0-9]+]]:_(s32) 
= G_OR [[OR8]], [[LSHR13]] + ; RV32I-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[OR9]], [[C1]](s32) + ; RV32I-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C6]] + ; RV32I-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[OR9]], [[AND4]] + ; RV32I-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C2]](s32) + ; RV32I-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR15]], [[C7]] + ; RV32I-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C7]] + ; RV32I-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[AND6]] + ; RV32I-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[ADD5]], [[C3]](s32) + ; RV32I-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[LSHR16]], [[ADD5]] + ; RV32I-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ADD6]], [[C8]] + ; RV32I-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32) + ; RV32I-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[AND7]], [[SHL2]] + ; RV32I-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ADD7]], [[C5]](s32) + ; RV32I-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[SHL3]] + ; RV32I-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[ADD8]], [[C9]](s32) + ; RV32I-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[LSHR17]] + ; RV32I-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[C1]] + ; RV32I-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[SUB5]] + ; RV32I-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32I-NEXT: $x10 = COPY [[SELECT1]](s32) + ; RV32I-NEXT: $x11 = COPY [[C11]](s32) + ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; RV32P-LABEL: name: cls_i64 + ; RV32P: liveins: $x10, $x11 + ; RV32P-NEXT: {{ $}} + ; RV32P-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32P-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32P-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; RV32P-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; RV32P-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; RV32P-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR1]](s32), [[ASHR]] + ; RV32P-NEXT: 
[[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]] + ; RV32P-NEXT: [[CTLS:%[0-9]+]]:_(s32) = G_CTLS [[COPY]](s32) + ; RV32P-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; RV32P-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLS]], [[C1]] + ; RV32P-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[C]] + ; RV32P-NEXT: [[CTLS1:%[0-9]+]]:_(s32) = G_CTLS [[COPY1]](s32) + ; RV32P-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[SELECT]], [[CTLS1]] + ; RV32P-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32P-NEXT: $x10 = COPY [[SELECT1]](s32) + ; RV32P-NEXT: $x11 = COPY [[C2]](s32) + ; RV32P-NEXT: PseudoRET implicit $x10, implicit $x11 + ; + ; RV32ZBB-LABEL: name: cls_i64 + ; RV32ZBB: liveins: $x10, $x11 + ; RV32ZBB-NEXT: {{ $}} + ; RV32ZBB-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; RV32ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 + ; RV32ZBB-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; RV32ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32) + ; RV32ZBB-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; RV32ZBB-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[ASHR1]](s32), [[ASHR]] + ; RV32ZBB-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[ASHR]] + ; RV32ZBB-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32ZBB-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[ASHR1]] + ; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[XOR]](s32) + ; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]] + ; RV32ZBB-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; RV32ZBB-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]] + ; RV32ZBB-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[ADD]], [[C]] + ; RV32ZBB-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[ASHR]] + ; RV32ZBB-NEXT: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[XOR1]](s32) + ; RV32ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ1]], [[C1]] + ; RV32ZBB-NEXT: [[SELECT1:%[0-9]+]]:_(s32) 
= G_SELECT [[ICMP1]](s32), [[SELECT]], [[SUB1]] + ; RV32ZBB-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32ZBB-NEXT: $x10 = COPY [[SELECT1]](s32) + ; RV32ZBB-NEXT: $x11 = COPY [[C3]](s32) + ; RV32ZBB-NEXT: PseudoRET implicit $x10, implicit $x11 + %1:_(s32) = COPY $x10 + %2:_(s32) = COPY $x11 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %3:_(s64) = G_CTLS %0(s64) + %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3(s64) + $x10 = COPY %4(s32) + $x11 = COPY %5(s32) + PseudoRET implicit $x10, implicit $x11 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv64.mir new file mode 100644 index 000000000000..3ff9c44fd4ca --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv64.mir @@ -0,0 +1,395 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=riscv64 -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=RV64I +# RUN: llc -mtriple=riscv64 -mattr=+experimental-p -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=RV64P +# RUN: llc -mtriple=riscv64 -mattr=+zbb -run-pass=legalizer %s -o - \ +# RUN: | FileCheck %s --check-prefix=RV64ZBB + +--- +name: cls_i8 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: cls_i8 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s64) + ; RV64I-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) + ; RV64I-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C]](s64) + ; RV64I-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64I-NEXT: 
[[AND:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C3]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C1]](s64) + ; RV64I-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[LSHR]] + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C3]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C4]](s64) + ; RV64I-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[LSHR1]] + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR1]], [[C3]] + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C5]](s64) + ; RV64I-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[LSHR2]] + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[OR2]], [[C3]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND3]], [[C1]](s64) + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 85 + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[LSHR3]], [[C6]] + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[OR2]], [[AND4]] + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]] + ; RV64I-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[AND5]], [[C4]](s64) + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 51 + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[LSHR4]], [[C7]] + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C7]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND6]], [[AND7]] + ; RV64I-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C5]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR5]], [[ADD]] + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; RV64I-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C8]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: $x10 = COPY [[AND8]](s64) + ; RV64I-NEXT: $x11 = COPY [[C1]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit 
$x11, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C3]] + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[AND9]], [[C9]](s64) + ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C10]], [[LSHR6]] + ; RV64I-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C1]] + ; RV64I-NEXT: $x10 = COPY [[SUB2]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64P-LABEL: name: cls_i8 + ; RV64P: liveins: $x10 + ; RV64P-NEXT: {{ $}} + ; RV64P-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64P-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64P-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; RV64P-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; RV64P-NEXT: [[CLSW:%[0-9]+]]:_(s64) = G_CLSW [[ASHR]] + ; RV64P-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; RV64P-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CLSW]], [[C1]] + ; RV64P-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; RV64P-NEXT: $x10 = COPY [[SEXT_INREG]](s64) + ; RV64P-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: cls_i8 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64ZBB-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 8 + ; RV64ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s64) + ; RV64ZBB-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR]] + ; RV64ZBB-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 255 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C2]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB 
[[CLZW]], [[C3]] + ; RV64ZBB-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; RV64ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[SEXT_INREG1]], [[C1]] + ; RV64ZBB-NEXT: $x10 = COPY [[SUB1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s8) = G_TRUNC %1(s64) + %2:_(s8) = G_CTLS %0(s8) + %3:_(s64) = G_ANYEXT %2(s8) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... + +--- +name: cls_i16 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: cls_i16 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s64) + ; RV64I-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C2]](s64) + ; RV64I-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C]](s64) + ; RV64I-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C3]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C1]](s64) + ; RV64I-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[LSHR]] + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C3]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C4]](s64) + ; RV64I-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[LSHR1]] + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR1]], [[C3]] + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C5]](s64) + ; RV64I-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[LSHR2]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[OR2]], [[C3]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR 
[[AND3]], [[C6]](s64) + ; RV64I-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[OR2]], [[LSHR3]] + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[OR3]], [[C3]] + ; RV64I-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[AND4]], [[C1]](s64) + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 21845 + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[LSHR4]], [[C7]] + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[OR3]], [[AND5]] + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C3]] + ; RV64I-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[AND6]], [[C4]](s64) + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 13107 + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[LSHR5]], [[C8]] + ; RV64I-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C8]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND7]], [[AND8]] + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C5]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR6]], [[ADD]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 3855 + ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C9]] + ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 257 + ; RV64I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: $x10 = COPY [[AND9]](s64) + ; RV64I-NEXT: $x11 = COPY [[C10]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C3]] + ; RV64I-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[AND10]], [[C6]](s64) + ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C11]], [[LSHR7]] + ; RV64I-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C1]] + ; RV64I-NEXT: $x10 = COPY [[SUB2]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64P-LABEL: name: cls_i16 + 
; RV64P: liveins: $x10 + ; RV64P-NEXT: {{ $}} + ; RV64P-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64P-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; RV64P-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s64) + ; RV64P-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]](s64) + ; RV64P-NEXT: [[CLSW:%[0-9]+]]:_(s64) = G_CLSW [[ASHR]] + ; RV64P-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; RV64P-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CLSW]], [[C1]] + ; RV64P-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; RV64P-NEXT: $x10 = COPY [[SEXT_INREG]](s64) + ; RV64P-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: cls_i16 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64ZBB-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 16 + ; RV64ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s64) + ; RV64ZBB-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR]] + ; RV64ZBB-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 65535 + ; RV64ZBB-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C2]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[AND]] + ; RV64ZBB-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CLZW]], [[C3]] + ; RV64ZBB-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; RV64ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[SEXT_INREG1]], [[C1]] + ; RV64ZBB-NEXT: $x10 = COPY [[SUB1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s16) = G_TRUNC %1(s64) + %2:_(s16) = G_CTLS %0(s16) + %3:_(s64) = G_ANYEXT %2(s16) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: cls_i32 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: cls_i32 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; RV64I-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s64) + ; RV64I-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR]] + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C2]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C1]](s64) + ; RV64I-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[LSHR]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[OR]], [[C2]] + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[AND1]], [[C3]](s64) + ; RV64I-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[LSHR1]] + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[OR1]], [[C2]] + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[AND2]], [[C4]](s64) + ; RV64I-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[LSHR2]] + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[OR2]], [[C2]] + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[AND3]], [[C5]](s64) + ; RV64I-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[OR2]], [[LSHR3]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; RV64I-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[OR3]], [[C2]] + ; RV64I-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[AND4]], [[C6]](s64) + ; RV64I-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[OR3]], [[LSHR4]] + ; RV64I-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[OR4]], [[C2]] + ; RV64I-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[AND5]], [[C1]](s64) + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 1431655765 + ; RV64I-NEXT: [[AND6:%[0-9]+]]:_(s64) = G_AND [[LSHR5]], [[C7]] + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[OR4]], [[AND6]] + ; RV64I-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; RV64I-NEXT: [[AND7:%[0-9]+]]:_(s64) = G_AND [[SEXT_INREG1]], [[C2]] + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[AND7]], [[C3]](s64) + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 858993459 + ; RV64I-NEXT: [[AND8:%[0-9]+]]:_(s64) = G_AND [[LSHR6]], [[C8]] + ; RV64I-NEXT: [[AND9:%[0-9]+]]:_(s64) = G_AND [[SEXT_INREG1]], [[C8]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND8]], [[AND9]] + ; RV64I-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 32 + ; RV64I-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[SEXT_INREG2]], [[C4]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR7]], [[SEXT_INREG2]] + ; RV64I-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD1]], 32 + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 252645135 + ; RV64I-NEXT: [[AND10:%[0-9]+]]:_(s64) = G_AND [[SEXT_INREG3]], [[C9]] + ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 16843009 + ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; RV64I-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: $x10 = COPY [[AND10]](s64) + ; RV64I-NEXT: $x11 = COPY [[C10]](s64) + ; RV64I-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 + ; RV64I-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 + ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[AND11:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C2]] + ; RV64I-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[AND11]], [[C11]](s64) + ; RV64I-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C12]], [[LSHR8]] + ; RV64I-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB1]], 32 + ; RV64I-NEXT: [[SUB2:%[0-9]+]]:_(s64) 
= G_SUB [[SEXT_INREG4]], [[C1]] + ; RV64I-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB2]], 32 + ; RV64I-NEXT: $x10 = COPY [[SEXT_INREG5]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64P-LABEL: name: cls_i32 + ; RV64P: liveins: $x10 + ; RV64P-NEXT: {{ $}} + ; RV64P-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64P-NEXT: [[CLSW:%[0-9]+]]:_(s64) = G_CLSW [[COPY]] + ; RV64P-NEXT: $x10 = COPY [[CLSW]](s64) + ; RV64P-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: cls_i32 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64ZBB-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 + ; RV64ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s64) + ; RV64ZBB-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR]] + ; RV64ZBB-NEXT: [[CLZW:%[0-9]+]]:_(s64) = G_CLZW [[XOR]] + ; RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CLZW]], [[C1]] + ; RV64ZBB-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; RV64ZBB-NEXT: $x10 = COPY [[SEXT_INREG1]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %1:_(s64) = COPY $x10 + %0:_(s32) = G_TRUNC %1(s64) + %2:_(s32) = G_CTLS %0(s32) + %3:_(s64) = G_ANYEXT %2(s32) + $x10 = COPY %3(s64) + PseudoRET implicit $x10 +... 
+ +--- +name: cls_i64 +body: | + bb.1: + liveins: $x10 + + ; RV64I-LABEL: name: cls_i64 + ; RV64I: liveins: $x10 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; RV64I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64I-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) + ; RV64I-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR]] + ; RV64I-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[XOR]], [[C1]](s64) + ; RV64I-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[XOR]], [[LSHR]] + ; RV64I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64I-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[OR]], [[C2]](s64) + ; RV64I-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[LSHR1]] + ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; RV64I-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[OR1]], [[C3]](s64) + ; RV64I-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[LSHR2]] + ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; RV64I-NEXT: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[OR2]], [[C4]](s64) + ; RV64I-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[OR2]], [[LSHR3]] + ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; RV64I-NEXT: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[OR3]], [[C5]](s64) + ; RV64I-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[OR3]], [[LSHR4]] + ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; RV64I-NEXT: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[OR4]], [[C6]](s64) + ; RV64I-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR4]], [[LSHR5]] + ; RV64I-NEXT: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[OR5]], [[C1]](s64) + ; RV64I-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 6148914691236517205 + ; RV64I-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[LSHR6]], [[C7]] + ; RV64I-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[OR5]], [[AND]] + ; RV64I-NEXT: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[SUB]], [[C2]](s64) + ; RV64I-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 3689348814741910323 + ; RV64I-NEXT: [[AND1:%[0-9]+]]:_(s64) = 
G_AND [[LSHR7]], [[C8]] + ; RV64I-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C8]] + ; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[AND1]], [[AND2]] + ; RV64I-NEXT: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[ADD]], [[C3]](s64) + ; RV64I-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[LSHR8]], [[ADD]] + ; RV64I-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 1085102592571150095 + ; RV64I-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[ADD1]], [[C9]] + ; RV64I-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; RV64I-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND3]], [[C4]](s64) + ; RV64I-NEXT: [[ADD2:%[0-9]+]]:_(s64) = G_ADD [[AND3]], [[SHL]] + ; RV64I-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ADD2]], [[C5]](s64) + ; RV64I-NEXT: [[ADD3:%[0-9]+]]:_(s64) = G_ADD [[ADD2]], [[SHL1]] + ; RV64I-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ADD3]], [[C6]](s64) + ; RV64I-NEXT: [[ADD4:%[0-9]+]]:_(s64) = G_ADD [[ADD3]], [[SHL2]] + ; RV64I-NEXT: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[ADD4]], [[C10]](s64) + ; RV64I-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; RV64I-NEXT: [[SUB1:%[0-9]+]]:_(s64) = G_SUB [[C11]], [[LSHR9]] + ; RV64I-NEXT: [[SUB2:%[0-9]+]]:_(s64) = G_SUB [[SUB1]], [[C1]] + ; RV64I-NEXT: $x10 = COPY [[SUB2]](s64) + ; RV64I-NEXT: PseudoRET implicit $x10 + ; + ; RV64P-LABEL: name: cls_i64 + ; RV64P: liveins: $x10 + ; RV64P-NEXT: {{ $}} + ; RV64P-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64P-NEXT: [[CTLS:%[0-9]+]]:_(s64) = G_CTLS [[COPY]](s64) + ; RV64P-NEXT: $x10 = COPY [[CTLS]](s64) + ; RV64P-NEXT: PseudoRET implicit $x10 + ; + ; RV64ZBB-LABEL: name: cls_i64 + ; RV64ZBB: liveins: $x10 + ; RV64ZBB-NEXT: {{ $}} + ; RV64ZBB-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; RV64ZBB-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; RV64ZBB-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64ZBB-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64) + ; RV64ZBB-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY]], [[ASHR]] + ; RV64ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[XOR]](s64) + ; 
RV64ZBB-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]] + ; RV64ZBB-NEXT: $x10 = COPY [[SUB]](s64) + ; RV64ZBB-NEXT: PseudoRET implicit $x10 + %0:_(s64) = COPY $x10 + %1:_(s64) = G_CTLS %0(s64) + $x10 = COPY %1(s64) + PseudoRET implicit $x10 +... + diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll new file mode 100644 index 000000000000..3f403fd8cb9e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32p.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+experimental-p,+zbb -verify-machineinstrs \ +; RUN: < %s | FileCheck %s + +define i8 @cls_i8(i8 %x) { +; CHECK-LABEL: cls_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.b a0, a0 +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: addi a0, a0, -24 +; CHECK-NEXT: ret + %a = ashr i8 %x, 7 + %b = xor i8 %x, %a + %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false) + %d = sub i8 %c, 1 + ret i8 %d +} + +define i8 @cls_i8_2(i8 %x) { +; CHECK-LABEL: cls_i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.b a0, a0 +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: addi a0, a0, -24 +; CHECK-NEXT: ret + %a = ashr i8 %x, 7 + %b = xor i8 %x, %a + %c = shl i8 %b, 1 + %d = or i8 %c, 1 + %e = call i8 @llvm.ctlz.i8(i8 %d, i1 true) + ret i8 %e +} + +define i16 @cls_i16(i16 %x) { +; CHECK-LABEL: cls_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.h a0, a0 +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: ret + %a = ashr i16 %x, 15 + %b = xor i16 %x, %a + %c = call i16 @llvm.ctlz.i16(i16 %b, i1 false) + %d = sub i16 %c, 1 + ret i16 %d +} + +define i16 @cls_i16_2(i16 %x) { +; CHECK-LABEL: cls_i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.h a0, a0 +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: ret + %a = ashr i16 %x, 15 + %b = xor i16 %x, %a + %c = shl i16 %b, 1 + %d = or i16 %c, 1 + %e = call i16 @llvm.ctlz.i16(i16 %d, i1 true) + ret i16 %e +} + 
+define i32 @cls_i32(i32 %x) { +; CHECK-LABEL: cls_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: ret + %a = ashr i32 %x, 31 + %b = xor i32 %x, %a + %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) + %d = sub i32 %c, 1 + ret i32 %d +} + +define i32 @cls_i32_2(i32 %x) { +; CHECK-LABEL: cls_i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: ret + %a = ashr i32 %x, 31 + %b = xor i32 %x, %a + %c = shl i32 %b, 1 + %d = or i32 %c, 1 + %e = call i32 @llvm.ctlz.i32(i32 %d, i1 true) + ret i32 %e +} + +define i64 @cls_i64(i64 %x) { +; CHECK-LABEL: cls_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: srai a2, a1, 31 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: beqz a1, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: clz a0, a1 +; CHECK-NEXT: j .LBB6_3 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: xor a0, a0, a2 +; CHECK-NEXT: clz a0, a0 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: .LBB6_3: +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: sltiu a1, a0, -1 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: ret + %a = ashr i64 %x, 63 + %b = xor i64 %x, %a + %c = call i64 @llvm.ctlz.i64(i64 %b, i1 false) + %d = sub i64 %c, 1 + ret i64 %d +} + +define i64 @cls_i64_2(i64 %x) { +; CHECK-LABEL: cls_i64_2: +; CHECK: # %bb.0: +; CHECK-NEXT: srai a2, a1, 31 +; CHECK-NEXT: xor a0, a0, a2 +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: srli a2, a0, 31 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: beqz a1, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: clz a0, a1 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: ori a0, a0, 1 +; CHECK-NEXT: clz a0, a0 +; CHECK-NEXT: addi a0, a0, 32 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: ret + %a = ashr i64 %x, 63 + %b = xor i64 %x, %a + %c = shl i64 %b, 1 + %d = or i64 %c, 1 + %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true) + ret i64 %e +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll 
new file mode 100644 index 000000000000..5faf1079a780 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64p.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+experimental-p,+zbb -verify-machineinstrs < %s | FileCheck %s + +define i8 @cls_i8(i8 %x) { +; CHECK-LABEL: cls_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.b a0, a0 +; CHECK-NEXT: clsw a0, a0 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: subw a0, a0, a1 +; CHECK-NEXT: ret + %a = ashr i8 %x, 7 + %b = xor i8 %x, %a + %c = call i8 @llvm.ctlz.i8(i8 %b, i1 false) + %d = sub i8 %c, 1 + ret i8 %d +} + +define i8 @cls_i8_2(i8 %x) { +; CHECK-LABEL: cls_i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.b a0, a0 +; CHECK-NEXT: clsw a0, a0 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: subw a0, a0, a1 +; CHECK-NEXT: ret + %a = ashr i8 %x, 7 + %b = xor i8 %x, %a + %c = shl i8 %b, 1 + %d = or i8 %c, 1 + %e = call i8 @llvm.ctlz.i8(i8 %d, i1 true) + ret i8 %e +} + +define i16 @cls_i16(i16 %x) { +; CHECK-LABEL: cls_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.h a0, a0 +; CHECK-NEXT: clsw a0, a0 +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: subw a0, a0, a1 +; CHECK-NEXT: ret + %a = ashr i16 %x, 15 + %b = xor i16 %x, %a + %c = call i16 @llvm.ctlz.i16(i16 %b, i1 false) + %d = sub i16 %c, 1 + ret i16 %d +} + +define i16 @cls_i16_2(i16 %x) { +; CHECK-LABEL: cls_i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: sext.h a0, a0 +; CHECK-NEXT: clsw a0, a0 +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: subw a0, a0, a1 +; CHECK-NEXT: ret + %a = ashr i16 %x, 15 + %b = xor i16 %x, %a + %c = shl i16 %b, 1 + %d = or i16 %c, 1 + %e = call i16 @llvm.ctlz.i16(i16 %d, i1 true) + ret i16 %e +} + +define i32 @cls_i32(i32 %x) { +; CHECK-LABEL: cls_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: clsw a0, a0 +; CHECK-NEXT: ret + %a = ashr i32 %x, 31 + %b = xor i32 %x, %a + %c = call i32 @llvm.ctlz.i32(i32 %b, i1 false) + %d = sub i32 %c, 1 + ret i32 %d +} + +define i32 
@cls_i32_2(i32 %x) { +; CHECK-LABEL: cls_i32_2: +; CHECK: # %bb.0: +; CHECK-NEXT: clsw a0, a0 +; CHECK-NEXT: ret + %a = ashr i32 %x, 31 + %b = xor i32 %x, %a + %c = shl i32 %b, 1 + %d = or i32 %c, 1 + %e = call i32 @llvm.ctlz.i32(i32 %d, i1 true) + ret i32 %e +} + +define i64 @cls_i64(i64 %x) { +; CHECK-LABEL: cls_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: ret + %a = ashr i64 %x, 63 + %b = xor i64 %x, %a + %c = call i64 @llvm.ctlz.i64(i64 %b, i1 false) + %d = sub i64 %c, 1 + ret i64 %d +} + +define i64 @cls_i64_2(i64 %x) { +; CHECK-LABEL: cls_i64_2: +; CHECK: # %bb.0: +; CHECK-NEXT: cls a0, a0 +; CHECK-NEXT: ret + %a = ashr i64 %x, 63 + %b = xor i64 %x, %a + %c = shl i64 %b, 1 + %d = or i64 %c, 1 + %e = call i64 @llvm.ctlz.i64(i64 %d, i1 true) + ret i64 %e +}