llvm-project/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp

//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "../Target.h"
#include "../ParallelSnippetGenerator.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"

#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVExegesisPasses.h"
#include "RISCVInstrInfo.h"
#include "RISCVRegisterInfo.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"

// include computeAvailableFeatures and computeRequiredFeatures.
#define GET_AVAILABLE_OPCODE_CHECKER
#include "RISCVGenInstrInfo.inc"

#include "llvm/CodeGen/MachineInstrBuilder.h"

#include <vector>

namespace llvm {
namespace exegesis {

// Hidden flag: when set, VLMAX is the only AVL flavor enumerated for the VL
// operand of RVV pseudos (otherwise an immediate, VLMAX and a register AVL
// are all enumerated).
static cl::opt<bool>
    OnlyUsesVLMAXForVL("riscv-vlmax-for-vl",
                       cl::desc("Only enumerate VLMAX for VL operand"),
                       cl::init(false), cl::Hidden);

// Hidden flag (on by default): enumerate the individual FRM / VXRM rounding
// modes for instructions that carry a rounding mode operand.
static cl::opt<bool>
    EnumerateRoundingModes("riscv-enumerate-rounding-modes",
                           cl::desc("Enumerate different FRM and VXRM"),
                           cl::init(true), cl::Hidden);

// Hidden flag: when non-empty, only the configurations whose description
// string matches this regular expression are kept.
static cl::opt<std::string>
    FilterConfig("riscv-filter-config",
                 cl::desc("Show only the configs matching this regex"),
                 cl::init(""), cl::Hidden);

#include "RISCVGenExegesis.inc"

namespace {

/// Snippet generator that layers RISC-V specific handling on top of a generic
/// generator \p BaseT (serial or parallel). For RVV pseudos, every code
/// template produced by \p BaseT is expanded into one template per vtype
/// configuration (SEW, policy, AVL and rounding mode); all other opcodes are
/// passed through unchanged.
template <class BaseT> class RISCVSnippetGenerator : public BaseT {
  /// Prints the textual name of the rounding mode \p Val to \p OS. \p Val is
  /// interpreted as a VXRM value when \p UsesVXRM is set and as an FRM value
  /// otherwise.
  static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) {
    if (UsesVXRM) {
      assert(RISCVVXRndMode::isValidRoundingMode(Val));
      OS << RISCVVXRndMode::roundingModeToString(
          static_cast<RISCVVXRndMode::RoundingMode>(Val));
    } else {
      assert(RISCVFPRndMode::isValidRoundingMode(Val));
      OS << RISCVFPRndMode::roundingModeToString(
          static_cast<RISCVFPRndMode::RoundingMode>(Val));
    }
  }

  static constexpr unsigned MinSEW = 8;
  // ELEN is basically SEW_max.
  unsigned ELEN = 64;

  // We can't know the real min/max VLEN w/o a Function, so we're
  // using the VLen from Zvl.
  unsigned ZvlVLen = 32;

  /// Mask for registers that are NOT standalone registers like X0 and V0
  BitVector AggregateRegisters;

  // Returns true when opcode is available in any of the FBs.
  static bool
  isOpcodeAvailableIn(unsigned Opcode,
                      ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) {
    FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode);
    // Iterate with the enum's own type: narrowing the index to uint8_t would
    // silently alias any feature bit whose value does not fit in 8 bits.
    for (RISCV_MC::SubtargetFeatureBits FB : FBs) {
      if (RequiredFeatures[FB])
        return true;
    }
    return false;
  }

  // Returns true when \p Opcode requires the HasVInstructionsAnyF feature.
  static bool isRVVFloatingPointOp(unsigned Opcode) {
    return isOpcodeAvailableIn(Opcode,
                               {RISCV_MC::Feature_HasVInstructionsAnyFBit});
  }

  // Get the element group width of each vector crypto extension.
  static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) {
    using namespace RISCV_MC;
    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit,
                                     Feature_HasStdExtZvknedBit,
                                     Feature_HasStdExtZvksedBit}))
      return 128U;
    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit}))
      return 256U;
    if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit}))
      // In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256.
      // Otherwise it's 128.
      return SEW == 64 ? 256U : 128U;

    llvm_unreachable("Unsupported opcode");
  }

  // A handy utility to multiply or divide an integer by LMUL.
  template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) {
    auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
    return IsFractional ? Val / LMul : Val * LMul;
  }

  /// Return the denominator of the fractional (i.e. the `x` in .vfx suffix) or
  /// nullopt if BaseOpcode is not a vector sext/zext.
  static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) {
    switch (BaseOpcode) {
    case RISCV::VSEXT_VF2:
    case RISCV::VZEXT_VF2:
      return 2;
    case RISCV::VSEXT_VF4:
    case RISCV::VZEXT_VF4:
      return 4;
    case RISCV::VSEXT_VF8:
    case RISCV::VZEXT_VF8:
      return 8;
    default:
      return std::nullopt;
    }
  }

  /// Expands \p CT into one code template per vtype configuration supported
  /// by \p Instr and appends the results to \p Result.
  void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr,
                         unsigned BaseOpcode,
                         const BitVector &ForbiddenRegisters,
                         std::vector<CodeTemplate> &Result) const;

public:
  /// Builds the generator and derives, from the subtarget in \p State, the
  /// aggregate-register mask, ELEN and the Zvl-implied VLEN.
  RISCVSnippetGenerator(const LLVMState &State,
                        const SnippetGenerator::Options &Opts)
      : BaseT(State, Opts),
        AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) {
    // Initialize standalone registers mask.
    const MCRegisterInfo &RegInfo = State.getRegInfo();
    const unsigned StandaloneRegClasses[] = {
        RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};

    // Every register starts out marked as aggregate; clear the bit for each
    // member of the standalone register classes.
    for (unsigned RegClassID : StandaloneRegClasses)
      for (unsigned Reg : RegInfo.getRegClass(RegClassID))
        AggregateRegisters.reset(Reg);

    // Initialize ELEN and VLEN.
    // FIXME: We could have obtained these two constants from RISCVSubtarget
    // but in order to get that from TargetMachine, we need a Function.
    const MCSubtargetInfo &STI = State.getSubtargetInfo();
    ELEN = STI.hasFeature(RISCV::FeatureStdExtZve64x) ? 64 : 32;

    // Entry Idx of this table corresponds to a VLEN of 2^(Idx + 5) bits; keep
    // the largest one that is enabled.
    const unsigned ZvlFeatures[] = {
        RISCV::FeatureStdExtZvl32b,    RISCV::FeatureStdExtZvl64b,
        RISCV::FeatureStdExtZvl128b,   RISCV::FeatureStdExtZvl256b,
        RISCV::FeatureStdExtZvl512b,   RISCV::FeatureStdExtZvl1024b,
        RISCV::FeatureStdExtZvl2048b,  RISCV::FeatureStdExtZvl4096b,
        RISCV::FeatureStdExtZvl8192b,  RISCV::FeatureStdExtZvl16384b,
        RISCV::FeatureStdExtZvl32768b, RISCV::FeatureStdExtZvl65536b};
    for (auto [Idx, Feature] : enumerate(ZvlFeatures)) {
      if (STI.hasFeature(Feature))
        ZvlVLen = std::max(ZvlVLen, 1u << (Idx + 5));
    }
  }

  /// Generates the code templates for \p Variant; RVV pseudos are further
  /// specialized per vtype configuration.
  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(InstructionTemplate Variant,
                        const BitVector &ForbiddenRegisters) const override;
};

// Returns true when the masked-pseudo table has an entry for MaskedOp and
// that entry names UnmaskedOp as its unmasked counterpart.
static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
  if (const auto *MaskedInfo = RISCV::getMaskedPseudoInfo(MaskedOp))
    return MaskedInfo->UnmaskedPseudo == UnmaskedOp;
  return false;
}

// There are primarily two kinds of opcodes that are not eligible
// in a serial snippet:
// (1) Has a use operand that can not overlap with the def operand
// (i.e. early clobber).
// (2) The register file of the only use operand is different from
// that of the def operand. For instance, use operand is vector and
// the result is a scalar.
static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
                                         const Instruction &I) {
  if (llvm::any_of(I.Operands,
                   [](const Operand &Op) { return Op.isEarlyClobber(); }))
    return true;

  switch (BaseOpcode) {
  case RISCV::VCOMPRESS_VM:
  case RISCV::VCPOP_M:
  case RISCV::VCPOP_V:
  // The permutation instructions listed below cannot have destination
  // overlapping with the source.
  case RISCV::VRGATHEREI16_VV:
  case RISCV::VRGATHER_VI:
  case RISCV::VRGATHER_VV:
  case RISCV::VRGATHER_VX:
  case RISCV::VSLIDE1UP_VX:
  case RISCV::VSLIDEUP_VI:
  case RISCV::VSLIDEUP_VX:
    return true;
  default:
    return false;
  }
}

// Returns true for the narrowing/widening FP-to-FP conversion opcodes,
// including their BF16 flavors.
static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) {
  const bool IsFPConvert =
      BaseOpcode == RISCV::VFNCVT_F_F_W || BaseOpcode == RISCV::VFWCVT_F_F_V;
  const bool IsBF16Convert = BaseOpcode == RISCV::VFNCVTBF16_F_F_W ||
                             BaseOpcode == RISCV::VFWCVTBF16_F_F_V;
  return IsFPConvert || IsBF16Convert;
}

static bool isVectorReduction(unsigned BaseOpcode) {
  switch (BaseOpcode) {
  case RISCV::VREDAND_VS:
  case RISCV::VREDMAXU_VS:
  case RISCV::VREDMAX_VS:
  case RISCV::VREDMINU_VS:
  case RISCV::VREDMIN_VS:
  case RISCV::VREDOR_VS:
  case RISCV::VREDSUM_VS:
  case RISCV::VREDXOR_VS:
  case RISCV::VWREDSUMU_VS:
  case RISCV::VWREDSUM_VS:
  case RISCV::VFREDMAX_VS:
  case RISCV::VFREDMIN_VS:
  case RISCV::VFREDOSUM_VS:
  case RISCV::VFREDUSUM_VS:
    return true;
  default:
    return false;
  }
}

/// Expands \p OrigCT into one CodeTemplate per (rounding mode, AVL, SEW,
/// policy) combination supported by the RVV pseudo \p Instr and appends the
/// results to \p Result.
///
/// \param OrigCT             Template produced by the generic generator; it
///                           is cloned for every emitted configuration.
/// \param Instr              The RVV pseudo instruction being benchmarked.
/// \param BaseOpcode         The MC opcode corresponding to \p Instr.
/// \param ForbiddenRegisters Registers that must not be picked when creating
///                           self-aliasing operands for serial snippets.
/// \param Result             Output list. At most MaxConfigsPerOpcode entries
///                           are appended per call; nothing is appended when
///                           no SEW candidate survives the filtering.
template <class BaseT>
void RISCVSnippetGenerator<BaseT>::annotateWithVType(
    const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode,
    const BitVector &ForbiddenRegisters,
    std::vector<CodeTemplate> &Result) const {
  const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo();
  unsigned VPseudoOpcode = Instr.getOpcode();

  constexpr bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;

  const MCInstrDesc &MIDesc = Instr.Description;
  const uint64_t TSFlags = MIDesc.TSFlags;

  RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  const size_t StartingResultSize = Result.size();

  SmallPtrSet<const Operand *, 4> VTypeOperands;
  std::optional<AliasingConfigurations> SelfAliasing;
  // Exegesis see instructions with tied operands being inherently serial.
  // But for RVV instructions, those tied operands are passthru rather
  // than real read operands. So we manually put dependency between
  // destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM
  // operands.
  auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) {
    // Initialize SelfAliasing on first use.
    if (!SelfAliasing.has_value()) {
      BitVector ExcludeRegs = ForbiddenRegisters;
      ExcludeRegs |= AggregateRegisters;
      SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs);
      bool EmptyUses = false;
      for (auto &ARO : SelfAliasing->Configurations) {
        auto &Uses = ARO.Uses;
        for (auto ROA = Uses.begin(); ROA != Uses.end();) {
          const Operand *Op = ROA->Op;
          // Exclude tied operand(s).
          if (Op->isTied()) {
            ROA = Uses.erase(ROA);
            continue;
          }

          // Special handling for reduction operations: for a given reduction
          // `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1
          // since we're only reading `vs1[0]` and many implementations
          // optimize for this case (e.g. chaining). Instead, we're forcing
          // it to create alias between vd and vs2.
          if (isVectorReduction(BaseOpcode) &&
              // vs1's operand index is always 3.
              Op->getIndex() == 3) {
            ROA = Uses.erase(ROA);
            continue;
          }

          // Exclude any special operands like SEW and VL -- we've already
          // assigned values to them.
          if (VTypeOperands.count(Op)) {
            ROA = Uses.erase(ROA);
            continue;
          }
          ++ROA;
        }

        // If any of the use operand candidate lists is empty, there is
        // no point to assign self aliasing registers.
        if (Uses.empty()) {
          EmptyUses = true;
          break;
        }
      }
      if (EmptyUses)
        SelfAliasing->Configurations.clear();
    }

    // This is a self aliasing instruction so defs and uses are from the same
    // instance, hence twice IT in the following call.
    if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing())
      setRandomAliasing(*SelfAliasing, IT, IT);
  };

  // We are going to create a CodeTemplate (configuration) for each supported
  // SEW, policy, and VL.
  // FIXME: Account for EEW and EMUL.
  SmallVector<std::optional<unsigned>, 4> Log2SEWs;
  SmallVector<std::optional<unsigned>, 4> Policies;
  SmallVector<std::optional<int>, 3> AVLs;
  SmallVector<std::optional<unsigned>, 8> RoundingModes;

  bool HasSEWOp = RISCVII::hasSEWOp(TSFlags);
  bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  bool HasVLOp = RISCVII::hasVLOp(TSFlags);
  bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags);
  bool UsesVXRM = RISCVII::usesVXRM(TSFlags);

  if (HasSEWOp) {
    const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
    VTypeOperands.insert(&SEWOp);

    if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) {
      // If it's a mask-producing instruction, the SEW operand is always zero.
      Log2SEWs.push_back(0);
    } else {
      SmallVector<unsigned, 4> SEWCandidates;

      // (RVV spec 3.4.2) For fractional LMUL, the supported SEW are between
      // [SEW_min, LMUL * ELEN].
      unsigned SEWUpperBound =
          VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN;
      for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) {
        SEWCandidates.push_back(SEW);

        // Some scheduling classes already integrate SEW; only put
        // their corresponding SEW values at the SEW operands.
        // NOTE: It is imperative to put this condition in the front, otherwise
        // it is tricky and difficult to know if there is an integrated
        // SEW after other rules are applied to filter the candidates.
        const auto *RVVBase =
            RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
        if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
                        isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
                        isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
          // There is an integrated SEW, remove all but the SEW pushed last.
          SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
          break;
        }
      }

      // Filter out some candidates.
      for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) {
        // For floating point operations, only select SEW of the supported FLEN.
        if (isRVVFloatingPointOp(VPseudoOpcode)) {
          bool Supported = false;
          Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16;
          Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16;
          Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32;
          Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64;
          if (!Supported) {
            SEW = SEWCandidates.erase(SEW);
            continue;
          }
        }

        // The EEW for source operand in VSEXT and VZEXT is a fraction
        // of the SEW, hence only SEWs that will lead to valid EEW are allowed.
        if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
          if (*SEW / *Frac < MinSEW) {
            SEW = SEWCandidates.erase(SEW);
            continue;
          }

        // Most vector crypto 1.0 instructions only work on SEW=32.
        using namespace RISCV_MC;
        if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit,
                                             Feature_HasStdExtZvknedBit,
                                             Feature_HasStdExtZvknhaOrZvknhbBit,
                                             Feature_HasStdExtZvksedBit,
                                             Feature_HasStdExtZvkshBit})) {
          if (*SEW != 32)
            // Zvknhb supports SEW=64 as well.
            if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
                !isOpcodeAvailableIn(BaseOpcode,
                                     {Feature_HasStdExtZvknhaOrZvknhbBit})) {
              SEW = SEWCandidates.erase(SEW);
              continue;
            }

          // We're also enforcing the requirement of `LMUL * VLEN >= EGW` here,
          // because some of the extensions have SEW-dependent EGW.
          unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW);
          if (multiplyLMul(ZvlVLen, VLMul) < EGW) {
            SEW = SEWCandidates.erase(SEW);
            continue;
          }
        }

        ++SEW;
      }

      // We're not going to produce any result with zero SEW candidate.
      if (SEWCandidates.empty())
        return;

      for (unsigned SEW : SEWCandidates)
        Log2SEWs.push_back(Log2_32(SEW));
    }
  } else {
    Log2SEWs.push_back(std::nullopt);
  }

  if (HasPolicyOp) {
    VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]);

    Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC,
                (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)};
  } else {
    Policies.push_back(std::nullopt);
  }

  if (HasVLOp) {
    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]);

    if (OnlyUsesVLMAXForVL)
      AVLs.push_back(-1);
    else
      AVLs = {// 5-bit immediate value
              1,
              // VLMAX
              -1,
              // Non-X0 register
              0};
  } else {
    AVLs.push_back(std::nullopt);
  }

  if (HasRMOp) {
    // The rounding mode operand sits right before the VL operand.
    VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]);

    if (UsesVXRM) {
      // Use RNU as the default VXRM.
      RoundingModes = {RISCVVXRndMode::RNU};
      if (EnumerateRoundingModes)
        RoundingModes.append(
            {RISCVVXRndMode::RNE, RISCVVXRndMode::RDN, RISCVVXRndMode::ROD});
    } else {
      if (EnumerateRoundingModes)
        RoundingModes = {RISCVFPRndMode::RNE, RISCVFPRndMode::RTZ,
                         RISCVFPRndMode::RDN, RISCVFPRndMode::RUP,
                         RISCVFPRndMode::RMM};
      else
        // If we're not enumerating FRM, use DYN to instruct
        // RISCVInsertReadWriteCSRPass to insert nothing.
        RoundingModes = {RISCVFPRndMode::DYN};
    }
  } else {
    RoundingModes = {std::nullopt};
  }

  // Collect the cross product of all knobs. The set gives a deterministic
  // iteration order and drops duplicates.
  std::set<std::tuple<std::optional<unsigned>, std::optional<int>,
                      std::optional<unsigned>, std::optional<unsigned>>>
      Combinations;
  for (auto AVL : AVLs) {
    for (auto Log2SEW : Log2SEWs)
      for (auto Policy : Policies) {
        for (auto RM : RoundingModes)
          Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy));
      }
  }

  // Compile the optional user-provided filter once instead of once per
  // combination.
  std::optional<Regex> ConfigFilter;
  if (!FilterConfig.empty())
    ConfigFilter.emplace(FilterConfig);

  std::string ConfigStr;
  SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments;
  for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) {
    ListSeparator LS;
    ConfigStr = "vtype = {";
    raw_string_ostream SS(ConfigStr);

    ValueAssignments.clear();

    if (RM) {
      const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1];
      ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)});
      printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM,
                        UsesVXRM);
    }

    if (AVL) {
      MCOperand OpVal;
      if (*AVL < 0) {
        // VLMAX
        OpVal = MCOperand::createImm(-1);
        SS << LS << "AVL: VLMAX";
      } else if (*AVL == 0) {
        // A register holding AVL.
        // TODO: Generate a random register.
        OpVal = MCOperand::createReg(RISCV::X5);
        OpVal.print(SS << LS << "AVL: ");
      } else {
        // A 5-bit immediate.
        // The actual value assignment is deferred to
        // RISCVExegesisTarget::randomizeTargetMCOperand.
        SS << LS << "AVL: simm5";
      }
      if (OpVal.isValid()) {
        const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)];
        ValueAssignments.push_back({&Op, OpVal});
      }
    }

    if (Log2SEW) {
      const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
      ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)});
      // A zero Log2SEW (mask-producing instructions) is printed as e8.
      SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8);
    }

    if (Policy) {
      const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)];
      ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)});
      SS << LS
         << "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu")
         << "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
    }

    SS << "}";

    // Filter out some configurations, if needed.
    if (ConfigFilter && !ConfigFilter->match(ConfigStr))
      continue;

    CodeTemplate CT = OrigCT.clone();
    CT.Config = std::move(ConfigStr);
    for (InstructionTemplate &IT : CT.Instructions) {
      if (IsSerial) {
        // Reset this template's value assignments and do it
        // ourselves.
        IT = InstructionTemplate(&Instr);
        assignSerialRVVOperands(IT);
      }

      for (const auto &[Op, OpVal] : ValueAssignments)
        IT.getValueFor(*Op) = OpVal;
    }
    Result.push_back(std::move(CT));
    // Stop as soon as this call has contributed the maximum number of
    // configurations allowed per opcode.
    if (Result.size() - StartingResultSize >=
        SnippetGenerator::Opts.MaxConfigsPerOpcode)
      return;
  }
}

/// Produces the code templates for \p Variant. Opcodes for which
/// RISCV::getRVVMCOpcode returns zero get exactly what \p BaseT generates;
/// for the others (RVV pseudos) each base template is additionally expanded
/// by annotateWithVType.
template <class BaseT>
Expected<std::vector<CodeTemplate>>
RISCVSnippetGenerator<BaseT>::generateCodeTemplates(
    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
  const Instruction &Instr = Variant.getInstr();
  const unsigned RVVBaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode());
  const bool IsRVVPseudo = RVVBaseOpcode != 0;

  // Generating the base code templates is quite expensive, so reject
  // opcodes that cannot be used in serial snippets up front.
  if constexpr (std::is_same_v<BaseT, SerialSnippetGenerator>) {
    if (IsRVVPseudo && isIneligibleOfSerialSnippets(RVVBaseOpcode, Instr))
      return std::vector<CodeTemplate>{};
  }

  auto GenericTemplates =
      BaseT::generateCodeTemplates(Variant, ForbiddenRegisters);
  if (!GenericTemplates)
    return GenericTemplates.takeError();

  // Nothing to specialize unless this is an RVV pseudo.
  if (!IsRVVPseudo)
    return GenericTemplates;

  std::vector<CodeTemplate> VTypeTemplates;
  for (const CodeTemplate &GenericCT : *GenericTemplates)
    annotateWithVType(GenericCT, Instr, RVVBaseOpcode, ForbiddenRegisters,
                      VTypeTemplates);

  return VTypeTemplates;
}

// Stores constant value to a general-purpose (integer) register.
static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI,
                                      MCRegister Reg, const APInt &Value) {
  SmallVector<MCInst, 8> MCInstSeq;
  MCRegister DestReg = Reg;

  RISCVMatInt::generateMCInstSeq(Value.getSExtValue(), STI, DestReg, MCInstSeq);

  std::vector<MCInst> MatIntInstrs(MCInstSeq.begin(), MCInstSeq.end());
  return MatIntInstrs;
}

// Integer scratch register used to stage constants before they are moved or
// converted into floating-point registers; it is also listed among the
// unavailable registers of this target.
const MCPhysReg ScratchIntReg = RISCV::X30; // t5

// Stores constant bits to a floating-point register.
static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI,
                                         MCRegister Reg, const APInt &Bits,
                                         unsigned FmvOpcode) {
  std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
  Instrs.push_back(MCInstBuilder(FmvOpcode).addReg(Reg).addReg(ScratchIntReg));
  return Instrs;
}

// main idea is:
// we support APInt only if (represented as double) it has zero fractional
// part: 1.0, 2.0, 3.0, etc... then we can do the trick: write int to tmp reg t5
// and then do FCVT this is only reliable thing in 32-bit mode, otherwise we
// need to use __floatsidf
static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI,
                                             MCRegister Reg,
                                             const APInt &Bits) {
  double D = Bits.bitsToDouble();
  double IPart;
  double FPart = std::modf(D, &IPart);

  if (std::abs(FPart) > std::numeric_limits<double>::epsilon()) {
    errs() << "loadFP64RegBits32 is not implemented for doubles like " << D
           << ", please remove fractional part\n";
    return {};
  }

  std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
  Instrs.push_back(MCInstBuilder(RISCV::FCVT_D_W)
                       .addReg(Reg)
                       .addReg(ScratchIntReg)
                       .addImm(RISCVFPRndMode::RNE));
  return Instrs;
}

/// The llvm-exegesis target implementation for RISC-V.
class ExegesisRISCVTarget : public ExegesisTarget {
  // MC opcodes for which a corresponding RVV pseudo has already been seen.
  // NOTE: A BitVector would work too, but RVV MC opcodes are only a small
  // portion of the whole opcode space, so a set was deemed less wasteful.
  mutable SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos;

public:
  ExegesisRISCVTarget();

  bool matchesArch(Triple::ArchType Arch) const override;

  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
                               const APInt &Value) const override;

  /// Returns the reason why \p Opcode should be skipped, or nullptr when it
  /// should be benchmarked.
  const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
                                           unsigned Opcode) const override {
    // RVV instructions that depend on VTYPE are not supported directly: on
    // their own they carry no information that would let us set up the
    // proper VTYPE environment via VSETVL instructions.
    // FIXME: Ideally RISCVVInversePseudosTable would be used here, but it
    // needs LMUL and SEW, and enumerating those combinations is no better
    // than this ugly trick of remembering the MC opcodes of the RVV pseudos
    // processed so far. It works most of the time because RVV pseudo opcodes
    // are placed before any other RVV opcodes. Of course it does not work
    // when only a certain subset of opcodes is being benchmarked.
    if (RVVMCOpcodesWithPseudos.count(Opcode) != 0)
      return "The MC opcode of RVV instructions are ignored";

    const unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode);
    if (MCOpcode == 0)
      return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode);

    // All RVV pseudos are supported; remember their MC opcode so that it is
    // rejected above when encountered later.
    RVVMCOpcodesWithPseudos.insert(MCOpcode);
    return nullptr;
  }

  MCRegister getDefaultLoopCounterRegister(const Triple &) const override;

  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                   MachineBasicBlock &TargetMBB,
                                   const MCInstrInfo &MII,
                                   MCRegister LoopRegister) const override;

  MCRegister getScratchMemoryRegister(const Triple &TT) const override;

  void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
                          unsigned Offset) const override;

  ArrayRef<MCPhysReg> getUnavailableRegisters() const override;

  /// Only non-pseudo instructions are allowed back-to-back.
  bool allowAsBackToBack(const Instruction &Instr) const override {
    const bool IsPseudo = Instr.Description.isPseudo();
    return !IsPseudo;
  }

  Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
                                 MCOperand &AssignedValue,
                                 const BitVector &ForbiddenRegs) const override;

  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    using GeneratorT = RISCVSnippetGenerator<SerialSnippetGenerator>;
    return std::make_unique<GeneratorT>(State, Opts);
  }

  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    using GeneratorT = RISCVSnippetGenerator<ParallelSnippetGenerator>;
    return std::make_unique<GeneratorT>(State, Opts);
  }

  std::vector<InstructionTemplate>
  generateInstructionVariants(const Instruction &Instr,
                              unsigned MaxConfigsPerOpcode) const override;

  /// Registers the RISC-V specific passes, in the order they must run.
  void addTargetSpecificPasses(PassManagerBase &PM) const override {
    // Physical-register AVL operands are turned into virtual registers.
    PM.add(exegesis::createRISCVPreprocessingPass());
    PM.add(createRISCVInsertVSETVLIPass());

    // Set up the correct FRM.
    PM.add(createRISCVInsertReadWriteCSRPass());
    PM.add(createRISCVInsertWriteVXRMPass());

    // Assign a physical register to the result of the VSETVLI instructions
    // that produce VLMAX.
    PM.add(exegesis::createRISCVPostprocessingPass());

    // RISCVAsmPrinter expands PseudoRET, but PseudoMovImm has to be expanded
    // by RISCVPostRAExpandPseudoPass.
    PM.add(createRISCVPostRAExpandPseudoPass());
  }
};

// Hands the RISC-V PFM counter table and the generated opcode availability
// checker over to the generic ExegesisTarget base.
ExegesisRISCVTarget::ExegesisRISCVTarget()
    : ExegesisTarget(RISCVCpuPfmCounters, RISCV_MC::isOpcodeAvailable) {}

// This target handles both the 32-bit and the 64-bit RISC-V architectures.
bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const {
  switch (Arch) {
  case Triple::riscv32:
  case Triple::riscv64:
    return true;
  default:
    return false;
  }
}

// Returns the instructions that set Reg to Value, dispatching on the register
// class of Reg. An empty sequence is returned for register classes that are
// not handled.
std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI,
                                                  MCRegister Reg,
                                                  const APInt &Value) const {
  if (RISCV::GPRRegClass.contains(Reg))
    return loadIntReg(STI, Reg, Value);

  // Floating-point registers receive their bits through an integer-to-FP
  // move whose opcode depends on the register width.
  if (RISCV::FPR16RegClass.contains(Reg))
    return loadFPRegBits(STI, Reg, Value, RISCV::FMV_H_X);

  if (RISCV::FPR32RegClass.contains(Reg))
    return loadFPRegBits(STI, Reg, Value, RISCV::FMV_W_X);

  if (RISCV::FPR64RegClass.contains(Reg)) {
    // Without the 64-bit feature, fall back to the int-to-double conversion
    // helper instead of FMV_D_X.
    const bool Is64Bit = STI.hasFeature(RISCV::Feature64Bit);
    return Is64Bit ? loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X)
                   : loadFP64RegBits32(STI, Reg, Value);
  }

  // TODO: Emit proper code to initialize other kinds of registers.
  return {};
}

// Register returned by getDefaultLoopCounterRegister().
const MCPhysReg DefaultLoopCounterReg = RISCV::X31; // t6
// Register returned by getScratchMemoryRegister(); it is also the register
// assigned to memory operands by generateInstructionVariants().
const MCPhysReg ScratchMemoryReg = RISCV::X10;      // a0

// Returns the default loop counter register; the triple is not consulted.
MCRegister
ExegesisRISCVTarget::getDefaultLoopCounterRegister(const Triple &) const {
  return DefaultLoopCounterReg;
}

// Appends to MBB the loop tail: decrement LoopRegister by one, then branch to
// TargetMBB while the counter differs from X0.
void ExegesisRISCVTarget::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII, MCRegister LoopRegister) const {
  const MCInstrDesc &DecrementDesc = MII.get(RISCV::ADDI);
  const MCInstrDesc &BranchDesc = MII.get(RISCV::BNE);

  // addi LoopRegister, LoopRegister, -1
  BuildMI(&MBB, DebugLoc(), DecrementDesc)
      .addDef(LoopRegister)
      .addUse(LoopRegister)
      .addImm(-1);

  // bne LoopRegister, x0, TargetMBB
  BuildMI(&MBB, DebugLoc(), BranchDesc)
      .addUse(LoopRegister)
      .addUse(RISCV::X0)
      .addMBB(&TargetMBB);
}

// Returns the register that holds the scratch memory address; the triple is
// not consulted.
MCRegister
ExegesisRISCVTarget::getScratchMemoryRegister(const Triple &TT) const {
  return ScratchMemoryReg; // a0
}

// Assigns Reg to the first memory operand of IT's instruction. The
// instruction is expected to have a memory operand and that operand is
// expected to be a register; both expectations are only checked by asserts.
void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT,
                                             MCRegister Reg,
                                             unsigned Offset) const {
  // TODO: Offset is ignored for now because there is no way to detect it in
  // the instruction.
  const auto &Instr = IT.getInstr();

  // Locate the first memory operand, if any.
  const Operand *FirstMemOp = nullptr;
  for (const Operand &Op : Instr.Operands) {
    if (Op.isMemory()) {
      FirstMemOp = &Op;
      break;
    }
  }

  assert(FirstMemOp != nullptr && "Instruction must have memory operands");
  assert(FirstMemOp->isReg() && "Memory operand expected to be register");

  IT.getValueFor(*FirstMemOp) = MCOperand::createReg(Reg);
}

// Registers reported as unavailable by this target: X0 plus the loop
// counter, the integer scratch register and the scratch memory register
// defined in this file.
const MCPhysReg UnavailableRegisters[4] = {RISCV::X0, DefaultLoopCounterReg,
                                           ScratchIntReg, ScratchMemoryReg};

// Returns a view over the UnavailableRegisters table.
ArrayRef<MCPhysReg> ExegesisRISCVTarget::getUnavailableRegisters() const {
  return UnavailableRegisters;
}

// Picks a value for the target-specific operand described by Var and stores
// it in AssignedValue. The choice is driven by the operand type of Var's
// primary operand; operand types that are not handled leave AssignedValue
// untouched. ForbiddenRegs is not used since only immediates are produced
// here. Always returns success.
Error ExegesisRISCVTarget::randomizeTargetMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  uint8_t OperandType =
      Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType;

  switch (OperandType) {
  case RISCVOp::OPERAND_FRMARG:
    AssignedValue = MCOperand::createImm(RISCVFPRndMode::DYN);
    break;
  case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
    // Non-zero value whose four low bits are clear.
    AssignedValue = MCOperand::createImm(0b1 << 4);
    break;
  case RISCVOp::OPERAND_SIMM6_NONZERO:
  case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
    AssignedValue = MCOperand::createImm(1);
    break;
  case RISCVOp::OPERAND_SIMM5:
    // 5-bit signed immediate value.
    // Convert the random index to a signed type before re-centering it so
    // that the subtraction cannot wrap around should randomIndex() return an
    // unsigned type.
    AssignedValue =
        MCOperand::createImm(static_cast<int64_t>(randomIndex(31)) - 16);
    break;
  case RISCVOp::OPERAND_AVL:
  case RISCVOp::OPERAND_UIMM5:
    // 5-bit unsigned immediate value.
    AssignedValue = MCOperand::createImm(randomIndex(31));
    break;
  default:
    // Any other RISC-V immediate operand type simply gets zero.
    if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
        OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM)
      AssignedValue = MCOperand::createImm(0);
  }
  return Error::success();
}

std::vector<InstructionTemplate>
ExegesisRISCVTarget::generateInstructionVariants(
    const Instruction &Instr, unsigned int MaxConfigsPerOpcode) const {
  InstructionTemplate IT{&Instr};
  for (const Operand &Op : Instr.Operands)
    if (Op.isMemory()) {
      IT.getValueFor(Op) = MCOperand::createReg(ScratchMemoryReg);
    }
  return {IT};
}

} // anonymous namespace

// Returns the RISC-V exegesis target, a function-local static that is
// constructed on the first call.
static ExegesisTarget *getTheRISCVExegesisTarget() {
  static ExegesisRISCVTarget Target;
  return &Target;
}

void InitializeRISCVExegesisTarget() {
  ExegesisTarget::registerTarget(getTheRISCVExegesisTarget());
}

} // namespace exegesis
} // namespace llvm