
We hit an operand-index-out-of-bounds crash because the FCVT_D_W instruction was being built without its frm (rounding-mode) operand.
890 lines
32 KiB
C++
890 lines
32 KiB
C++
//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "../Target.h"
|
|
#include "../ParallelSnippetGenerator.h"
|
|
#include "../SerialSnippetGenerator.h"
|
|
#include "../SnippetGenerator.h"
|
|
|
|
#include "MCTargetDesc/RISCVBaseInfo.h"
|
|
#include "MCTargetDesc/RISCVMCTargetDesc.h"
|
|
#include "MCTargetDesc/RISCVMatInt.h"
|
|
#include "RISCV.h"
|
|
#include "RISCVExegesisPasses.h"
|
|
#include "RISCVInstrInfo.h"
|
|
#include "RISCVRegisterInfo.h"
|
|
#include "llvm/Support/Regex.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
// include computeAvailableFeatures and computeRequiredFeatures.
|
|
#define GET_AVAILABLE_OPCODE_CHECKER
|
|
#include "RISCVGenInstrInfo.inc"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include <vector>
|
|
|
|
namespace llvm {
|
|
namespace exegesis {
|
|
|
|
// Hidden command-line knobs that steer how RVV configurations are enumerated.

// When set, VLMAX is the only AVL value generated for the VL operand.
static cl::opt<bool>
    OnlyUsesVLMAXForVL("riscv-vlmax-for-vl", cl::Hidden, cl::init(false),
                       cl::desc("Only enumerate VLMAX for VL operand"));

// When set (the default), several FRM / VXRM values are enumerated per opcode.
static cl::opt<bool>
    EnumerateRoundingModes("riscv-enumerate-rounding-modes", cl::Hidden,
                           cl::init(true),
                           cl::desc("Enumerate different FRM and VXRM"));

// Regular expression matched against each generated config string; an empty
// value disables the filtering.
static cl::opt<std::string>
    FilterConfig("riscv-filter-config", cl::Hidden, cl::init(""),
                 cl::desc("Show only the configs matching this regex"));
|
|
|
|
#include "RISCVGenExegesis.inc"
|
|
|
|
namespace {
|
|
|
|
template <class BaseT> class RISCVSnippetGenerator : public BaseT {
|
|
static void printRoundingMode(raw_ostream &OS, unsigned Val, bool UsesVXRM) {
|
|
if (UsesVXRM) {
|
|
assert(RISCVVXRndMode::isValidRoundingMode(Val));
|
|
OS << RISCVVXRndMode::roundingModeToString(
|
|
static_cast<RISCVVXRndMode::RoundingMode>(Val));
|
|
} else {
|
|
assert(RISCVFPRndMode::isValidRoundingMode(Val));
|
|
OS << RISCVFPRndMode::roundingModeToString(
|
|
static_cast<RISCVFPRndMode::RoundingMode>(Val));
|
|
}
|
|
}
|
|
|
|
static constexpr unsigned MinSEW = 8;
|
|
// ELEN is basically SEW_max.
|
|
unsigned ELEN = 64;
|
|
|
|
// We can't know the real min/max VLEN w/o a Function, so we're
|
|
// using the VLen from Zvl.
|
|
unsigned ZvlVLen = 32;
|
|
|
|
/// Mask for registers that are NOT standalone registers like X0 and V0
|
|
BitVector AggregateRegisters;
|
|
|
|
// Returns true when opcode is available in any of the FBs.
|
|
static bool
|
|
isOpcodeAvailableIn(unsigned Opcode,
|
|
ArrayRef<RISCV_MC::SubtargetFeatureBits> FBs) {
|
|
FeatureBitset RequiredFeatures = RISCV_MC::computeRequiredFeatures(Opcode);
|
|
for (uint8_t FB : FBs) {
|
|
if (RequiredFeatures[FB])
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool isRVVFloatingPointOp(unsigned Opcode) {
|
|
return isOpcodeAvailableIn(Opcode,
|
|
{RISCV_MC::Feature_HasVInstructionsAnyFBit});
|
|
}
|
|
|
|
// Get the element group width of each vector cryptor extension.
|
|
static unsigned getZvkEGWSize(unsigned Opcode, unsigned SEW) {
|
|
using namespace RISCV_MC;
|
|
if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkgBit,
|
|
Feature_HasStdExtZvknedBit,
|
|
Feature_HasStdExtZvksedBit}))
|
|
return 128U;
|
|
if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvkshBit}))
|
|
return 256U;
|
|
if (isOpcodeAvailableIn(Opcode, {Feature_HasStdExtZvknhaOrZvknhbBit}))
|
|
// In Zvknh[ab], when SEW=64 is used (i.e. Zvknhb), EGW is 256.
|
|
// Otherwise it's 128.
|
|
return SEW == 64 ? 256U : 128U;
|
|
|
|
llvm_unreachable("Unsupported opcode");
|
|
}
|
|
|
|
// A handy utility to multiply or divide an integer by LMUL.
|
|
template <typename T> static T multiplyLMul(T Val, RISCVVType::VLMUL VLMul) {
|
|
auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
|
|
return IsFractional ? Val / LMul : Val * LMul;
|
|
}
|
|
|
|
/// Return the denominator of the fractional (i.e. the `x` in .vfx suffix) or
|
|
/// nullopt if BaseOpcode is not a vector sext/zext.
|
|
static std::optional<unsigned> isRVVSignZeroExtend(unsigned BaseOpcode) {
|
|
switch (BaseOpcode) {
|
|
case RISCV::VSEXT_VF2:
|
|
case RISCV::VZEXT_VF2:
|
|
return 2;
|
|
case RISCV::VSEXT_VF4:
|
|
case RISCV::VZEXT_VF4:
|
|
return 4;
|
|
case RISCV::VSEXT_VF8:
|
|
case RISCV::VZEXT_VF8:
|
|
return 8;
|
|
default:
|
|
return std::nullopt;
|
|
}
|
|
}
|
|
|
|
void annotateWithVType(const CodeTemplate &CT, const Instruction &Instr,
|
|
unsigned BaseOpcode,
|
|
const BitVector &ForbiddenRegisters,
|
|
std::vector<CodeTemplate> &Result) const;
|
|
|
|
public:
|
|
RISCVSnippetGenerator(const LLVMState &State,
|
|
const SnippetGenerator::Options &Opts)
|
|
: BaseT(State, Opts),
|
|
AggregateRegisters(State.getRegInfo().getNumRegs(), /*initVal=*/true) {
|
|
// Initialize standalone registers mask.
|
|
const MCRegisterInfo &RegInfo = State.getRegInfo();
|
|
const unsigned StandaloneRegClasses[] = {
|
|
RISCV::GPRRegClassID, RISCV::FPR16RegClassID, RISCV::VRRegClassID};
|
|
|
|
for (unsigned RegClassID : StandaloneRegClasses)
|
|
for (unsigned Reg : RegInfo.getRegClass(RegClassID))
|
|
AggregateRegisters.reset(Reg);
|
|
|
|
// Initialize ELEN and VLEN.
|
|
// FIXME: We could have obtained these two constants from RISCVSubtarget
|
|
// but in order to get that from TargetMachine, we need a Function.
|
|
const MCSubtargetInfo &STI = State.getSubtargetInfo();
|
|
ELEN = STI.hasFeature(RISCV::FeatureStdExtZve64x) ? 64 : 32;
|
|
|
|
const unsigned ZvlFeatures[] = {
|
|
RISCV::FeatureStdExtZvl32b, RISCV::FeatureStdExtZvl64b,
|
|
RISCV::FeatureStdExtZvl128b, RISCV::FeatureStdExtZvl256b,
|
|
RISCV::FeatureStdExtZvl512b, RISCV::FeatureStdExtZvl1024b,
|
|
RISCV::FeatureStdExtZvl2048b, RISCV::FeatureStdExtZvl4096b,
|
|
RISCV::FeatureStdExtZvl8192b, RISCV::FeatureStdExtZvl16384b,
|
|
RISCV::FeatureStdExtZvl32768b, RISCV::FeatureStdExtZvl65536b};
|
|
for (auto [Idx, Feature] : enumerate(ZvlFeatures)) {
|
|
if (STI.hasFeature(Feature))
|
|
ZvlVLen = std::max(ZvlVLen, 1u << (Idx + 5));
|
|
}
|
|
}
|
|
|
|
Expected<std::vector<CodeTemplate>>
|
|
generateCodeTemplates(InstructionTemplate Variant,
|
|
const BitVector &ForbiddenRegisters) const override;
|
|
};
|
|
|
|
static bool isMaskedSibling(unsigned MaskedOp, unsigned UnmaskedOp) {
|
|
const auto *RVVMasked = RISCV::getMaskedPseudoInfo(MaskedOp);
|
|
return RVVMasked && RVVMasked->UnmaskedPseudo == UnmaskedOp;
|
|
}
|
|
|
|
// There are primarily two kinds of opcodes that are not eligible
|
|
// in a serial snippet:
|
|
// (1) Has a use operand that can not overlap with the def operand
|
|
// (i.e. early clobber).
|
|
// (2) The register file of the only use operand is different from
|
|
// that of the def operand. For instance, use operand is vector and
|
|
// the result is a scalar.
|
|
static bool isIneligibleOfSerialSnippets(unsigned BaseOpcode,
|
|
const Instruction &I) {
|
|
if (llvm::any_of(I.Operands,
|
|
[](const Operand &Op) { return Op.isEarlyClobber(); }))
|
|
return true;
|
|
|
|
switch (BaseOpcode) {
|
|
case RISCV::VCOMPRESS_VM:
|
|
case RISCV::VCPOP_M:
|
|
case RISCV::VCPOP_V:
|
|
// The permutation instructions listed below cannot have destination
|
|
// overlapping with the source.
|
|
case RISCV::VRGATHEREI16_VV:
|
|
case RISCV::VRGATHER_VI:
|
|
case RISCV::VRGATHER_VV:
|
|
case RISCV::VRGATHER_VX:
|
|
case RISCV::VSLIDE1UP_VX:
|
|
case RISCV::VSLIDEUP_VI:
|
|
case RISCV::VSLIDEUP_VX:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool isZvfhminZvfbfminOpcodes(unsigned BaseOpcode) {
|
|
switch (BaseOpcode) {
|
|
case RISCV::VFNCVT_F_F_W:
|
|
case RISCV::VFWCVT_F_F_V:
|
|
case RISCV::VFNCVTBF16_F_F_W:
|
|
case RISCV::VFWCVTBF16_F_F_V:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static bool isVectorReduction(unsigned BaseOpcode) {
|
|
switch (BaseOpcode) {
|
|
case RISCV::VREDAND_VS:
|
|
case RISCV::VREDMAXU_VS:
|
|
case RISCV::VREDMAX_VS:
|
|
case RISCV::VREDMINU_VS:
|
|
case RISCV::VREDMIN_VS:
|
|
case RISCV::VREDOR_VS:
|
|
case RISCV::VREDSUM_VS:
|
|
case RISCV::VREDXOR_VS:
|
|
case RISCV::VWREDSUMU_VS:
|
|
case RISCV::VWREDSUM_VS:
|
|
case RISCV::VFREDMAX_VS:
|
|
case RISCV::VFREDMIN_VS:
|
|
case RISCV::VFREDOSUM_VS:
|
|
case RISCV::VFREDUSUM_VS:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
template <class BaseT>
|
|
void RISCVSnippetGenerator<BaseT>::annotateWithVType(
|
|
const CodeTemplate &OrigCT, const Instruction &Instr, unsigned BaseOpcode,
|
|
const BitVector &ForbiddenRegisters,
|
|
std::vector<CodeTemplate> &Result) const {
|
|
const MCSubtargetInfo &STI = SnippetGenerator::State.getSubtargetInfo();
|
|
unsigned VPseudoOpcode = Instr.getOpcode();
|
|
|
|
bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;
|
|
|
|
const MCInstrDesc &MIDesc = Instr.Description;
|
|
const uint64_t TSFlags = MIDesc.TSFlags;
|
|
|
|
RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
|
|
|
|
const size_t StartingResultSize = Result.size();
|
|
|
|
SmallPtrSet<const Operand *, 4> VTypeOperands;
|
|
std::optional<AliasingConfigurations> SelfAliasing;
|
|
// Exegesis see instructions with tied operands being inherently serial.
|
|
// But for RVV instructions, those tied operands are passthru rather
|
|
// than real read operands. So we manually put dependency between
|
|
// destination (i.e. def) and any of the non-tied/SEW/policy/AVL/RM
|
|
// operands.
|
|
auto assignSerialRVVOperands = [&, this](InstructionTemplate &IT) {
|
|
// Initialize SelfAliasing on first use.
|
|
if (!SelfAliasing.has_value()) {
|
|
BitVector ExcludeRegs = ForbiddenRegisters;
|
|
ExcludeRegs |= AggregateRegisters;
|
|
SelfAliasing = AliasingConfigurations(Instr, Instr, ExcludeRegs);
|
|
bool EmptyUses = false;
|
|
for (auto &ARO : SelfAliasing->Configurations) {
|
|
auto &Uses = ARO.Uses;
|
|
for (auto ROA = Uses.begin(); ROA != Uses.end();) {
|
|
const Operand *Op = ROA->Op;
|
|
// Exclude tied operand(s).
|
|
if (Op->isTied()) {
|
|
ROA = Uses.erase(ROA);
|
|
continue;
|
|
}
|
|
|
|
// Special handling for reduction operations: for a given reduction
|
|
// `vredop vd, vs2, vs1`, we don't want vd to be aliased with vs1
|
|
// since we're only reading `vs1[0]` and many implementations
|
|
// optimize for this case (e.g. chaining). Instead, we're forcing
|
|
// it to create alias between vd and vs2.
|
|
if (isVectorReduction(BaseOpcode) &&
|
|
// vs1's operand index is always 3.
|
|
Op->getIndex() == 3) {
|
|
ROA = Uses.erase(ROA);
|
|
continue;
|
|
}
|
|
|
|
// Exclude any special operands like SEW and VL -- we've already
|
|
// assigned values to them.
|
|
if (VTypeOperands.count(Op)) {
|
|
ROA = Uses.erase(ROA);
|
|
continue;
|
|
}
|
|
++ROA;
|
|
}
|
|
|
|
// If any of the use operand candidate lists is empty, there is
|
|
// no point to assign self aliasing registers.
|
|
if (Uses.empty()) {
|
|
EmptyUses = true;
|
|
break;
|
|
}
|
|
}
|
|
if (EmptyUses)
|
|
SelfAliasing->Configurations.clear();
|
|
}
|
|
|
|
// This is a self aliasing instruction so defs and uses are from the same
|
|
// instance, hence twice IT in the following call.
|
|
if (!SelfAliasing->empty() && !SelfAliasing->hasImplicitAliasing())
|
|
setRandomAliasing(*SelfAliasing, IT, IT);
|
|
};
|
|
|
|
// We are going to create a CodeTemplate (configuration) for each supported
|
|
// SEW, policy, and VL.
|
|
// FIXME: Account for EEW and EMUL.
|
|
SmallVector<std::optional<unsigned>, 4> Log2SEWs;
|
|
SmallVector<std::optional<unsigned>, 4> Policies;
|
|
SmallVector<std::optional<int>, 3> AVLs;
|
|
SmallVector<std::optional<unsigned>, 8> RoundingModes;
|
|
|
|
bool HasSEWOp = RISCVII::hasSEWOp(TSFlags);
|
|
bool HasPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
|
|
bool HasVLOp = RISCVII::hasVLOp(TSFlags);
|
|
bool HasRMOp = RISCVII::hasRoundModeOp(TSFlags);
|
|
bool UsesVXRM = RISCVII::usesVXRM(TSFlags);
|
|
|
|
if (HasSEWOp) {
|
|
const Operand &SEWOp = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
|
|
VTypeOperands.insert(&SEWOp);
|
|
|
|
if (SEWOp.Info->OperandType == RISCVOp::OPERAND_SEW_MASK) {
|
|
// If it's a mask-producing instruction, the SEW operand is always zero.
|
|
Log2SEWs.push_back(0);
|
|
} else {
|
|
SmallVector<unsigned, 4> SEWCandidates;
|
|
|
|
// (RVV spec 3.4.2) For fractional LMUL, the supported SEW are between
|
|
// [SEW_min, LMUL * ELEN].
|
|
unsigned SEWUpperBound =
|
|
VLMul >= RISCVVType::LMUL_F8 ? multiplyLMul(ELEN, VLMul) : ELEN;
|
|
for (unsigned SEW = MinSEW; SEW <= SEWUpperBound; SEW <<= 1) {
|
|
SEWCandidates.push_back(SEW);
|
|
|
|
// Some scheduling classes already integrate SEW; only put
|
|
// their corresponding SEW values at the SEW operands.
|
|
// NOTE: It is imperative to put this condition in the front, otherwise
|
|
// it is tricky and difficult to know if there is an integrated
|
|
// SEW after other rules are applied to filter the candidates.
|
|
const auto *RVVBase =
|
|
RISCVVInversePseudosTable::getBaseInfo(BaseOpcode, VLMul, SEW);
|
|
if (RVVBase && (RVVBase->Pseudo == VPseudoOpcode ||
|
|
isMaskedSibling(VPseudoOpcode, RVVBase->Pseudo) ||
|
|
isMaskedSibling(RVVBase->Pseudo, VPseudoOpcode))) {
|
|
// There is an integrated SEW, remove all but the SEW pushed last.
|
|
SEWCandidates.erase(SEWCandidates.begin(), SEWCandidates.end() - 1);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Filter out some candidates.
|
|
for (auto SEW = SEWCandidates.begin(); SEW != SEWCandidates.end();) {
|
|
// For floating point operations, only select SEW of the supported FLEN.
|
|
if (isRVVFloatingPointOp(VPseudoOpcode)) {
|
|
bool Supported = false;
|
|
Supported |= isZvfhminZvfbfminOpcodes(BaseOpcode) && *SEW == 16;
|
|
Supported |= STI.hasFeature(RISCV::FeatureStdExtZvfh) && *SEW == 16;
|
|
Supported |= STI.hasFeature(RISCV::FeatureStdExtF) && *SEW == 32;
|
|
Supported |= STI.hasFeature(RISCV::FeatureStdExtD) && *SEW == 64;
|
|
if (!Supported) {
|
|
SEW = SEWCandidates.erase(SEW);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// The EEW for source operand in VSEXT and VZEXT is a fraction
|
|
// of the SEW, hence only SEWs that will lead to valid EEW are allowed.
|
|
if (auto Frac = isRVVSignZeroExtend(BaseOpcode))
|
|
if (*SEW / *Frac < MinSEW) {
|
|
SEW = SEWCandidates.erase(SEW);
|
|
continue;
|
|
}
|
|
|
|
// Most vector crypto 1.0 instructions only work on SEW=32.
|
|
using namespace RISCV_MC;
|
|
if (isOpcodeAvailableIn(BaseOpcode, {Feature_HasStdExtZvkgBit,
|
|
Feature_HasStdExtZvknedBit,
|
|
Feature_HasStdExtZvknhaOrZvknhbBit,
|
|
Feature_HasStdExtZvksedBit,
|
|
Feature_HasStdExtZvkshBit})) {
|
|
if (*SEW != 32)
|
|
// Zvknhb supports SEW=64 as well.
|
|
if (*SEW != 64 || !STI.hasFeature(RISCV::FeatureStdExtZvknhb) ||
|
|
!isOpcodeAvailableIn(BaseOpcode,
|
|
{Feature_HasStdExtZvknhaOrZvknhbBit})) {
|
|
SEW = SEWCandidates.erase(SEW);
|
|
continue;
|
|
}
|
|
|
|
// We're also enforcing the requirement of `LMUL * VLEN >= EGW` here,
|
|
// because some of the extensions have SEW-dependant EGW.
|
|
unsigned EGW = getZvkEGWSize(BaseOpcode, *SEW);
|
|
if (multiplyLMul(ZvlVLen, VLMul) < EGW) {
|
|
SEW = SEWCandidates.erase(SEW);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
++SEW;
|
|
}
|
|
|
|
// We're not going to produce any result with zero SEW candidate.
|
|
if (SEWCandidates.empty())
|
|
return;
|
|
|
|
for (unsigned SEW : SEWCandidates)
|
|
Log2SEWs.push_back(Log2_32(SEW));
|
|
}
|
|
} else {
|
|
Log2SEWs.push_back(std::nullopt);
|
|
}
|
|
|
|
if (HasPolicyOp) {
|
|
VTypeOperands.insert(&Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)]);
|
|
|
|
Policies = {0, RISCVVType::TAIL_AGNOSTIC, RISCVVType::MASK_AGNOSTIC,
|
|
(RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC)};
|
|
} else {
|
|
Policies.push_back(std::nullopt);
|
|
}
|
|
|
|
if (HasVLOp) {
|
|
VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc)]);
|
|
|
|
if (OnlyUsesVLMAXForVL)
|
|
AVLs.push_back(-1);
|
|
else
|
|
AVLs = {// 5-bit immediate value
|
|
1,
|
|
// VLMAX
|
|
-1,
|
|
// Non-X0 register
|
|
0};
|
|
} else {
|
|
AVLs.push_back(std::nullopt);
|
|
}
|
|
|
|
if (HasRMOp) {
|
|
VTypeOperands.insert(&Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1]);
|
|
|
|
if (UsesVXRM) {
|
|
// Use RNU as the default VXRM.
|
|
RoundingModes = {RISCVVXRndMode::RNU};
|
|
if (EnumerateRoundingModes)
|
|
RoundingModes.append(
|
|
{RISCVVXRndMode::RNE, RISCVVXRndMode::RDN, RISCVVXRndMode::ROD});
|
|
} else {
|
|
if (EnumerateRoundingModes)
|
|
RoundingModes = {RISCVFPRndMode::RNE, RISCVFPRndMode::RTZ,
|
|
RISCVFPRndMode::RDN, RISCVFPRndMode::RUP,
|
|
RISCVFPRndMode::RMM};
|
|
else
|
|
// If we're not enumerating FRM, use DYN to instruct
|
|
// RISCVInsertReadWriteCSRPass to insert nothing.
|
|
RoundingModes = {RISCVFPRndMode::DYN};
|
|
}
|
|
} else {
|
|
RoundingModes = {std::nullopt};
|
|
}
|
|
|
|
std::set<std::tuple<std::optional<unsigned>, std::optional<int>,
|
|
std::optional<unsigned>, std::optional<unsigned>>>
|
|
Combinations;
|
|
for (auto AVL : AVLs) {
|
|
for (auto Log2SEW : Log2SEWs)
|
|
for (auto Policy : Policies) {
|
|
for (auto RM : RoundingModes)
|
|
Combinations.insert(std::make_tuple(RM, AVL, Log2SEW, Policy));
|
|
}
|
|
}
|
|
|
|
std::string ConfigStr;
|
|
SmallVector<std::pair<const Operand *, MCOperand>, 4> ValueAssignments;
|
|
for (const auto &[RM, AVL, Log2SEW, Policy] : Combinations) {
|
|
InstructionTemplate IT(&Instr);
|
|
|
|
ListSeparator LS;
|
|
ConfigStr = "vtype = {";
|
|
raw_string_ostream SS(ConfigStr);
|
|
|
|
ValueAssignments.clear();
|
|
|
|
if (RM) {
|
|
const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc) - 1];
|
|
ValueAssignments.push_back({&Op, MCOperand::createImm(*RM)});
|
|
printRoundingMode(SS << LS << (UsesVXRM ? "VXRM" : "FRM") << ": ", *RM,
|
|
UsesVXRM);
|
|
}
|
|
|
|
if (AVL) {
|
|
MCOperand OpVal;
|
|
if (*AVL < 0) {
|
|
// VLMAX
|
|
OpVal = MCOperand::createImm(-1);
|
|
SS << LS << "AVL: VLMAX";
|
|
} else if (*AVL == 0) {
|
|
// A register holding AVL.
|
|
// TODO: Generate a random register.
|
|
OpVal = MCOperand::createReg(RISCV::X5);
|
|
OpVal.print(SS << LS << "AVL: ");
|
|
} else {
|
|
// A 5-bit immediate.
|
|
// The actual value assignment is deferred to
|
|
// RISCVExegesisTarget::randomizeTargetMCOperand.
|
|
SS << LS << "AVL: simm5";
|
|
}
|
|
if (OpVal.isValid()) {
|
|
const Operand &Op = Instr.Operands[RISCVII::getVLOpNum(MIDesc)];
|
|
ValueAssignments.push_back({&Op, OpVal});
|
|
}
|
|
}
|
|
|
|
if (Log2SEW) {
|
|
const Operand &Op = Instr.Operands[RISCVII::getSEWOpNum(MIDesc)];
|
|
ValueAssignments.push_back({&Op, MCOperand::createImm(*Log2SEW)});
|
|
SS << LS << "SEW: e" << (*Log2SEW ? 1 << *Log2SEW : 8);
|
|
}
|
|
|
|
if (Policy) {
|
|
const Operand &Op = Instr.Operands[RISCVII::getVecPolicyOpNum(MIDesc)];
|
|
ValueAssignments.push_back({&Op, MCOperand::createImm(*Policy)});
|
|
SS << LS
|
|
<< "Policy: " << (*Policy & RISCVVType::TAIL_AGNOSTIC ? "ta" : "tu")
|
|
<< "/" << (*Policy & RISCVVType::MASK_AGNOSTIC ? "ma" : "mu");
|
|
}
|
|
|
|
SS << "}";
|
|
|
|
// Filter out some configurations, if needed.
|
|
if (!FilterConfig.empty()) {
|
|
if (!Regex(FilterConfig).match(ConfigStr))
|
|
continue;
|
|
}
|
|
|
|
CodeTemplate CT = OrigCT.clone();
|
|
CT.Config = std::move(ConfigStr);
|
|
for (InstructionTemplate &IT : CT.Instructions) {
|
|
if (IsSerial) {
|
|
// Reset this template's value assignments and do it
|
|
// ourselves.
|
|
IT = InstructionTemplate(&Instr);
|
|
assignSerialRVVOperands(IT);
|
|
}
|
|
|
|
for (const auto &[Op, OpVal] : ValueAssignments)
|
|
IT.getValueFor(*Op) = OpVal;
|
|
}
|
|
Result.push_back(std::move(CT));
|
|
if (Result.size() - StartingResultSize >=
|
|
SnippetGenerator::Opts.MaxConfigsPerOpcode)
|
|
return;
|
|
}
|
|
}
|
|
|
|
template <class BaseT>
|
|
Expected<std::vector<CodeTemplate>>
|
|
RISCVSnippetGenerator<BaseT>::generateCodeTemplates(
|
|
InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
|
|
const Instruction &Instr = Variant.getInstr();
|
|
|
|
bool IsSerial = std::is_same_v<BaseT, SerialSnippetGenerator>;
|
|
|
|
unsigned BaseOpcode = RISCV::getRVVMCOpcode(Instr.getOpcode());
|
|
|
|
// Bail out ineligible opcodes before generating base code templates since
|
|
// the latter is quite expensive.
|
|
if (IsSerial && BaseOpcode && isIneligibleOfSerialSnippets(BaseOpcode, Instr))
|
|
return std::vector<CodeTemplate>{};
|
|
|
|
auto BaseCodeTemplates =
|
|
BaseT::generateCodeTemplates(Variant, ForbiddenRegisters);
|
|
if (!BaseCodeTemplates)
|
|
return BaseCodeTemplates.takeError();
|
|
|
|
if (!BaseOpcode)
|
|
return BaseCodeTemplates;
|
|
|
|
// Specialize for RVV pseudo.
|
|
std::vector<CodeTemplate> ExpandedTemplates;
|
|
for (const auto &BaseCT : *BaseCodeTemplates)
|
|
annotateWithVType(BaseCT, Instr, BaseOpcode, ForbiddenRegisters,
|
|
ExpandedTemplates);
|
|
|
|
return ExpandedTemplates;
|
|
}
|
|
|
|
// Stores constant value to a general-purpose (integer) register.
|
|
static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI,
|
|
MCRegister Reg, const APInt &Value) {
|
|
SmallVector<MCInst, 8> MCInstSeq;
|
|
MCRegister DestReg = Reg;
|
|
|
|
RISCVMatInt::generateMCInstSeq(Value.getSExtValue(), STI, DestReg, MCInstSeq);
|
|
|
|
std::vector<MCInst> MatIntInstrs(MCInstSeq.begin(), MCInstSeq.end());
|
|
return MatIntInstrs;
|
|
}
|
|
|
|
// Integer register used as a temporary when loading constants into
// floating-point registers; it is also listed in UnavailableRegisters.
const MCPhysReg ScratchIntReg = RISCV::X30; // t5
|
|
|
|
// Stores constant bits to a floating-point register.
|
|
static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI,
|
|
MCRegister Reg, const APInt &Bits,
|
|
unsigned FmvOpcode) {
|
|
std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
|
|
Instrs.push_back(MCInstBuilder(FmvOpcode).addReg(Reg).addReg(ScratchIntReg));
|
|
return Instrs;
|
|
}
|
|
|
|
// main idea is:
|
|
// we support APInt only if (represented as double) it has zero fractional
|
|
// part: 1.0, 2.0, 3.0, etc... then we can do the trick: write int to tmp reg t5
|
|
// and then do FCVT this is only reliable thing in 32-bit mode, otherwise we
|
|
// need to use __floatsidf
|
|
static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI,
|
|
MCRegister Reg,
|
|
const APInt &Bits) {
|
|
double D = Bits.bitsToDouble();
|
|
double IPart;
|
|
double FPart = std::modf(D, &IPart);
|
|
|
|
if (std::abs(FPart) > std::numeric_limits<double>::epsilon()) {
|
|
errs() << "loadFP64RegBits32 is not implemented for doubles like " << D
|
|
<< ", please remove fractional part\n";
|
|
return {};
|
|
}
|
|
|
|
std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
|
|
Instrs.push_back(MCInstBuilder(RISCV::FCVT_D_W)
|
|
.addReg(Reg)
|
|
.addReg(ScratchIntReg)
|
|
.addImm(RISCVFPRndMode::RNE));
|
|
return Instrs;
|
|
}
|
|
|
|
class ExegesisRISCVTarget : public ExegesisTarget {
|
|
// NOTE: Alternatively, we can use BitVector here, but the number of RVV MC
|
|
// opcodes is just a small portion of the entire opcode space, so I thought it
|
|
// would be a waste of space to use BitVector.
|
|
mutable SmallSet<unsigned, 16> RVVMCOpcodesWithPseudos;
|
|
|
|
public:
|
|
ExegesisRISCVTarget();
|
|
|
|
bool matchesArch(Triple::ArchType Arch) const override;
|
|
|
|
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
|
|
const APInt &Value) const override;
|
|
|
|
const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
|
|
unsigned Opcode) const override {
|
|
// We don't want to support RVV instructions that depend on VTYPE, because
|
|
// those instructions by themselves don't carry any additional information
|
|
// for us to setup the proper VTYPE environment via VSETVL instructions.
|
|
// FIXME: Ideally, we should use RISCVVInversePseudosTable, but it requires
|
|
// LMUL and SEW and I don't think enumerating those combinations is any
|
|
// better than the ugly trick here that memorizes the corresponding MC
|
|
// opcodes of the RVV pseudo we have processed previously. This works most
|
|
// of the time because RVV pseudo opcodes are placed before any other RVV
|
|
// opcodes. Of course this doesn't work if we're asked to benchmark only a
|
|
// certain subset of opcodes.
|
|
if (RVVMCOpcodesWithPseudos.count(Opcode))
|
|
return "The MC opcode of RVV instructions are ignored";
|
|
|
|
// We want to support all RVV pseudos.
|
|
if (unsigned MCOpcode = RISCV::getRVVMCOpcode(Opcode)) {
|
|
RVVMCOpcodesWithPseudos.insert(MCOpcode);
|
|
return nullptr;
|
|
}
|
|
|
|
return ExegesisTarget::getIgnoredOpcodeReasonOrNull(State, Opcode);
|
|
}
|
|
|
|
MCRegister getDefaultLoopCounterRegister(const Triple &) const override;
|
|
|
|
void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
|
|
MachineBasicBlock &TargetMBB,
|
|
const MCInstrInfo &MII,
|
|
MCRegister LoopRegister) const override;
|
|
|
|
MCRegister getScratchMemoryRegister(const Triple &TT) const override;
|
|
|
|
void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
|
|
unsigned Offset) const override;
|
|
|
|
ArrayRef<MCPhysReg> getUnavailableRegisters() const override;
|
|
|
|
bool allowAsBackToBack(const Instruction &Instr) const override {
|
|
return !Instr.Description.isPseudo();
|
|
}
|
|
|
|
Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
|
|
MCOperand &AssignedValue,
|
|
const BitVector &ForbiddenRegs) const override;
|
|
|
|
std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
|
|
const LLVMState &State,
|
|
const SnippetGenerator::Options &Opts) const override {
|
|
return std::make_unique<RISCVSnippetGenerator<SerialSnippetGenerator>>(
|
|
State, Opts);
|
|
}
|
|
|
|
std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
|
|
const LLVMState &State,
|
|
const SnippetGenerator::Options &Opts) const override {
|
|
return std::make_unique<RISCVSnippetGenerator<ParallelSnippetGenerator>>(
|
|
State, Opts);
|
|
}
|
|
|
|
std::vector<InstructionTemplate>
|
|
generateInstructionVariants(const Instruction &Instr,
|
|
unsigned MaxConfigsPerOpcode) const override;
|
|
|
|
void addTargetSpecificPasses(PassManagerBase &PM) const override {
|
|
// Turn AVL operand of physical registers into virtual registers.
|
|
PM.add(exegesis::createRISCVPreprocessingPass());
|
|
PM.add(createRISCVInsertVSETVLIPass());
|
|
// Setting up the correct FRM.
|
|
PM.add(createRISCVInsertReadWriteCSRPass());
|
|
PM.add(createRISCVInsertWriteVXRMPass());
|
|
// This will assign physical register to the result of VSETVLI instructions
|
|
// that produce VLMAX.
|
|
PM.add(exegesis::createRISCVPostprocessingPass());
|
|
// PseudoRET will be expanded by RISCVAsmPrinter; we have to expand
|
|
// PseudoMovImm with RISCVPostRAExpandPseudoPass though.
|
|
PM.add(createRISCVPostRAExpandPseudoPass());
|
|
}
|
|
};
|
|
|
|
// Hands the RISC-V PFM counter table and the opcode-availability predicate
// to the ExegesisTarget base.
ExegesisRISCVTarget::ExegesisRISCVTarget()
    : ExegesisTarget(RISCVCpuPfmCounters, RISCV_MC::isOpcodeAvailable) {}
|
|
|
|
// This target handles both the 32-bit and the 64-bit RISC-V architectures.
bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const {
  switch (Arch) {
  case Triple::riscv32:
  case Triple::riscv64:
    return true;
  default:
    return false;
  }
}
|
|
|
|
std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI,
|
|
MCRegister Reg,
|
|
const APInt &Value) const {
|
|
if (RISCV::GPRRegClass.contains(Reg))
|
|
return loadIntReg(STI, Reg, Value);
|
|
if (RISCV::FPR16RegClass.contains(Reg))
|
|
return loadFPRegBits(STI, Reg, Value, RISCV::FMV_H_X);
|
|
if (RISCV::FPR32RegClass.contains(Reg))
|
|
return loadFPRegBits(STI, Reg, Value, RISCV::FMV_W_X);
|
|
if (RISCV::FPR64RegClass.contains(Reg)) {
|
|
if (STI.hasFeature(RISCV::Feature64Bit))
|
|
return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X);
|
|
return loadFP64RegBits32(STI, Reg, Value);
|
|
}
|
|
// TODO: Emit proper code to initialize other kinds of registers.
|
|
return {};
|
|
}
|
|
|
|
// Register returned by getDefaultLoopCounterRegister(); it is also listed in
// UnavailableRegisters.
const MCPhysReg DefaultLoopCounterReg = RISCV::X31; // t6
|
|
// Register returned by getScratchMemoryRegister() and assigned to memory
// operands by generateInstructionVariants(); also in UnavailableRegisters.
const MCPhysReg ScratchMemoryReg = RISCV::X10; // a0
|
|
|
|
// The loop counter register is the same for every triple.
MCRegister ExegesisRISCVTarget::getDefaultLoopCounterRegister(
    const Triple & /*TT*/) const {
  return MCRegister(DefaultLoopCounterReg);
}
|
|
|
|
// Appends the loop back-edge to MBB: the loop register is decremented by one
// and control branches to TargetMBB while the register differs from X0.
void ExegesisRISCVTarget::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII, MCRegister LoopRegister) const {
  const MCInstrDesc &AddImmDesc = MII.get(RISCV::ADDI);
  const MCInstrDesc &BranchNeDesc = MII.get(RISCV::BNE);

  // LoopRegister = LoopRegister - 1
  BuildMI(&MBB, DebugLoc(), AddImmDesc)
      .addDef(LoopRegister)
      .addUse(LoopRegister)
      .addImm(-1);

  // if (LoopRegister != x0) goto TargetMBB
  BuildMI(&MBB, DebugLoc(), BranchNeDesc)
      .addUse(LoopRegister)
      .addUse(RISCV::X0)
      .addMBB(&TargetMBB);
}
|
|
|
|
// The scratch memory register (a0) does not depend on the triple.
MCRegister
ExegesisRISCVTarget::getScratchMemoryRegister(const Triple & /*TT*/) const {
  return MCRegister(ScratchMemoryReg);
}
|
|
|
|
// Assigns Reg to the first memory operand of the instruction behind IT.
// The instruction is expected to have a memory operand and that operand is
// expected to be a register; both are only checked with asserts.
void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT,
                                             MCRegister Reg,
                                             unsigned Offset) const {
  // TODO: for now we ignore Offset because have no way
  // to detect it in instruction.
  const auto &Instr = IT.getInstr();

  const auto IsMemoryOperand = [](const Operand &Op) { return Op.isMemory(); };
  auto FirstMemOp = find_if(Instr.Operands, IsMemoryOperand);
  assert(FirstMemOp != Instr.Operands.end() &&
         "Instruction must have memory operands");

  const Operand &MemOp = *FirstMemOp;
  assert(MemOp.isReg() && "Memory operand expected to be register");

  IT.getValueFor(MemOp) = MCOperand::createReg(Reg);
}
|
|
|
|
// Registers reported as unavailable: X0 plus the registers this target
// reserves for the loop counter, integer scratch and scratch memory.
const MCPhysReg UnavailableRegisters[] = {
    RISCV::X0,
    DefaultLoopCounterReg,
    ScratchIntReg,
    ScratchMemoryReg,
};
|
|
|
|
// Exposes the file-level UnavailableRegisters table as an ArrayRef.
ArrayRef<MCPhysReg> ExegesisRISCVTarget::getUnavailableRegisters() const {
  return ArrayRef<MCPhysReg>(UnavailableRegisters);
}
|
|
|
|
/// Picks a value for the target-specific operand behind \p Var and stores it
/// in \p AssignedValue.
///
/// \param Instr         Instruction owning the operand.
/// \param Var           Variable whose primary operand type selects the value.
/// \param AssignedValue Receives the chosen immediate. It is left untouched
///                      for operand types that are neither listed below nor
///                      within the RISC-V immediate operand type range.
/// \param ForbiddenRegs Unused here: only immediates are produced.
/// \returns Error::success() in every case.
Error ExegesisRISCVTarget::randomizeTargetMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  uint8_t OperandType =
      Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType;

  switch (OperandType) {
  case RISCVOp::OPERAND_FRMARG:
    AssignedValue = MCOperand::createImm(RISCVFPRndMode::DYN);
    break;
  case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
    AssignedValue = MCOperand::createImm(0b1 << 4);
    break;
  case RISCVOp::OPERAND_SIMM6_NONZERO:
  case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
    AssignedValue = MCOperand::createImm(1);
    break;
  case RISCVOp::OPERAND_SIMM5:
    // 5-bit signed immediate value.
    // Convert to a signed 64-bit value *before* subtracting: doing the
    // subtraction in size_t wraps around, and on hosts with a 32-bit size_t
    // the wrapped result would widen to a large positive immediate instead
    // of a negative one.
    AssignedValue =
        MCOperand::createImm(static_cast<int64_t>(randomIndex(31)) - 16);
    break;
  case RISCVOp::OPERAND_AVL:
  case RISCVOp::OPERAND_UIMM5:
    // 5-bit unsigned immediate value.
    AssignedValue = MCOperand::createImm(randomIndex(31));
    break;
  default:
    // Any other RISC-V immediate operand type gets zero.
    if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
        OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM)
      AssignedValue = MCOperand::createImm(0);
  }
  return Error::success();
}
|
|
|
|
std::vector<InstructionTemplate>
|
|
ExegesisRISCVTarget::generateInstructionVariants(
|
|
const Instruction &Instr, unsigned int MaxConfigsPerOpcode) const {
|
|
InstructionTemplate IT{&Instr};
|
|
for (const Operand &Op : Instr.Operands)
|
|
if (Op.isMemory()) {
|
|
IT.getValueFor(Op) = MCOperand::createReg(ScratchMemoryReg);
|
|
}
|
|
return {IT};
|
|
}
|
|
|
|
} // anonymous namespace
|
|
|
|
// Returns the address of the function-local static RISC-V target instance.
static ExegesisTarget *getTheRISCVExegesisTarget() {
  static ExegesisRISCVTarget TheTarget;
  return &TheTarget;
}
|
|
|
|
void InitializeRISCVExegesisTarget() {
|
|
ExegesisTarget::registerTarget(getTheRISCVExegesisTarget());
|
|
}
|
|
|
|
} // namespace exegesis
|
|
} // namespace llvm
|