llvm-project/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp

866 lines
27 KiB
C++

//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass does some optimizations for *W instructions at the MI level.
//
// First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
// because the sign extended bits aren't consumed or because the input was
// already sign extended by an earlier instruction.
//
// Then:
// 1. Unless explicit disabled or the target prefers instructions with W suffix,
// it removes the -w suffix from opw instructions whenever all users are
// dependent only on the lower word of the result of the instruction.
// The cases handled are:
// * addi.w because it helps reduce test differences between LA32 and LA64
// w/o being a pessimization.
//
// 2. Or if explicit enabled or the target prefers instructions with W suffix,
// it adds the W suffix to the instruction whenever all users are dependent
// only on the lower word of the result of the instruction.
// The cases handled are:
// * add.d/addi.d/sub.d/mul.d.
// * slli.d with imm < 32.
// * ld.d/ld.wu.
//===---------------------------------------------------------------------===//
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;
#define DEBUG_TYPE "loongarch-opt-w-instrs"
#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
STATISTIC(NumTransformedToWInstrs,
"Number of instructions transformed to W-ops");
static cl::opt<bool>
DisableSExtWRemoval("loongarch-disable-sextw-removal",
cl::desc("Disable removal of sign-extend insn"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
cl::desc("Disable convert to D suffix"),
cl::init(false), cl::Hidden);
namespace {
class LoongArchOptWInstrs : public MachineFunctionPass {
public:
static char ID;
LoongArchOptWInstrs() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
const LoongArchSubtarget &ST,
MachineRegisterInfo &MRI);
bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
const LoongArchSubtarget &ST,
MachineRegisterInfo &MRI);
bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
const LoongArchSubtarget &ST,
MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
};
} // end anonymous namespace
char LoongArchOptWInstrs::ID = 0;
INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,
false, false)
FunctionPass *llvm::createLoongArchOptWInstrsPass() {
return new LoongArchOptWInstrs();
}
// Checks if all users only demand the lower \p OrigBits of the original
// instruction's result.
// TODO: handle multiple interdependent transformations
static bool hasAllNBitUsers(const MachineInstr &OrigMI,
const LoongArchSubtarget &ST,
const MachineRegisterInfo &MRI, unsigned OrigBits) {
SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
while (!Worklist.empty()) {
auto P = Worklist.pop_back_val();
const MachineInstr *MI = P.first;
unsigned Bits = P.second;
if (!Visited.insert(P).second)
continue;
// Only handle instructions with one def.
if (MI->getNumExplicitDefs() != 1)
return false;
Register DestReg = MI->getOperand(0).getReg();
if (!DestReg.isVirtual())
return false;
for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
const MachineInstr *UserMI = UserOp.getParent();
unsigned OpIdx = UserOp.getOperandNo();
switch (UserMI->getOpcode()) {
default:
return false;
case LoongArch::ADD_W:
case LoongArch::ADDI_W:
case LoongArch::SUB_W:
case LoongArch::ALSL_W:
case LoongArch::ALSL_WU:
case LoongArch::MUL_W:
case LoongArch::MULH_W:
case LoongArch::MULH_WU:
case LoongArch::MULW_D_W:
case LoongArch::MULW_D_WU:
case LoongArch::SLL_W:
case LoongArch::SLLI_W:
case LoongArch::SRL_W:
case LoongArch::SRLI_W:
case LoongArch::SRA_W:
case LoongArch::SRAI_W:
case LoongArch::ROTR_W:
case LoongArch::ROTRI_W:
case LoongArch::CLO_W:
case LoongArch::CLZ_W:
case LoongArch::CTO_W:
case LoongArch::CTZ_W:
case LoongArch::BYTEPICK_W:
case LoongArch::REVB_2H:
case LoongArch::BITREV_4B:
case LoongArch::BITREV_W:
case LoongArch::BSTRINS_W:
case LoongArch::BSTRPICK_W:
case LoongArch::CRC_W_W_W:
case LoongArch::CRCC_W_W_W:
case LoongArch::MOVGR2FCSR:
case LoongArch::MOVGR2FRH_W:
case LoongArch::MOVGR2FR_W_64:
case LoongArch::VINSGR2VR_W:
case LoongArch::XVINSGR2VR_W:
case LoongArch::VREPLGR2VR_W:
case LoongArch::XVREPLGR2VR_W:
if (Bits >= 32)
break;
return false;
// {DIV,MOD}.W{U} consumes the upper 32 bits if the div32
// feature is not enabled.
case LoongArch::DIV_W:
case LoongArch::DIV_WU:
case LoongArch::MOD_W:
case LoongArch::MOD_WU:
if (Bits >= 32 && ST.hasDiv32())
break;
return false;
case LoongArch::MOVGR2CF:
case LoongArch::VREPLVE_D:
case LoongArch::XVREPLVE_D:
if (Bits >= 1)
break;
return false;
case LoongArch::VREPLVE_W:
case LoongArch::XVREPLVE_W:
if (Bits >= 2)
break;
return false;
case LoongArch::VREPLVE_H:
case LoongArch::XVREPLVE_H:
if (Bits >= 3)
break;
return false;
case LoongArch::VREPLVE_B:
case LoongArch::XVREPLVE_B:
if (Bits >= 4)
break;
return false;
case LoongArch::EXT_W_B:
case LoongArch::VINSGR2VR_B:
case LoongArch::VREPLGR2VR_B:
case LoongArch::XVREPLGR2VR_B:
if (Bits >= 8)
break;
return false;
case LoongArch::EXT_W_H:
case LoongArch::VINSGR2VR_H:
case LoongArch::VREPLGR2VR_H:
case LoongArch::XVREPLGR2VR_H:
if (Bits >= 16)
break;
return false;
case LoongArch::SRLI_D: {
// If we are shifting right by less than Bits, and users don't demand
// any bits that were shifted into [Bits-1:0], then we can consider this
// as an N-Bit user.
unsigned ShAmt = UserMI->getOperand(2).getImm();
if (Bits > ShAmt) {
Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
break;
}
return false;
}
// these overwrite higher input bits, otherwise the lower word of output
// depends only on the lower word of input. So check their uses read W.
case LoongArch::SLLI_D:
if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm()))
break;
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
case LoongArch::ANDI: {
uint64_t Imm = UserMI->getOperand(2).getImm();
if (Bits >= (unsigned)llvm::bit_width(Imm))
break;
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
}
case LoongArch::ORI: {
uint64_t Imm = UserMI->getOperand(2).getImm();
if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
break;
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
}
case LoongArch::SLL_D:
// Operand 2 is the shift amount which uses log2(grlen) bits.
if (OpIdx == 2) {
if (Bits >= Log2_32(ST.getGRLen()))
break;
return false;
}
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
case LoongArch::SRA_D:
case LoongArch::SRL_D:
case LoongArch::ROTR_D:
// Operand 2 is the shift amount which uses 6 bits.
if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen()))
break;
return false;
case LoongArch::ST_B:
case LoongArch::STX_B:
case LoongArch::STGT_B:
case LoongArch::STLE_B:
case LoongArch::IOCSRWR_B:
// The first argument is the value to store.
if (OpIdx == 0 && Bits >= 8)
break;
return false;
case LoongArch::ST_H:
case LoongArch::STX_H:
case LoongArch::STGT_H:
case LoongArch::STLE_H:
case LoongArch::IOCSRWR_H:
// The first argument is the value to store.
if (OpIdx == 0 && Bits >= 16)
break;
return false;
case LoongArch::ST_W:
case LoongArch::STX_W:
case LoongArch::SCREL_W:
case LoongArch::STPTR_W:
case LoongArch::STGT_W:
case LoongArch::STLE_W:
case LoongArch::IOCSRWR_W:
// The first argument is the value to store.
if (OpIdx == 0 && Bits >= 32)
break;
return false;
case LoongArch::CRC_W_B_W:
case LoongArch::CRCC_W_B_W:
if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
break;
return false;
case LoongArch::CRC_W_H_W:
case LoongArch::CRCC_W_H_W:
if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
break;
return false;
case LoongArch::CRC_W_D_W:
case LoongArch::CRCC_W_D_W:
if (OpIdx == 2 && Bits >= 32)
break;
return false;
// For these, lower word of output in these operations, depends only on
// the lower word of input. So, we check all uses only read lower word.
case LoongArch::COPY:
case LoongArch::PHI:
case LoongArch::ADD_D:
case LoongArch::ADDI_D:
case LoongArch::SUB_D:
case LoongArch::MUL_D:
case LoongArch::AND:
case LoongArch::OR:
case LoongArch::NOR:
case LoongArch::XOR:
case LoongArch::XORI:
case LoongArch::ANDN:
case LoongArch::ORN:
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
case LoongArch::MASKNEZ:
case LoongArch::MASKEQZ:
if (OpIdx != 1)
return false;
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
}
}
}
return true;
}
static bool hasAllWUsers(const MachineInstr &OrigMI,
const LoongArchSubtarget &ST,
const MachineRegisterInfo &MRI) {
return hasAllNBitUsers(OrigMI, ST, MRI, 32);
}
// This function returns true if the machine instruction always outputs a value
// where bits 63:32 match bit 31.
static bool isSignExtendingOpW(const MachineInstr &MI,
const MachineRegisterInfo &MRI, unsigned OpNo) {
switch (MI.getOpcode()) {
// Normal cases
case LoongArch::ADD_W:
case LoongArch::SUB_W:
case LoongArch::ADDI_W:
case LoongArch::ALSL_W:
case LoongArch::LU12I_W:
case LoongArch::SLT:
case LoongArch::SLTU:
case LoongArch::SLTI:
case LoongArch::SLTUI:
case LoongArch::ANDI:
case LoongArch::MUL_W:
case LoongArch::MULH_W:
case LoongArch::MULH_WU:
case LoongArch::DIV_W:
case LoongArch::MOD_W:
case LoongArch::DIV_WU:
case LoongArch::MOD_WU:
case LoongArch::SLL_W:
case LoongArch::SRL_W:
case LoongArch::SRA_W:
case LoongArch::ROTR_W:
case LoongArch::SLLI_W:
case LoongArch::SRLI_W:
case LoongArch::SRAI_W:
case LoongArch::ROTRI_W:
case LoongArch::EXT_W_B:
case LoongArch::EXT_W_H:
case LoongArch::CLO_W:
case LoongArch::CLZ_W:
case LoongArch::CTO_W:
case LoongArch::CTZ_W:
case LoongArch::BYTEPICK_W:
case LoongArch::REVB_2H:
case LoongArch::BITREV_4B:
case LoongArch::BITREV_W:
case LoongArch::BSTRINS_W:
case LoongArch::BSTRPICK_W:
case LoongArch::LD_B:
case LoongArch::LD_H:
case LoongArch::LD_W:
case LoongArch::LD_BU:
case LoongArch::LD_HU:
case LoongArch::LL_W:
case LoongArch::LLACQ_W:
case LoongArch::RDTIMEL_W:
case LoongArch::RDTIMEH_W:
case LoongArch::CPUCFG:
case LoongArch::LDX_B:
case LoongArch::LDX_H:
case LoongArch::LDX_W:
case LoongArch::LDX_BU:
case LoongArch::LDX_HU:
case LoongArch::LDPTR_W:
case LoongArch::LDGT_B:
case LoongArch::LDGT_H:
case LoongArch::LDGT_W:
case LoongArch::LDLE_B:
case LoongArch::LDLE_H:
case LoongArch::LDLE_W:
case LoongArch::AMSWAP_B:
case LoongArch::AMSWAP_H:
case LoongArch::AMSWAP_W:
case LoongArch::AMADD_B:
case LoongArch::AMADD_H:
case LoongArch::AMADD_W:
case LoongArch::AMAND_W:
case LoongArch::AMOR_W:
case LoongArch::AMXOR_W:
case LoongArch::AMMAX_W:
case LoongArch::AMMIN_W:
case LoongArch::AMMAX_WU:
case LoongArch::AMMIN_WU:
case LoongArch::AMSWAP__DB_B:
case LoongArch::AMSWAP__DB_H:
case LoongArch::AMSWAP__DB_W:
case LoongArch::AMADD__DB_B:
case LoongArch::AMADD__DB_H:
case LoongArch::AMADD__DB_W:
case LoongArch::AMAND__DB_W:
case LoongArch::AMOR__DB_W:
case LoongArch::AMXOR__DB_W:
case LoongArch::AMMAX__DB_W:
case LoongArch::AMMIN__DB_W:
case LoongArch::AMMAX__DB_WU:
case LoongArch::AMMIN__DB_WU:
case LoongArch::AMCAS_B:
case LoongArch::AMCAS_H:
case LoongArch::AMCAS_W:
case LoongArch::AMCAS__DB_B:
case LoongArch::AMCAS__DB_H:
case LoongArch::AMCAS__DB_W:
case LoongArch::CRC_W_B_W:
case LoongArch::CRC_W_H_W:
case LoongArch::CRC_W_W_W:
case LoongArch::CRC_W_D_W:
case LoongArch::CRCC_W_B_W:
case LoongArch::CRCC_W_H_W:
case LoongArch::CRCC_W_W_W:
case LoongArch::CRCC_W_D_W:
case LoongArch::IOCSRRD_B:
case LoongArch::IOCSRRD_H:
case LoongArch::IOCSRRD_W:
case LoongArch::MOVFR2GR_S:
case LoongArch::MOVFCSR2GR:
case LoongArch::MOVCF2GR:
case LoongArch::MOVFRH2GR_S:
case LoongArch::MOVFR2GR_S_64:
case LoongArch::VPICKVE2GR_W:
case LoongArch::XVPICKVE2GR_W:
return true;
// Special cases that require checking operands.
// shifting right sufficiently makes the value 32-bit sign-extended
case LoongArch::SRAI_D:
return MI.getOperand(2).getImm() >= 32;
case LoongArch::SRLI_D:
return MI.getOperand(2).getImm() > 32;
// The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
case LoongArch::ADDI_D:
case LoongArch::ORI:
return MI.getOperand(1).isReg() &&
MI.getOperand(1).getReg() == LoongArch::R0;
// A bits extract is sign extended if the msb is less than 31.
case LoongArch::BSTRPICK_D:
return MI.getOperand(2).getImm() < 31;
// Copying from R0 produces zero.
case LoongArch::COPY:
return MI.getOperand(1).getReg() == LoongArch::R0;
// Ignore the scratch register destination.
case LoongArch::PseudoMaskedAtomicSwap32:
case LoongArch::PseudoAtomicSwap32:
case LoongArch::PseudoMaskedAtomicLoadAdd32:
case LoongArch::PseudoMaskedAtomicLoadSub32:
case LoongArch::PseudoAtomicLoadNand32:
case LoongArch::PseudoMaskedAtomicLoadNand32:
case LoongArch::PseudoAtomicLoadAdd32:
case LoongArch::PseudoAtomicLoadSub32:
case LoongArch::PseudoAtomicLoadAnd32:
case LoongArch::PseudoAtomicLoadOr32:
case LoongArch::PseudoAtomicLoadXor32:
case LoongArch::PseudoMaskedAtomicLoadUMax32:
case LoongArch::PseudoMaskedAtomicLoadUMin32:
case LoongArch::PseudoCmpXchg32:
case LoongArch::PseudoMaskedCmpXchg32:
case LoongArch::PseudoMaskedAtomicLoadMax32:
case LoongArch::PseudoMaskedAtomicLoadMin32:
return OpNo == 0;
}
return false;
}
static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
const MachineRegisterInfo &MRI,
SmallPtrSetImpl<MachineInstr *> &FixableDef) {
SmallSet<Register, 4> Visited;
SmallVector<Register, 4> Worklist;
auto AddRegToWorkList = [&](Register SrcReg) {
if (!SrcReg.isVirtual())
return false;
Worklist.push_back(SrcReg);
return true;
};
if (!AddRegToWorkList(SrcReg))
return false;
while (!Worklist.empty()) {
Register Reg = Worklist.pop_back_val();
// If we already visited this register, we don't need to check it again.
if (!Visited.insert(Reg).second)
continue;
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
continue;
int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
assert(OpNo != -1 && "Couldn't find register");
// If this is a sign extending operation we don't need to look any further.
if (isSignExtendingOpW(*MI, MRI, OpNo))
continue;
// Is this an instruction that propagates sign extend?
switch (MI->getOpcode()) {
default:
// Unknown opcode, give up.
return false;
case LoongArch::COPY: {
const MachineFunction *MF = MI->getMF();
const LoongArchMachineFunctionInfo *LAFI =
MF->getInfo<LoongArchMachineFunctionInfo>();
// If this is the entry block and the register is livein, see if we know
// it is sign extended.
if (MI->getParent() == &MF->front()) {
Register VReg = MI->getOperand(0).getReg();
if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg))
continue;
}
Register CopySrcReg = MI->getOperand(1).getReg();
if (CopySrcReg == LoongArch::R4) {
// For a method return value, we check the ZExt/SExt flags in attribute.
// We assume the following code sequence for method call.
// PseudoCALL @bar, ...
// ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
// %0:gpr = COPY $r4
//
// We use the PseudoCall to look up the IR function being called to find
// its return attributes.
const MachineBasicBlock *MBB = MI->getParent();
auto II = MI->getIterator();
if (II == MBB->instr_begin() ||
(--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)
return false;
const MachineInstr &CallMI = *(--II);
if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
return false;
auto *CalleeFn =
dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
if (!CalleeFn)
return false;
auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
if (!IntTy)
return false;
const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
unsigned BitWidth = IntTy->getBitWidth();
if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
(BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
continue;
}
if (!AddRegToWorkList(CopySrcReg))
return false;
break;
}
// For these, we just need to check if the 1st operand is sign extended.
case LoongArch::MOD_D:
case LoongArch::ANDI:
case LoongArch::ORI:
case LoongArch::XORI:
// |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
// Logical operations use a sign extended 12-bit immediate.
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
return false;
break;
case LoongArch::MOD_DU:
case LoongArch::AND:
case LoongArch::OR:
case LoongArch::XOR:
case LoongArch::ANDN:
case LoongArch::ORN:
case LoongArch::PHI: {
// If all incoming values are sign-extended, the output of AND, OR, XOR,
// or PHI is also sign-extended.
// The input registers for PHI are operand 1, 3, ...
// The input registers for others are operand 1 and 2.
unsigned B = 1, E = 3, D = 1;
switch (MI->getOpcode()) {
case LoongArch::PHI:
E = MI->getNumOperands();
D = 2;
break;
}
for (unsigned I = B; I != E; I += D) {
if (!MI->getOperand(I).isReg())
return false;
if (!AddRegToWorkList(MI->getOperand(I).getReg()))
return false;
}
break;
}
case LoongArch::MASKEQZ:
case LoongArch::MASKNEZ:
// Instructions return zero or operand 1. Result is sign extended if
// operand 1 is sign extended.
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
return false;
break;
// With these opcode, we can "fix" them with the W-version
// if we know all users of the result only rely on bits 31:0
case LoongArch::SLLI_D:
// SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
if (MI->getOperand(2).getImm() >= 32)
return false;
[[fallthrough]];
case LoongArch::ADDI_D:
case LoongArch::ADD_D:
case LoongArch::LD_D:
case LoongArch::LD_WU:
case LoongArch::MUL_D:
case LoongArch::SUB_D:
if (hasAllWUsers(*MI, ST, MRI)) {
FixableDef.insert(MI);
break;
}
return false;
// If all incoming values are sign-extended and all users only use
// the lower 32 bits, then convert them to W versions.
case LoongArch::DIV_D: {
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
return false;
if (!AddRegToWorkList(MI->getOperand(2).getReg()))
return false;
if (hasAllWUsers(*MI, ST, MRI)) {
FixableDef.insert(MI);
break;
}
return false;
}
}
}
// If we get here, then every node we visited produces a sign extended value
// or propagated sign extended values. So the result must be sign extended.
return true;
}
static unsigned getWOp(unsigned Opcode) {
switch (Opcode) {
case LoongArch::ADDI_D:
return LoongArch::ADDI_W;
case LoongArch::ADD_D:
return LoongArch::ADD_W;
case LoongArch::DIV_D:
return LoongArch::DIV_W;
case LoongArch::LD_D:
case LoongArch::LD_WU:
return LoongArch::LD_W;
case LoongArch::MUL_D:
return LoongArch::MUL_W;
case LoongArch::SLLI_D:
return LoongArch::SLLI_W;
case LoongArch::SUB_D:
return LoongArch::SUB_W;
default:
llvm_unreachable("Unexpected opcode for replacement with W variant");
}
}
bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
const LoongArchInstrInfo &TII,
const LoongArchSubtarget &ST,
MachineRegisterInfo &MRI) {
if (DisableSExtWRemoval)
return false;
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// We're looking for the sext.w pattern ADDI.W rd, rs, 0.
if (!LoongArch::isSEXT_W(MI))
continue;
Register SrcReg = MI.getOperand(1).getReg();
SmallPtrSet<MachineInstr *, 4> FixableDefs;
// If all users only use the lower bits, this sext.w is redundant.
// Or if all definitions reaching MI sign-extend their output,
// then sext.w is redundant.
if (!hasAllWUsers(MI, ST, MRI) &&
!isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
continue;
Register DstReg = MI.getOperand(0).getReg();
if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
continue;
// Convert Fixable instructions to their W versions.
for (MachineInstr *Fixable : FixableDefs) {
LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
LLVM_DEBUG(dbgs() << " with " << *Fixable);
++NumTransformedToWInstrs;
}
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
MRI.replaceRegWith(DstReg, SrcReg);
MRI.clearKillFlags(SrcReg);
MI.eraseFromParent();
++NumRemovedSExtW;
MadeChange = true;
}
}
return MadeChange;
}
bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,
const LoongArchInstrInfo &TII,
const LoongArchSubtarget &ST,
MachineRegisterInfo &MRI) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
unsigned Opc;
switch (MI.getOpcode()) {
default:
continue;
case LoongArch::ADDI_W:
Opc = LoongArch::ADDI_D;
break;
}
if (hasAllWUsers(MI, ST, MRI)) {
MI.setDesc(TII.get(Opc));
MadeChange = true;
}
}
}
return MadeChange;
}
bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,
const LoongArchInstrInfo &TII,
const LoongArchSubtarget &ST,
MachineRegisterInfo &MRI) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
unsigned WOpc;
// TODO: Add more?
switch (MI.getOpcode()) {
default:
continue;
case LoongArch::ADD_D:
WOpc = LoongArch::ADD_W;
break;
case LoongArch::ADDI_D:
WOpc = LoongArch::ADDI_W;
break;
case LoongArch::SUB_D:
WOpc = LoongArch::SUB_W;
break;
case LoongArch::MUL_D:
WOpc = LoongArch::MUL_W;
break;
case LoongArch::SLLI_D:
// SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
if (MI.getOperand(2).getImm() >= 32)
continue;
WOpc = LoongArch::SLLI_W;
break;
case LoongArch::LD_D:
case LoongArch::LD_WU:
WOpc = LoongArch::LD_W;
break;
}
if (hasAllWUsers(MI, ST, MRI)) {
LLVM_DEBUG(dbgs() << "Replacing " << MI);
MI.setDesc(TII.get(WOpc));
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
MI.clearFlag(MachineInstr::MIFlag::IsExact);
LLVM_DEBUG(dbgs() << " with " << MI);
++NumTransformedToWInstrs;
MadeChange = true;
}
}
}
return MadeChange;
}
bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>();
const LoongArchInstrInfo &TII = *ST.getInstrInfo();
if (!ST.is64Bit())
return false;
bool MadeChange = false;
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
if (!(DisableCvtToDSuffix || ST.preferWInst()))
MadeChange |= convertToDSuffixes(MF, TII, ST, MRI);
if (ST.preferWInst())
MadeChange |= convertToWSuffixes(MF, TII, ST, MRI);
return MadeChange;
}