286 lines
9.0 KiB
C++
286 lines
9.0 KiB
C++
//===- RISCVFoldMemOffset.cpp - Fold ADDI into memory offsets ------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===---------------------------------------------------------------------===//
|
|
//
|
|
// Look for ADDIs that can be removed by folding their immediate into later
|
|
// load/store addresses. There may be other arithmetic instructions between the
|
|
// addi and load/store that we need to reassociate through. If the final result
|
|
// of the arithmetic is only used by load/store addresses, we can fold the
|
|
// offset into the all the load/store as long as it doesn't create an offset
|
|
// that is too large.
|
|
//
|
|
//===---------------------------------------------------------------------===//
|
|
|
|
#include "RISCV.h"
|
|
#include "RISCVSubtarget.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include <queue>
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "riscv-fold-mem-offset"
|
|
#define RISCV_FOLD_MEM_OFFSET_NAME "RISC-V Fold Memory Offset"
|
|
|
|
namespace {
|
|
|
|
class RISCVFoldMemOffset : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
RISCVFoldMemOffset() : MachineFunctionPass(ID) {}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
bool foldOffset(Register OrigReg, int64_t InitialOffset,
|
|
const MachineRegisterInfo &MRI,
|
|
DenseMap<MachineInstr *, int64_t> &FoldableInstrs);
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.setPreservesCFG();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
|
|
StringRef getPassName() const override { return RISCV_FOLD_MEM_OFFSET_NAME; }
|
|
};
|
|
|
|
// Wrapper class around a std::optional to allow accumulation.
|
|
class FoldableOffset {
|
|
std::optional<int64_t> Offset;
|
|
|
|
public:
|
|
bool hasValue() const { return Offset.has_value(); }
|
|
int64_t getValue() const { return *Offset; }
|
|
|
|
FoldableOffset &operator=(int64_t RHS) {
|
|
Offset = RHS;
|
|
return *this;
|
|
}
|
|
|
|
FoldableOffset &operator+=(int64_t RHS) {
|
|
if (!Offset)
|
|
Offset = 0;
|
|
Offset = (uint64_t)*Offset + (uint64_t)RHS;
|
|
return *this;
|
|
}
|
|
|
|
int64_t operator*() { return *Offset; }
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
char RISCVFoldMemOffset::ID = 0;
|
|
INITIALIZE_PASS(RISCVFoldMemOffset, DEBUG_TYPE, RISCV_FOLD_MEM_OFFSET_NAME,
|
|
false, false)
|
|
|
|
FunctionPass *llvm::createRISCVFoldMemOffsetPass() {
|
|
return new RISCVFoldMemOffset();
|
|
}
|
|
|
|
// Walk forward from the ADDI looking for arithmetic instructions we can
|
|
// analyze or memory instructions that use it as part of their address
|
|
// calculation. For each arithmetic instruction we lookup how the offset
|
|
// contributes to the value in that register use that information to
|
|
// calculate the contribution to the output of this instruction.
|
|
// Only addition and left shift are supported.
|
|
// FIXME: Add multiplication by constant. The constant will be in a register.
|
|
bool RISCVFoldMemOffset::foldOffset(
|
|
Register OrigReg, int64_t InitialOffset, const MachineRegisterInfo &MRI,
|
|
DenseMap<MachineInstr *, int64_t> &FoldableInstrs) {
|
|
// Map to hold how much the offset contributes to the value of this register.
|
|
DenseMap<Register, int64_t> RegToOffsetMap;
|
|
|
|
// Insert root offset into the map.
|
|
RegToOffsetMap[OrigReg] = InitialOffset;
|
|
|
|
std::queue<Register> Worklist;
|
|
Worklist.push(OrigReg);
|
|
|
|
while (!Worklist.empty()) {
|
|
Register Reg = Worklist.front();
|
|
Worklist.pop();
|
|
|
|
if (!Reg.isVirtual())
|
|
return false;
|
|
|
|
for (auto &User : MRI.use_nodbg_instructions(Reg)) {
|
|
FoldableOffset Offset;
|
|
|
|
switch (User.getOpcode()) {
|
|
default:
|
|
return false;
|
|
case RISCV::ADD:
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset = I->second;
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset += I->second;
|
|
break;
|
|
case RISCV::SH1ADD:
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset = (uint64_t)I->second << 1;
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset += I->second;
|
|
break;
|
|
case RISCV::SH2ADD:
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset = (uint64_t)I->second << 2;
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset += I->second;
|
|
break;
|
|
case RISCV::SH3ADD:
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset = (uint64_t)I->second << 3;
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset += I->second;
|
|
break;
|
|
case RISCV::ADD_UW:
|
|
case RISCV::SH1ADD_UW:
|
|
case RISCV::SH2ADD_UW:
|
|
case RISCV::SH3ADD_UW:
|
|
// Don't fold through the zero extended input.
|
|
if (User.getOperand(1).getReg() == Reg)
|
|
return false;
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset = I->second;
|
|
break;
|
|
case RISCV::SLLI: {
|
|
unsigned ShAmt = User.getOperand(2).getImm();
|
|
if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
|
|
I != RegToOffsetMap.end())
|
|
Offset = (uint64_t)I->second << ShAmt;
|
|
break;
|
|
}
|
|
case RISCV::LB:
|
|
case RISCV::LBU:
|
|
case RISCV::SB:
|
|
case RISCV::LH:
|
|
case RISCV::LH_INX:
|
|
case RISCV::LHU:
|
|
case RISCV::FLH:
|
|
case RISCV::SH:
|
|
case RISCV::SH_INX:
|
|
case RISCV::FSH:
|
|
case RISCV::LW:
|
|
case RISCV::LW_INX:
|
|
case RISCV::LWU:
|
|
case RISCV::FLW:
|
|
case RISCV::SW:
|
|
case RISCV::SW_INX:
|
|
case RISCV::FSW:
|
|
case RISCV::LD:
|
|
case RISCV::LD_RV32:
|
|
case RISCV::FLD:
|
|
case RISCV::SD:
|
|
case RISCV::SD_RV32:
|
|
case RISCV::FSD: {
|
|
// Can't fold into store value.
|
|
if (User.getOperand(0).getReg() == Reg)
|
|
return false;
|
|
|
|
// Existing offset must be immediate.
|
|
if (!User.getOperand(2).isImm())
|
|
return false;
|
|
|
|
// Require at least one operation between the ADDI and the load/store.
|
|
// We have other optimizations that should handle the simple case.
|
|
if (User.getOperand(1).getReg() == OrigReg)
|
|
return false;
|
|
|
|
auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
|
|
if (I == RegToOffsetMap.end())
|
|
return false;
|
|
|
|
int64_t LocalOffset = User.getOperand(2).getImm();
|
|
assert(isInt<12>(LocalOffset));
|
|
int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second;
|
|
if (!isInt<12>(CombinedOffset))
|
|
return false;
|
|
|
|
FoldableInstrs[&User] = CombinedOffset;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// If we reach here we should have an accumulated offset.
|
|
assert(Offset.hasValue() && "Expected an offset");
|
|
|
|
// If the offset is new or changed, add the destination register to the
|
|
// work list.
|
|
int64_t OffsetVal = Offset.getValue();
|
|
auto P =
|
|
RegToOffsetMap.try_emplace(User.getOperand(0).getReg(), OffsetVal);
|
|
if (P.second) {
|
|
Worklist.push(User.getOperand(0).getReg());
|
|
} else if (P.first->second != OffsetVal) {
|
|
P.first->second = OffsetVal;
|
|
Worklist.push(User.getOperand(0).getReg());
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
// This optimization may increase size by preventing compression.
|
|
if (MF.getFunction().hasOptSize())
|
|
return false;
|
|
|
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
|
|
bool MadeChange = false;
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
|
|
// FIXME: We can support ADDIW from an LUI+ADDIW pair if the result is
|
|
// equivalent to LUI+ADDI.
|
|
if (MI.getOpcode() != RISCV::ADDI)
|
|
continue;
|
|
|
|
// We only want to optimize register ADDIs.
|
|
if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
|
|
continue;
|
|
|
|
// Ignore 'li'.
|
|
if (MI.getOperand(1).getReg() == RISCV::X0)
|
|
continue;
|
|
|
|
int64_t Offset = MI.getOperand(2).getImm();
|
|
assert(isInt<12>(Offset));
|
|
|
|
DenseMap<MachineInstr *, int64_t> FoldableInstrs;
|
|
|
|
if (!foldOffset(MI.getOperand(0).getReg(), Offset, MRI, FoldableInstrs))
|
|
continue;
|
|
|
|
if (FoldableInstrs.empty())
|
|
continue;
|
|
|
|
// We can fold this ADDI.
|
|
// Rewrite all the instructions.
|
|
for (auto [MemMI, NewOffset] : FoldableInstrs)
|
|
MemMI->getOperand(2).setImm(NewOffset);
|
|
|
|
MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
|
|
MRI.clearKillFlags(MI.getOperand(1).getReg());
|
|
MI.eraseFromParent();
|
|
}
|
|
}
|
|
|
|
return MadeChange;
|
|
}
|