226 lines
7.5 KiB
C++
226 lines
7.5 KiB
C++
//===- GCNCreateVOPD.cpp - Create VOPD Instructions ----------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
/// Combine VALU pairs into VOPD instructions
|
|
/// Only works on wave32
|
|
/// Has register requirements, we reject creating VOPD if the requirements are
|
|
/// not met.
|
|
/// shouldCombineVOPD mutator in postRA machine scheduler puts candidate
|
|
/// instructions for VOPD back-to-back
|
|
///
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPU.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "GCNVOPDUtils.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "Utils/AMDGPUBaseInfo.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
#include "llvm/CodeGen/MachinePassManager.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#define DEBUG_TYPE "gcn-create-vopd"
|
|
STATISTIC(NumVOPDCreated, "Number of VOPD Insts Created.");
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
class GCNCreateVOPD {
|
|
private:
|
|
class VOPDCombineInfo {
|
|
public:
|
|
VOPDCombineInfo() = default;
|
|
VOPDCombineInfo(MachineInstr *First, MachineInstr *Second,
|
|
bool VOPD3 = false)
|
|
: FirstMI(First), SecondMI(Second), IsVOPD3(VOPD3) {}
|
|
|
|
MachineInstr *FirstMI;
|
|
MachineInstr *SecondMI;
|
|
bool IsVOPD3;
|
|
};
|
|
|
|
public:
|
|
const GCNSubtarget *ST = nullptr;
|
|
|
|
bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
|
|
auto *FirstMI = CI.FirstMI;
|
|
auto *SecondMI = CI.SecondMI;
|
|
unsigned Opc1 = FirstMI->getOpcode();
|
|
unsigned Opc2 = SecondMI->getOpcode();
|
|
unsigned EncodingFamily =
|
|
AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
|
|
int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1, CI.IsVOPD3),
|
|
AMDGPU::getVOPDOpcode(Opc2, CI.IsVOPD3),
|
|
EncodingFamily, CI.IsVOPD3);
|
|
assert(NewOpcode != -1 &&
|
|
"Should have previously determined this as a possible VOPD\n");
|
|
|
|
auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
|
|
FirstMI->getDebugLoc(), SII->get(NewOpcode))
|
|
.setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
|
|
|
|
namespace VOPD = AMDGPU::VOPD;
|
|
MachineInstr *MI[] = {FirstMI, SecondMI};
|
|
auto InstInfo =
|
|
AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
|
|
|
|
for (auto CompIdx : VOPD::COMPONENTS) {
|
|
auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
|
|
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
|
|
}
|
|
|
|
const AMDGPU::OpName Mods[2][3] = {
|
|
{AMDGPU::OpName::src0X_modifiers, AMDGPU::OpName::vsrc1X_modifiers,
|
|
AMDGPU::OpName::vsrc2X_modifiers},
|
|
{AMDGPU::OpName::src0Y_modifiers, AMDGPU::OpName::vsrc1Y_modifiers,
|
|
AMDGPU::OpName::vsrc2Y_modifiers}};
|
|
const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
|
|
AMDGPU::OpName::src1_modifiers,
|
|
AMDGPU::OpName::src2_modifiers};
|
|
const unsigned VOPDOpc = VOPDInst->getOpcode();
|
|
|
|
for (auto CompIdx : VOPD::COMPONENTS) {
|
|
auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
|
|
bool IsVOP3 = SII->isVOP3(*MI[CompIdx]);
|
|
for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
|
|
if (AMDGPU::hasNamedOperand(VOPDOpc, Mods[CompIdx][CompSrcIdx])) {
|
|
const MachineOperand *Mod =
|
|
SII->getNamedOperand(*MI[CompIdx], SrcMods[CompSrcIdx]);
|
|
VOPDInst.addImm(Mod ? Mod->getImm() : 0);
|
|
}
|
|
auto MCOprIdx =
|
|
InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx, IsVOP3);
|
|
VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
|
|
}
|
|
if (MI[CompIdx]->getOpcode() == AMDGPU::V_CNDMASK_B32_e32 && CI.IsVOPD3)
|
|
VOPDInst.addReg(AMDGPU::VCC_LO);
|
|
}
|
|
|
|
if (CI.IsVOPD3) {
|
|
if (unsigned BitOp2 = AMDGPU::getBitOp2(Opc2))
|
|
VOPDInst.addImm(BitOp2);
|
|
}
|
|
|
|
SII->fixImplicitOperands(*VOPDInst);
|
|
for (auto CompIdx : VOPD::COMPONENTS)
|
|
VOPDInst.copyImplicitOps(*MI[CompIdx]);
|
|
|
|
LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
|
|
<< *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
|
|
|
|
for (auto CompIdx : VOPD::COMPONENTS)
|
|
MI[CompIdx]->eraseFromParent();
|
|
|
|
++NumVOPDCreated;
|
|
return true;
|
|
}
|
|
|
|
bool run(MachineFunction &MF) {
|
|
ST = &MF.getSubtarget<GCNSubtarget>();
|
|
if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
|
|
return false;
|
|
LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
|
|
|
|
const SIInstrInfo *SII = ST->getInstrInfo();
|
|
bool Changed = false;
|
|
unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(*ST);
|
|
bool HasVOPD3 = ST->hasVOPD3();
|
|
|
|
SmallVector<VOPDCombineInfo> ReplaceCandidates;
|
|
|
|
for (auto &MBB : MF) {
|
|
auto MII = MBB.begin(), E = MBB.end();
|
|
while (MII != E) {
|
|
auto *FirstMI = &*MII;
|
|
MII = next_nodbg(MII, MBB.end());
|
|
if (MII == MBB.end())
|
|
break;
|
|
if (FirstMI->isDebugInstr())
|
|
continue;
|
|
auto *SecondMI = &*MII;
|
|
unsigned Opc = FirstMI->getOpcode();
|
|
unsigned Opc2 = SecondMI->getOpcode();
|
|
VOPDCombineInfo CI;
|
|
|
|
const auto checkVOPD = [&](bool VOPD3) -> bool {
|
|
llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD =
|
|
AMDGPU::getCanBeVOPD(Opc, EncodingFamily, VOPD3);
|
|
llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD =
|
|
AMDGPU::getCanBeVOPD(Opc2, EncodingFamily, VOPD3);
|
|
|
|
if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
|
|
CI = VOPDCombineInfo(FirstMI, SecondMI, VOPD3);
|
|
else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
|
|
CI = VOPDCombineInfo(SecondMI, FirstMI, VOPD3);
|
|
else
|
|
return false;
|
|
// checkVOPDRegConstraints cares about program order, but doReplace
|
|
// cares about X-Y order in the constituted VOPD
|
|
return llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI,
|
|
VOPD3);
|
|
};
|
|
|
|
if (checkVOPD(false) || (HasVOPD3 && checkVOPD(true))) {
|
|
ReplaceCandidates.push_back(CI);
|
|
++MII;
|
|
}
|
|
}
|
|
}
|
|
for (auto &CI : ReplaceCandidates) {
|
|
Changed |= doReplace(SII, CI);
|
|
}
|
|
|
|
return Changed;
|
|
}
|
|
};
|
|
|
|
class GCNCreateVOPDLegacy : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.setPreservesCFG();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
|
|
StringRef getPassName() const override {
|
|
return "GCN Create VOPD Instructions";
|
|
}
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
return GCNCreateVOPD().run(MF);
|
|
}
|
|
};
|
|
|
|
} // namespace
|
|
|
|
PreservedAnalyses
|
|
llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
|
|
MachineFunctionAnalysisManager &AM) {
|
|
if (!GCNCreateVOPD().run(MF))
|
|
return PreservedAnalyses::all();
|
|
return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
|
|
}
|
|
|
|
char GCNCreateVOPDLegacy::ID = 0;
|
|
|
|
char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;
|
|
|
|
INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
|
|
false, false)
|