llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
Petar Avramovic 40270e8ef2
AMDGPU/GlobalISel: Add regbanklegalize rules for load and store (#153176)
Cover all the missing cases and add very detailed tests for each rule.
In summary:
- Flat and Scratch, addrspace(0) and addrspace(5), loads are always
  divergent.
- Global and Constant, addrspace(1) and addrspace(4), have real uniform
  loads (s_load), but require additional checks of the alignment and flags
  in the MMO. When the access is not naturally aligned or the MMO is not
  uniform, do uniform-in-vgpr lowering.
- Local (LDS), addrspace(3), only has instructions for divergent loads; for
  uniform loads do uniform-in-vgpr lowering.
- Store rules are simplified using Ptr32 and Ptr64.
  All operands need to be vgpr.

Some tests have code size regression since they use more sgpr instructions,
marked with FixMe comment to get back to later.
2025-09-11 11:26:20 +02:00

132 lines
5.0 KiB
C++

//===- AMDGPURegBankLegalizeHelper ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
#include "AMDGPURegBankLegalizeRules.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
class MachineIRBuilder;
namespace AMDGPU {
// Applies register banks to the operands of a single instruction, driven by a
// list of RegBankLLTMappingApplyID values.
//
// The caller is responsible for providing a RegBankLLTMappingApplyID for every
// register operand; trailing immediate operands do not need a NonReg entry.
// If the instruction does not have to be replaced, applying the mapping
// completes register bank selection for it. Otherwise InstApplyMethod creates
// the new instruction(s).
class RegBankLegalizeHelper {
// Context the helper works with. B, MUI, RBI and RBLRules are constructor
// parameters; ST and MRI are not, so they are presumably derived from B
// (confirm in the .cpp).
const GCNSubtarget &ST;
MachineIRBuilder &B;
MachineRegisterInfo &MRI;
const MachineUniformityInfo &MUI;
const RegisterBankInfo &RBI;
const RegBankLegalizeRules &RBLRules;
// NOTE(review): presumably caches whether the subtarget runs in wave32 mode;
// confirm against the constructor.
const bool IsWave32;
// Register banks read by the VRegAttrs members below. Judging by the names
// these are the SGPR, VGPR and VCC banks; where they are looked up is not
// visible in this header.
const RegisterBank *SgprRB;
const RegisterBank *VgprRB;
const RegisterBank *VccRB;
// Shorthand LLT constants. SN is an N-bit scalar, VMSN is a fixed vector of M
// elements of N bits each, and PN is a pointer in address space N (64 bits
// wide for P1 and P4, 32 bits wide for P6).
static constexpr LLT S1 = LLT::scalar(1);
static constexpr LLT S16 = LLT::scalar(16);
static constexpr LLT S32 = LLT::scalar(32);
static constexpr LLT S64 = LLT::scalar(64);
static constexpr LLT S96 = LLT::scalar(96);
static constexpr LLT S128 = LLT::scalar(128);
static constexpr LLT S256 = LLT::scalar(256);
static constexpr LLT V2S16 = LLT::fixed_vector(2, 16);
static constexpr LLT V4S16 = LLT::fixed_vector(4, 16);
static constexpr LLT V6S16 = LLT::fixed_vector(6, 16);
static constexpr LLT V8S16 = LLT::fixed_vector(8, 16);
static constexpr LLT V16S16 = LLT::fixed_vector(16, 16);
static constexpr LLT V32S16 = LLT::fixed_vector(32, 16);
static constexpr LLT V2S32 = LLT::fixed_vector(2, 32);
static constexpr LLT V3S32 = LLT::fixed_vector(3, 32);
static constexpr LLT V4S32 = LLT::fixed_vector(4, 32);
static constexpr LLT V6S32 = LLT::fixed_vector(6, 32);
static constexpr LLT V7S32 = LLT::fixed_vector(7, 32);
static constexpr LLT V8S32 = LLT::fixed_vector(8, 32);
static constexpr LLT V16S32 = LLT::fixed_vector(16, 32);
static constexpr LLT V2S64 = LLT::fixed_vector(2, 64);
static constexpr LLT V3S64 = LLT::fixed_vector(3, 64);
static constexpr LLT V4S64 = LLT::fixed_vector(4, 64);
static constexpr LLT V8S64 = LLT::fixed_vector(8, 64);
static constexpr LLT V16S64 = LLT::fixed_vector(16, 64);
static constexpr LLT P1 = LLT::pointer(1, 64);
static constexpr LLT P4 = LLT::pointer(4, 64);
static constexpr LLT P6 = LLT::pointer(6, 32);
// Pre-built {register bank, type} pairs. These default member initializers
// read SgprRB/VgprRB/VccRB, and members are initialized in declaration order,
// so the bank pointers must stay declared above this point and be set in the
// constructor's member initializer list rather than in its body.
MachineRegisterInfo::VRegAttrs SgprRB_S32 = {SgprRB, S32};
MachineRegisterInfo::VRegAttrs VgprRB_S32 = {VgprRB, S32};
MachineRegisterInfo::VRegAttrs VccRB_S1 = {VccRB, S1};
public:
// Binds the helper to the builder, uniformity info, register bank info and
// rule set it will use. All four are taken by reference; given the reference
// members above they presumably have to outlive the helper.
RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI,
const RegisterBankInfo &RBI,
const RegBankLegalizeRules &RBLRules);
// Main entry point for one instruction. NOTE(review): judging by the name,
// this looks up the matching rule in RBLRules and applies its mapping to MI;
// the implementation is not visible in this header.
void findRuleAndApplyMapping(MachineInstr &MI);
// Manual apply helpers. NOTE(review): presumably used for PHIs and for
// instructions whose mapping is trivial, without a rule lookup; confirm.
void applyMappingPHI(MachineInstr &MI);
void applyMappingTrivial(MachineInstr &MI);
private:
// Implementation helpers. Only declarations are visible here; see the .cpp
// for their exact semantics.
//
// NOTE(review): the SmallSet<Register, 4> parameters below appear to carry the
// SGPR operands that require a waterfall loop; confirm against the callers.
bool executeInWaterfallLoop(MachineIRBuilder &B,
iterator_range<MachineBasicBlock::iterator> Range,
SmallSet<Register, 4> &SgprOperandRegs);
// Translate a RegBankLLTMappingApplyID into a type or a register bank.
LLT getTyFromID(RegBankLLTMappingApplyID ID);
LLT getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty);
const RegisterBank *getRegBankFromID(RegBankLLTMappingApplyID ID);
// Apply the per-operand IDs in MethodIDs to MI. OpIdx is passed by non-const
// reference, so the callee can advance it past the operands it handled
// (presumably definitions first, then uses; confirm).
void
applyMappingDst(MachineInstr &MI, unsigned &OpIdx,
const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs);
void
applyMappingSrc(MachineInstr &MI, unsigned &OpIdx,
const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
SmallSet<Register, 4> &SgprWaterfallOperandRegs);
// Load rewriting helpers. MergeTy defaults to an invalid (default-constructed)
// LLT; how that default is interpreted is defined in the .cpp.
void splitLoad(MachineInstr &MI, ArrayRef<LLT> LLTBreakdown,
LLT MergeTy = LLT());
void widenLoad(MachineInstr &MI, LLT WideTy, LLT MergeTy = LLT());
void widenMMOToS32(GAnyLoad &MI) const;
// Lowering helpers. NOTE(review): lower() presumably dispatches on Mapping to
// one of the lower* routines below; confirm in the .cpp.
void lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
SmallSet<Register, 4> &SgprWaterfallOperandRegs);
void lowerVccExtToSel(MachineInstr &MI);
// Each unpack* helper returns a pair of registers produced from Reg.
// NOTE(review): the suffix suggests zero-, sign- and any-extension of the two
// halves; confirm.
std::pair<Register, Register> unpackZExt(Register Reg);
std::pair<Register, Register> unpackSExt(Register Reg);
std::pair<Register, Register> unpackAExt(Register Reg);
void lowerUnpackBitShift(MachineInstr &MI);
void lowerV_BFE(MachineInstr &MI);
void lowerS_BFE(MachineInstr &MI);
void lowerSplitTo32(MachineInstr &MI);
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
};
} // end namespace AMDGPU
} // end namespace llvm
#endif