[AMDGPU] Remove AMDGPUArgumentUsageInfo pass (#182490)
`AMDGPUArgumentUsageInfo` provided a per-function map that `LowerFormalArguments` would write each function's implicit argument register layout into, and `passSpecialInputs` would read back when lowering calls to look up the callee's layout. This per-function map is redundant for all non-entry callees, which already use the same `FixedABIFunctionInfo` register layout. GlobalISel already used `FixedABIFunctionInfo` unconditionally. This change makes SelectionDAG do the same.
This commit is contained in:
parent
eac18e783f
commit
9e6a6be8a8
@ -536,8 +536,6 @@ void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
|
||||
ImmutablePass *createAMDGPUExternalAAWrapperPass();
|
||||
void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
|
||||
|
||||
void initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(PassRegistry &);
|
||||
|
||||
ModulePass *createAMDGPUExportKernelRuntimeHandlesLegacyPass();
|
||||
void initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(PassRegistry &);
|
||||
extern char &AMDGPUExportKernelRuntimeHandlesLegacyID;
|
||||
|
||||
@ -11,17 +11,11 @@
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/NativeFormatting.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
|
||||
|
||||
INITIALIZE_PASS(AMDGPUArgumentUsageInfoWrapperLegacy, DEBUG_TYPE,
|
||||
"Argument Register Usage Information Storage", false, true)
|
||||
|
||||
void ArgDescriptor::print(raw_ostream &OS,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
if (!isSet()) {
|
||||
@ -42,46 +36,9 @@ void ArgDescriptor::print(raw_ostream &OS,
|
||||
OS << '\n';
|
||||
}
|
||||
|
||||
char AMDGPUArgumentUsageInfoWrapperLegacy::ID = 0;
|
||||
|
||||
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
|
||||
|
||||
// Hardcoded registers from fixed function ABI
|
||||
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
|
||||
= AMDGPUFunctionArgInfo::fixedABILayout();
|
||||
|
||||
// TODO: Print preload kernargs?
|
||||
// Dumps every entry of ArgInfoMap to OS: a header line with the function's
// name, followed by each argument descriptor of that function's
// AMDGPUFunctionArgInfo, each prefixed with a field label.
// The module parameter M is not referenced in the body.
// NOTE: the three WorkItemID labels are emitted without a trailing colon,
// unlike the other field labels.
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
|
||||
// FI.first is the function key, FI.second the recorded argument layout.
for (const auto &FI : ArgInfoMap) {
|
||||
OS << "Arguments for " << FI.first->getName() << '\n'
|
||||
<< " PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
|
||||
<< " DispatchPtr: " << FI.second.DispatchPtr
|
||||
<< " QueuePtr: " << FI.second.QueuePtr
|
||||
<< " KernargSegmentPtr: " << FI.second.KernargSegmentPtr
|
||||
<< " DispatchID: " << FI.second.DispatchID
|
||||
<< " FlatScratchInit: " << FI.second.FlatScratchInit
|
||||
<< " PrivateSegmentSize: " << FI.second.PrivateSegmentSize
|
||||
<< " WorkGroupIDX: " << FI.second.WorkGroupIDX
|
||||
<< " WorkGroupIDY: " << FI.second.WorkGroupIDY
|
||||
<< " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
|
||||
<< " WorkGroupInfo: " << FI.second.WorkGroupInfo
|
||||
<< " LDSKernelId: " << FI.second.LDSKernelId
|
||||
<< " PrivateSegmentWaveByteOffset: "
|
||||
<< FI.second.PrivateSegmentWaveByteOffset
|
||||
<< " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
|
||||
<< " ImplicitArgPtr: " << FI.second.ImplicitArgPtr
|
||||
<< " WorkItemIDX " << FI.second.WorkItemIDX
|
||||
<< " WorkItemIDY " << FI.second.WorkItemIDY
|
||||
<< " WorkItemIDZ " << FI.second.WorkItemIDZ
|
||||
<< '\n';
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidation hook for the new pass manager result.
// Returns true (i.e. "invalidate this result") unless the preservation checker
// for AMDGPUArgumentUsageAnalysis reports the analysis as preserved when
// stateless. The module and the invalidator arguments are not used.
bool AMDGPUArgumentUsageInfo::invalidate(Module &M, const PreservedAnalyses &PA,
|
||||
ModuleAnalysisManager::Invalidator &) {
|
||||
auto PAC = PA.getChecker<AMDGPUArgumentUsageAnalysis>();
|
||||
return !PAC.preservedWhenStateless();
|
||||
}
|
||||
const AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::FixedABIFunctionInfo =
|
||||
AMDGPUFunctionArgInfo::fixedABILayout();
|
||||
|
||||
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
|
||||
AMDGPUFunctionArgInfo::getPreloadedValue(
|
||||
@ -180,18 +137,3 @@ AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
|
||||
AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
|
||||
return AI;
|
||||
}
|
||||
|
||||
// Returns the argument layout recorded for F in ArgInfoMap. When F has no
// entry, the shared FixedABIFunctionInfo layout is returned instead.
const AMDGPUFunctionArgInfo &
|
||||
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
|
||||
auto I = ArgInfoMap.find(&F);
|
||||
// No recorded layout for this function: fall back to the fixed ABI layout.
if (I == ArgInfoMap.end())
|
||||
return FixedABIFunctionInfo;
|
||||
return I->second;
|
||||
}
|
||||
|
||||
AnalysisKey AMDGPUArgumentUsageAnalysis::Key;
|
||||
|
||||
// Analysis entry point: produces a default-constructed (empty)
// AMDGPUArgumentUsageInfo. Neither the module nor the analysis manager is
// consulted here.
AMDGPUArgumentUsageInfo
|
||||
AMDGPUArgumentUsageAnalysis::run(Module &M, ModuleAnalysisManager &) {
|
||||
return AMDGPUArgumentUsageInfo();
|
||||
}
|
||||
|
||||
@ -10,16 +10,11 @@
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
|
||||
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/CodeGen/Register.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/PassRegistry.h"
|
||||
#include <variant>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Function;
|
||||
class LLT;
|
||||
class raw_ostream;
|
||||
class TargetRegisterClass;
|
||||
@ -28,7 +23,6 @@ class TargetRegisterInfo;
|
||||
struct ArgDescriptor {
|
||||
private:
|
||||
friend struct AMDGPUFunctionArgInfo;
|
||||
friend class AMDGPUArgumentUsageInfo;
|
||||
|
||||
std::variant<std::monostate, MCRegister, unsigned> Val;
|
||||
|
||||
@ -168,69 +162,7 @@ struct AMDGPUFunctionArgInfo {
|
||||
getPreloadedValue(PreloadedValue Value) const;
|
||||
|
||||
static AMDGPUFunctionArgInfo fixedABILayout();
|
||||
};
|
||||
|
||||
// Per-function store of AMDGPUFunctionArgInfo layouts, keyed by Function
// pointer, with a fixed-ABI fallback for functions that have no entry.
class AMDGPUArgumentUsageInfo {
|
||||
private:
|
||||
// Recorded layout for each function that called setFuncArgInfo.
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
|
||||
|
||||
public:
|
||||
// Value-initialized (empty) layout.
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
|
||||
// Layout produced by AMDGPUFunctionArgInfo::fixedABILayout().
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
|
||||
|
||||
// Prints every recorded function and its argument descriptors to OS.
void print(raw_ostream &OS, const Module *M = nullptr) const;
|
||||
|
||||
// Drops all recorded per-function layouts.
void clear() { ArgInfoMap.clear(); }
|
||||
|
||||
// Records (or overwrites) the layout for F with a copy of ArgInfo.
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
|
||||
ArgInfoMap[&F] = ArgInfo;
|
||||
}
|
||||
|
||||
// Returns the layout recorded for F, or FixedABIFunctionInfo if none exists.
const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const;
|
||||
|
||||
// New pass manager invalidation hook; see the out-of-line definition.
bool invalidate(Module &M, const PreservedAnalyses &PA,
|
||||
ModuleAnalysisManager::Invalidator &Inv);
|
||||
};
|
||||
|
||||
// Legacy pass manager wrapper that owns an AMDGPUArgumentUsageInfo and exposes
// it to other passes as an ImmutablePass.
// NOTE: AUIP is only allocated in doInitialization; the accessors,
// doFinalization and print dereference it without a null check.
class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass {
|
||||
// Owned storage; null until doInitialization has run.
std::unique_ptr<AMDGPUArgumentUsageInfo> AUIP;
|
||||
|
||||
public:
|
||||
// Pass identification.
static char ID;
|
||||
|
||||
AMDGPUArgumentUsageInfoWrapperLegacy() : ImmutablePass(ID) {}
|
||||
|
||||
// Accessors for the wrapped info object.
AMDGPUArgumentUsageInfo &getArgUsageInfo() { return *AUIP; }
|
||||
const AMDGPUArgumentUsageInfo &getArgUsageInfo() const { return *AUIP; }
|
||||
|
||||
// This pass declares that it preserves all analyses.
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesAll();
|
||||
}
|
||||
|
||||
// Allocates a fresh, empty info object; returns false.
bool doInitialization(Module &M) override {
|
||||
AUIP = std::make_unique<AMDGPUArgumentUsageInfo>();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Clears the recorded per-function layouts; returns false.
bool doFinalization(Module &M) override {
|
||||
AUIP->clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Forwards printing to the wrapped info object.
void print(raw_ostream &OS, const Module *M = nullptr) const override {
|
||||
AUIP->print(OS, M);
|
||||
}
|
||||
};
|
||||
|
||||
// New pass manager module analysis whose result type is
// AMDGPUArgumentUsageInfo.
class AMDGPUArgumentUsageAnalysis
|
||||
: public AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis> {
|
||||
// The mixin is befriended so it can reach the private Key member.
friend AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis>;
|
||||
static AnalysisKey Key;
|
||||
|
||||
public:
|
||||
using Result = AMDGPUArgumentUsageInfo;
|
||||
|
||||
// Computes the analysis result for module M; see the out-of-line definition.
AMDGPUArgumentUsageInfo run(Module &M, ModuleAnalysisManager &);
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
@ -790,8 +790,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
|
||||
if (!Info.CB)
|
||||
return true;
|
||||
|
||||
const AMDGPUFunctionArgInfo *CalleeArgInfo
|
||||
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
|
||||
const AMDGPUFunctionArgInfo &CalleeArgInfo =
|
||||
AMDGPUFunctionArgInfo::FixedABIFunctionInfo;
|
||||
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();
|
||||
@ -841,7 +841,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
|
||||
continue;
|
||||
|
||||
std::tie(OutgoingArg, ArgRC, ArgTy) =
|
||||
CalleeArgInfo->getPreloadedValue(InputID);
|
||||
CalleeArgInfo.getPreloadedValue(InputID);
|
||||
if (!OutgoingArg)
|
||||
continue;
|
||||
|
||||
@ -888,13 +888,13 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
|
||||
LLT ArgTy;
|
||||
|
||||
std::tie(OutgoingArg, ArgRC, ArgTy) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
|
||||
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
|
||||
if (!OutgoingArg)
|
||||
std::tie(OutgoingArg, ArgRC, ArgTy) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
|
||||
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
|
||||
if (!OutgoingArg)
|
||||
std::tie(OutgoingArg, ArgRC, ArgTy) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
|
||||
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
|
||||
if (!OutgoingArg)
|
||||
return false;
|
||||
|
||||
@ -917,7 +917,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
|
||||
// If incoming ids are not packed we need to pack them.
|
||||
// FIXME: Should consider known workgroup size to eliminate known 0 cases.
|
||||
Register InputReg;
|
||||
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
|
||||
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX &&
|
||||
NeedWorkItemIDX) {
|
||||
if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
|
||||
InputReg = MRI.createGenericVirtualRegister(S32);
|
||||
@ -929,7 +929,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
|
||||
}
|
||||
}
|
||||
|
||||
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
|
||||
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY &&
|
||||
NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
|
||||
Register Y = MRI.createGenericVirtualRegister(S32);
|
||||
LI->buildLoadInputValue(Y, MIRBuilder, IncomingArgY,
|
||||
@ -939,7 +939,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
|
||||
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
|
||||
}
|
||||
|
||||
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
|
||||
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ &&
|
||||
NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
|
||||
Register Z = MRI.createGenericVirtualRegister(S32);
|
||||
LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ,
|
||||
|
||||
@ -134,7 +134,6 @@ static SDValue stripExtractLoElt(SDValue In) {
|
||||
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
|
||||
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
|
||||
false)
|
||||
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfoWrapperLegacy)
|
||||
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
|
||||
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
|
||||
#ifdef EXPENSIVE_CHECKS
|
||||
@ -238,7 +237,6 @@ bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.addRequired<AMDGPUArgumentUsageInfoWrapperLegacy>();
|
||||
AU.addRequired<UniformityInfoWrapperPass>();
|
||||
#ifdef EXPENSIVE_CHECKS
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
|
||||
@ -13,12 +13,6 @@
|
||||
|
||||
// NOTE: NO INCLUDE GUARD DESIRED!
|
||||
|
||||
#ifndef MODULE_ANALYSIS
|
||||
#define MODULE_ANALYSIS(NAME, CREATE_PASS)
|
||||
#endif
|
||||
MODULE_ANALYSIS("amdgpu-argument-usage", AMDGPUArgumentUsageAnalysis())
|
||||
#undef MODULE_ANALYSIS
|
||||
|
||||
#ifndef MODULE_PASS
|
||||
#define MODULE_PASS(NAME, CREATE_PASS)
|
||||
#endif
|
||||
|
||||
@ -17,7 +17,6 @@
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUAliasAnalysis.h"
|
||||
#include "AMDGPUArgumentUsageInfo.h"
|
||||
#include "AMDGPUBarrierLatency.h"
|
||||
#include "AMDGPUCtorDtorLowering.h"
|
||||
#include "AMDGPUExportClustering.h"
|
||||
@ -642,7 +641,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
||||
initializeAMDGPULowerExecSyncLegacyPass(*PR);
|
||||
initializeAMDGPUSwLowerLDSLegacyPass(*PR);
|
||||
initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR);
|
||||
initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(*PR);
|
||||
initializeAMDGPUAtomicOptimizerPass(*PR);
|
||||
initializeAMDGPULowerKernelArgumentsPass(*PR);
|
||||
initializeAMDGPUPromoteKernelArgumentsPass(*PR);
|
||||
@ -2289,11 +2287,6 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(
|
||||
|
||||
void AMDGPUCodeGenPassBuilder::addPreISel(PassManagerWrapper &PMW) const {
|
||||
|
||||
// Require AMDGPUArgumentUsageAnalysis so that it's available during ISel.
|
||||
flushFPMsToMPM(PMW);
|
||||
addModulePass(RequireAnalysisPass<AMDGPUArgumentUsageAnalysis, Module>(),
|
||||
PMW);
|
||||
|
||||
if (TM.getOptLevel() > CodeGenOptLevel::None) {
|
||||
addFunctionPass(FlattenCFGPass(), PMW);
|
||||
addFunctionPass(SinkingPass(), PMW);
|
||||
|
||||
@ -35,7 +35,6 @@
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachinePassManager.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValueManager.h"
|
||||
#include "llvm/CodeGen/SDPatternMatch.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
@ -3668,19 +3667,6 @@ SDValue SITargetLowering::LowerFormalArguments(
|
||||
if (IsEntryFunc)
|
||||
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
|
||||
|
||||
if (DAG.getPass()) {
|
||||
auto &ArgUsageInfo =
|
||||
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfoWrapperLegacy>();
|
||||
ArgUsageInfo.getArgUsageInfo().setFuncArgInfo(Fn, Info->getArgInfo());
|
||||
} else if (auto *MFAM = DAG.getMFAM()) {
|
||||
Module &M = *MF.getFunction().getParent();
|
||||
auto *ArgUsageInfo =
|
||||
MFAM->getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
|
||||
.getCachedResult<AMDGPUArgumentUsageAnalysis>(M);
|
||||
if (ArgUsageInfo)
|
||||
ArgUsageInfo->setFuncArgInfo(Fn, Info->getArgInfo());
|
||||
}
|
||||
|
||||
unsigned StackArgSize = CCInfo.getStackSize();
|
||||
Info->setBytesInStackArgArea(StackArgSize);
|
||||
|
||||
@ -3890,24 +3876,8 @@ void SITargetLowering::passSpecialInputs(
|
||||
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
|
||||
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
|
||||
|
||||
const AMDGPUFunctionArgInfo *CalleeArgInfo =
|
||||
&AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
|
||||
if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) {
|
||||
if (DAG.getPass()) {
|
||||
auto &ArgUsageInfo =
|
||||
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfoWrapperLegacy>();
|
||||
CalleeArgInfo =
|
||||
&ArgUsageInfo.getArgUsageInfo().lookupFuncArgInfo(*CalleeFunc);
|
||||
} else if (auto *MFAM = DAG.getMFAM()) {
|
||||
Module &M = *DAG.getMachineFunction().getFunction().getParent();
|
||||
auto *ArgUsageInfo =
|
||||
MFAM->getResult<ModuleAnalysisManagerMachineFunctionProxy>(
|
||||
DAG.getMachineFunction())
|
||||
.getCachedResult<AMDGPUArgumentUsageAnalysis>(M);
|
||||
if (ArgUsageInfo)
|
||||
CalleeArgInfo = &ArgUsageInfo->lookupFuncArgInfo(*CalleeFunc);
|
||||
}
|
||||
}
|
||||
const AMDGPUFunctionArgInfo &CalleeArgInfo =
|
||||
AMDGPUFunctionArgInfo::FixedABIFunctionInfo;
|
||||
|
||||
// TODO: Unify with private memory register handling. This is complicated by
|
||||
// the fact that at least in kernels, the input argument is not necessarily
|
||||
@ -3934,7 +3904,7 @@ void SITargetLowering::passSpecialInputs(
|
||||
continue;
|
||||
|
||||
const auto [OutgoingArg, ArgRC, ArgTy] =
|
||||
CalleeArgInfo->getPreloadedValue(InputID);
|
||||
CalleeArgInfo.getPreloadedValue(InputID);
|
||||
if (!OutgoingArg)
|
||||
continue;
|
||||
|
||||
@ -3983,13 +3953,13 @@ void SITargetLowering::passSpecialInputs(
|
||||
// packed.
|
||||
|
||||
auto [OutgoingArg, ArgRC, Ty] =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
|
||||
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
|
||||
if (!OutgoingArg)
|
||||
std::tie(OutgoingArg, ArgRC, Ty) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
|
||||
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
|
||||
if (!OutgoingArg)
|
||||
std::tie(OutgoingArg, ArgRC, Ty) =
|
||||
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
|
||||
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
|
||||
if (!OutgoingArg)
|
||||
return;
|
||||
|
||||
@ -4008,7 +3978,7 @@ void SITargetLowering::passSpecialInputs(
|
||||
const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
|
||||
|
||||
// If incoming ids are not packed we need to pack them.
|
||||
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
|
||||
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX &&
|
||||
NeedWorkItemIDX) {
|
||||
if (Subtarget->getMaxWorkitemID(F, 0) != 0) {
|
||||
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
|
||||
@ -4017,7 +3987,7 @@ void SITargetLowering::passSpecialInputs(
|
||||
}
|
||||
}
|
||||
|
||||
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
|
||||
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY &&
|
||||
NeedWorkItemIDY && Subtarget->getMaxWorkitemID(F, 1) != 0) {
|
||||
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
|
||||
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
|
||||
@ -4027,7 +3997,7 @@ void SITargetLowering::passSpecialInputs(
|
||||
: Y;
|
||||
}
|
||||
|
||||
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
|
||||
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ &&
|
||||
NeedWorkItemIDZ && Subtarget->getMaxWorkitemID(F, 2) != 0) {
|
||||
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
|
||||
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
|
||||
|
||||
@ -109,7 +109,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
|
||||
} else if (!isEntryFunction()) {
|
||||
if (CC != CallingConv::AMDGPU_Gfx &&
|
||||
CC != CallingConv::AMDGPU_Gfx_WholeWave)
|
||||
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
|
||||
ArgInfo = AMDGPUFunctionArgInfo::FixedABIFunctionInfo;
|
||||
|
||||
FrameOffsetReg = AMDGPU::SGPR33;
|
||||
StackPtrOffsetReg = AMDGPU::SGPR32;
|
||||
|
||||
@ -1,17 +1,22 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck %s
|
||||
|
||||
define amdgpu_kernel void @entry_fn() {
|
||||
; CHECK-LABEL: entry_fn:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_sext_i32_i16 s5, s5
|
||||
; CHECK-NEXT: s_add_co_u32 s4, s4, entry_fn@gotpcrel32@lo+8
|
||||
; CHECK-NEXT: s_add_co_ci_u32 s5, s5, entry_fn@gotpcrel32@hi+16
|
||||
; CHECK-NEXT: v_mov_b32_e32 v31, v0
|
||||
; CHECK-NEXT: s_load_b64 s[12:13], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_mov_b64 s[10:11], s[6:7]
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], s[0:1]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[2:3]
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_wait_kmcnt 0x0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[12:13]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
entry:
|
||||
call void @entry_fn()
|
||||
@ -34,22 +39,17 @@ define void @caller() {
|
||||
; CHECK-NEXT: s_mov_b32 exec_lo, s1
|
||||
; CHECK-NEXT: s_add_co_i32 s32, s32, 16
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s0, 2
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_getpc_b64 s[0:1]
|
||||
; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
|
||||
; CHECK-NEXT: s_sext_i32_i16 s5, s5
|
||||
; CHECK-NEXT: s_add_co_u32 s4, s4, entry_fn@gotpcrel32@lo+12
|
||||
; CHECK-NEXT: s_sext_i32_i16 s1, s1
|
||||
; CHECK-NEXT: s_add_co_u32 s0, s0, entry_fn@gotpcrel32@lo+12
|
||||
; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
|
||||
; CHECK-NEXT: s_add_co_ci_u32 s5, s5, entry_fn@gotpcrel32@hi+24
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, v31
|
||||
; CHECK-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_add_co_ci_u32 s1, s1, entry_fn@gotpcrel32@hi+24
|
||||
; CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
|
||||
; CHECK-NEXT: s_mov_b64 s[2:3], s[6:7]
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], s[10:11]
|
||||
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
|
||||
; CHECK-NEXT: s_wait_kmcnt 0x0
|
||||
; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
|
||||
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
|
||||
|
||||
@ -36,9 +36,8 @@
|
||||
; GCN-O0-NEXT: amdgpu-lower-intrinsics
|
||||
; GCN-O0-NEXT: cgscc(function(lower-switch
|
||||
; GCN-O0-NEXT: lower-invoke
|
||||
; GCN-O0-NEXT: unreachableblockelim))
|
||||
; GCN-O0-NEXT: require<amdgpu-argument-usage>
|
||||
; GCN-O0-NEXT: cgscc(function(amdgpu-unify-divergent-exit-nodes
|
||||
; GCN-O0-NEXT: unreachableblockelim
|
||||
; GCN-O0-NEXT: amdgpu-unify-divergent-exit-nodes
|
||||
; GCN-O0-NEXT: fix-irreducible
|
||||
; GCN-O0-NEXT: unify-loop-exits
|
||||
; GCN-O0-NEXT: StructurizeCFGPass
|
||||
@ -145,9 +144,8 @@
|
||||
; GCN-O2-NEXT: amdgpu-lower-intrinsics
|
||||
; GCN-O2-NEXT: cgscc(function(lower-switch
|
||||
; GCN-O2-NEXT: lower-invoke
|
||||
; GCN-O2-NEXT: unreachableblockelim))
|
||||
; GCN-O2-NEXT: require<amdgpu-argument-usage>
|
||||
; GCN-O2-NEXT: cgscc(function(flatten-cfg
|
||||
; GCN-O2-NEXT: unreachableblockelim
|
||||
; GCN-O2-NEXT: flatten-cfg
|
||||
; GCN-O2-NEXT: sink
|
||||
; GCN-O2-NEXT: amdgpu-late-codegenprepare
|
||||
; GCN-O2-NEXT: amdgpu-unify-divergent-exit-nodes
|
||||
@ -317,9 +315,8 @@
|
||||
; GCN-O3-NEXT: amdgpu-lower-intrinsics
|
||||
; GCN-O3-NEXT: cgscc(function(lower-switch
|
||||
; GCN-O3-NEXT: lower-invoke
|
||||
; GCN-O3-NEXT: unreachableblockelim))
|
||||
; GCN-O3-NEXT: require<amdgpu-argument-usage>
|
||||
; GCN-O3-NEXT: cgscc(function(flatten-cfg
|
||||
; GCN-O3-NEXT: unreachableblockelim
|
||||
; GCN-O3-NEXT: flatten-cfg
|
||||
; GCN-O3-NEXT: sink
|
||||
; GCN-O3-NEXT: amdgpu-late-codegenprepare
|
||||
; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
; GCN-O0-NEXT:Library Function Lowering Analysis
|
||||
; GCN-O0-NEXT:Assumption Cache Tracker
|
||||
; GCN-O0-NEXT:Profile summary info
|
||||
; GCN-O0-NEXT:Argument Register Usage Information Storage
|
||||
; GCN-O0-NEXT:Create Garbage Collector Module Metadata
|
||||
; GCN-O0-NEXT:Register Usage Information Storage
|
||||
; GCN-O0-NEXT:Machine Branch Probability Analysis
|
||||
@ -174,7 +173,6 @@
|
||||
; GCN-O1-NEXT:External Alias Analysis
|
||||
; GCN-O1-NEXT:Type-Based Alias Analysis
|
||||
; GCN-O1-NEXT:Scoped NoAlias Alias Analysis
|
||||
; GCN-O1-NEXT:Argument Register Usage Information Storage
|
||||
; GCN-O1-NEXT:Create Garbage Collector Module Metadata
|
||||
; GCN-O1-NEXT:Machine Branch Probability Analysis
|
||||
; GCN-O1-NEXT:Register Usage Information Storage
|
||||
@ -467,7 +465,6 @@
|
||||
; GCN-O1-OPTS-NEXT:External Alias Analysis
|
||||
; GCN-O1-OPTS-NEXT:Type-Based Alias Analysis
|
||||
; GCN-O1-OPTS-NEXT:Scoped NoAlias Alias Analysis
|
||||
; GCN-O1-OPTS-NEXT:Argument Register Usage Information Storage
|
||||
; GCN-O1-OPTS-NEXT:Create Garbage Collector Module Metadata
|
||||
; GCN-O1-OPTS-NEXT:Machine Branch Probability Analysis
|
||||
; GCN-O1-OPTS-NEXT:Register Usage Information Storage
|
||||
@ -787,7 +784,6 @@
|
||||
; GCN-O2-NEXT:External Alias Analysis
|
||||
; GCN-O2-NEXT:Type-Based Alias Analysis
|
||||
; GCN-O2-NEXT:Scoped NoAlias Alias Analysis
|
||||
; GCN-O2-NEXT:Argument Register Usage Information Storage
|
||||
; GCN-O2-NEXT:Create Garbage Collector Module Metadata
|
||||
; GCN-O2-NEXT:Machine Branch Probability Analysis
|
||||
; GCN-O2-NEXT:Register Usage Information Storage
|
||||
@ -1112,7 +1108,6 @@
|
||||
; GCN-O3-NEXT:External Alias Analysis
|
||||
; GCN-O3-NEXT:Type-Based Alias Analysis
|
||||
; GCN-O3-NEXT:Scoped NoAlias Alias Analysis
|
||||
; GCN-O3-NEXT:Argument Register Usage Information Storage
|
||||
; GCN-O3-NEXT:Create Garbage Collector Module Metadata
|
||||
; GCN-O3-NEXT:Machine Branch Probability Analysis
|
||||
; GCN-O3-NEXT:Register Usage Information Storage
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user