From 9e6a6be8a84f32072e40b27e146fa9076560274e Mon Sep 17 00:00:00 2001 From: Dark Steve Date: Mon, 23 Feb 2026 18:47:01 +0530 Subject: [PATCH] [AMDGPU] Remove AMDGPUArgumentUsageInfo pass (#182490) `AMDGPUArgumentUsageInfo` provided a per-function map that `lowerFormalArguments` would write each function's implicit argument register layout into, and `passSpecialInputs` would read back when lowering calls to look up the callee's layout. This per-function map is redundant for all non-entry callees, which already use the same `FixedABIFunctionInfo` register layout. GlobalISel already used `FixedABIFunctionInfo` unconditionally. This change makes SelectionDAG do the same. --- llvm/lib/Target/AMDGPU/AMDGPU.h | 2 - .../Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp | 62 +---------------- .../Target/AMDGPU/AMDGPUArgumentUsageInfo.h | 68 ------------------- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 18 ++--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 - llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 6 -- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 -- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 48 +++---------- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 2 +- llvm/test/CodeGen/AMDGPU/cc-entry.ll | 28 ++++---- llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 15 ++-- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 5 -- 12 files changed, 41 insertions(+), 222 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 5df11a45b488..3c717d0f8f91 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -536,8 +536,6 @@ void initializeAMDGPUAAWrapperPassPass(PassRegistry&); ImmutablePass *createAMDGPUExternalAAWrapperPass(); void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); -void initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(PassRegistry &); - ModulePass *createAMDGPUExportKernelRuntimeHandlesLegacyPass(); void initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(PassRegistry &); extern char &AMDGPUExportKernelRuntimeHandlesLegacyID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp index 346e257ea729..65fafec56748 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -11,17 +11,11 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/Function.h" #include "llvm/Support/NativeFormatting.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define DEBUG_TYPE "amdgpu-argument-reg-usage-info" - -INITIALIZE_PASS(AMDGPUArgumentUsageInfoWrapperLegacy, DEBUG_TYPE, - "Argument Register Usage Information Storage", false, true) - void ArgDescriptor::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (!isSet()) { @@ -42,46 +36,9 @@ void ArgDescriptor::print(raw_ostream &OS, OS << '\n'; } -char AMDGPUArgumentUsageInfoWrapperLegacy::ID = 0; - -const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{}; - // Hardcoded registers from fixed function ABI -const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo - = AMDGPUFunctionArgInfo::fixedABILayout(); - -// TODO: Print preload kernargs? -void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const { - for (const auto &FI : ArgInfoMap) { - OS << "Arguments for " << FI.first->getName() << '\n' - << " PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer - << " DispatchPtr: " << FI.second.DispatchPtr - << " QueuePtr: " << FI.second.QueuePtr - << " KernargSegmentPtr: " << FI.second.KernargSegmentPtr - << " DispatchID: " << FI.second.DispatchID - << " FlatScratchInit: " << FI.second.FlatScratchInit - << " PrivateSegmentSize: " << FI.second.PrivateSegmentSize - << " WorkGroupIDX: " << FI.second.WorkGroupIDX - << " WorkGroupIDY: " << FI.second.WorkGroupIDY - << " WorkGroupIDZ: " << FI.second.WorkGroupIDZ - << " WorkGroupInfo: " << FI.second.WorkGroupInfo - << " LDSKernelId: " << FI.second.LDSKernelId - << " PrivateSegmentWaveByteOffset: " - << FI.second.PrivateSegmentWaveByteOffset - << " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr - << " ImplicitArgPtr: " << FI.second.ImplicitArgPtr - << " WorkItemIDX " << FI.second.WorkItemIDX - << " WorkItemIDY " << FI.second.WorkItemIDY - << " WorkItemIDZ " << FI.second.WorkItemIDZ - << '\n'; - } -} - -bool AMDGPUArgumentUsageInfo::invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &) { - auto PAC = PA.getChecker(); - return !PAC.preservedWhenStateless(); -} +const AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::FixedABIFunctionInfo = + AMDGPUFunctionArgInfo::fixedABILayout(); std::tuple AMDGPUFunctionArgInfo::getPreloadedValue( @@ -180,18 +137,3 @@ AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() { AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20); return AI; } - -const AMDGPUFunctionArgInfo & -AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const { - auto I = ArgInfoMap.find(&F); - if (I == ArgInfoMap.end()) - return FixedABIFunctionInfo; - return I->second; -} - -AnalysisKey AMDGPUArgumentUsageAnalysis::Key; - -AMDGPUArgumentUsageInfo -AMDGPUArgumentUsageAnalysis::run(Module &M, ModuleAnalysisManager &) { - return AMDGPUArgumentUsageInfo(); -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index f38e49b947e3..cbb3c6ce127a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -10,16 +10,11 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H #include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/Register.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include namespace llvm { -class Function; class LLT; class raw_ostream; class TargetRegisterClass; @@ -28,7 +23,6 @@ class TargetRegisterInfo; struct ArgDescriptor { private: friend struct AMDGPUFunctionArgInfo; - friend class AMDGPUArgumentUsageInfo; std::variant Val; @@ -168,69 +162,7 @@ struct AMDGPUFunctionArgInfo { getPreloadedValue(PreloadedValue Value) const; static AMDGPUFunctionArgInfo fixedABILayout(); -}; - -class AMDGPUArgumentUsageInfo { -private: - DenseMap ArgInfoMap; - -public: - static const AMDGPUFunctionArgInfo ExternFunctionInfo; static const AMDGPUFunctionArgInfo FixedABIFunctionInfo; - - void print(raw_ostream &OS, const Module *M = nullptr) const; - - void clear() { ArgInfoMap.clear(); } - - void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) { - ArgInfoMap[&F] = ArgInfo; - } - - const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const; - - bool invalidate(Module &M, const PreservedAnalyses &PA, - ModuleAnalysisManager::Invalidator &Inv); -}; - -class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass { - std::unique_ptr AUIP; - -public: - static char ID; - - AMDGPUArgumentUsageInfoWrapperLegacy() : ImmutablePass(ID) {} - - AMDGPUArgumentUsageInfo &getArgUsageInfo() { return *AUIP; } - const AMDGPUArgumentUsageInfo &getArgUsageInfo() const { return *AUIP; } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - bool doInitialization(Module &M) override { - AUIP = std::make_unique(); - return false; - } - - bool doFinalization(Module &M) override { - AUIP->clear(); - return false; - } - - void print(raw_ostream &OS, const Module *M = nullptr) const override { - AUIP->print(OS, M); - } -}; - -class AMDGPUArgumentUsageAnalysis - : public AnalysisInfoMixin { - friend AnalysisInfoMixin; - static AnalysisKey Key; - -public: - using Result = AMDGPUArgumentUsageInfo; - - AMDGPUArgumentUsageInfo run(Module &M, ModuleAnalysisManager &); }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 5c6affdae0c5..fe8342e3da61 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -790,8 +790,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, if (!Info.CB) return true; - const AMDGPUFunctionArgInfo *CalleeArgInfo - = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; + const AMDGPUFunctionArgInfo &CalleeArgInfo = + AMDGPUFunctionArgInfo::FixedABIFunctionInfo; const SIMachineFunctionInfo *MFI = MF.getInfo(); const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo(); @@ -841,7 +841,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, continue; std::tie(OutgoingArg, ArgRC, ArgTy) = - CalleeArgInfo->getPreloadedValue(InputID); + CalleeArgInfo.getPreloadedValue(InputID); if (!OutgoingArg) continue; @@ -888,13 +888,13 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, LLT ArgTy; std::tie(OutgoingArg, ArgRC, ArgTy) = - CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X); + CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X); if (!OutgoingArg) std::tie(OutgoingArg, ArgRC, ArgTy) = - CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y); + CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y); if (!OutgoingArg) std::tie(OutgoingArg, ArgRC, ArgTy) = - CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z); + CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z); if (!OutgoingArg) return false; @@ -917,7 +917,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, // If incoming ids are not packed we need to pack them. // FIXME: Should consider known workgroup size to eliminate known 0 cases. Register InputReg; - if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX && + if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX && NeedWorkItemIDX) { if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) { InputReg = MRI.createGenericVirtualRegister(S32); @@ -929,7 +929,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, } } - if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY && + if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY && NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) { Register Y = MRI.createGenericVirtualRegister(S32); LI->buildLoadInputValue(Y, MIRBuilder, IncomingArgY, @@ -939,7 +939,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder, InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y; } - if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ && + if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ && NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) { Register Z = MRI.createGenericVirtualRegister(S32); LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b2d584899503..4acda590ed5b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -134,7 +134,6 @@ static SDValue stripExtractLoElt(SDValue In) { INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) -INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfoWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS @@ -238,7 +237,6 @@ bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) { } void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); AU.addRequired(); #ifdef EXPENSIVE_CHECKS AU.addRequired(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index f464fbf31c75..46d70c257b75 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -13,12 +13,6 @@ // NOTE: NO INCLUDE GUARD DESIRED! -#ifndef MODULE_ANALYSIS -#define MODULE_ANALYSIS(NAME, CREATE_PASS) -#endif -MODULE_ANALYSIS("amdgpu-argument-usage", AMDGPUArgumentUsageAnalysis()) -#undef MODULE_ANALYSIS - #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 49c60c254f6f..52dbd16f80f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -17,7 +17,6 @@ #include "AMDGPUTargetMachine.h" #include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" -#include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUBarrierLatency.h" #include "AMDGPUCtorDtorLowering.h" #include "AMDGPUExportClustering.h" @@ -642,7 +641,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPULowerExecSyncLegacyPass(*PR); initializeAMDGPUSwLowerLDSLegacyPass(*PR); initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR); - initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(*PR); initializeAMDGPUAtomicOptimizerPass(*PR); initializeAMDGPULowerKernelArgumentsPass(*PR); initializeAMDGPUPromoteKernelArgumentsPass(*PR); @@ -2289,11 +2287,6 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare( void AMDGPUCodeGenPassBuilder::addPreISel(PassManagerWrapper &PMW) const { - // Require AMDGPUArgumentUsageAnalysis so that it's available during ISel. - flushFPMsToMPM(PMW); - addModulePass(RequireAnalysisPass(), - PMW); - if (TM.getOptLevel() > CodeGenOptLevel::None) { addFunctionPass(FlattenCFGPass(), PMW); addFunctionPass(SinkingPass(), PMW); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8533b1bd06d9..e547f2b74523 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -35,7 +35,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/PseudoSourceValueManager.h" #include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/IR/DiagnosticInfo.h" @@ -3668,19 +3667,6 @@ SDValue SITargetLowering::LowerFormalArguments( if (IsEntryFunc) allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics); - if (DAG.getPass()) { - auto &ArgUsageInfo = - DAG.getPass()->getAnalysis(); - ArgUsageInfo.getArgUsageInfo().setFuncArgInfo(Fn, Info->getArgInfo()); - } else if (auto *MFAM = DAG.getMFAM()) { - Module &M = *MF.getFunction().getParent(); - auto *ArgUsageInfo = - MFAM->getResult(MF) - .getCachedResult(M); - if (ArgUsageInfo) - ArgUsageInfo->setFuncArgInfo(Fn, Info->getArgInfo()); - } - unsigned StackArgSize = CCInfo.getStackSize(); Info->setBytesInStackArgArea(StackArgSize); @@ -3890,24 +3876,8 @@ void SITargetLowering::passSpecialInputs( const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo(); - const AMDGPUFunctionArgInfo *CalleeArgInfo = - &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; - if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) { - if (DAG.getPass()) { - auto &ArgUsageInfo = - DAG.getPass()->getAnalysis(); - CalleeArgInfo = - &ArgUsageInfo.getArgUsageInfo().lookupFuncArgInfo(*CalleeFunc); - } else if (auto *MFAM = DAG.getMFAM()) { - Module &M = *DAG.getMachineFunction().getFunction().getParent(); - auto *ArgUsageInfo = - MFAM->getResult( - DAG.getMachineFunction()) - .getCachedResult(M); - if (ArgUsageInfo) - CalleeArgInfo = &ArgUsageInfo->lookupFuncArgInfo(*CalleeFunc); - } - } + const AMDGPUFunctionArgInfo &CalleeArgInfo = + AMDGPUFunctionArgInfo::FixedABIFunctionInfo; // TODO: Unify with private memory register handling. This is complicated by // the fact that at least in kernels, the input argument is not necessarily @@ -3934,7 +3904,7 @@ void SITargetLowering::passSpecialInputs( continue; const auto [OutgoingArg, ArgRC, ArgTy] = - CalleeArgInfo->getPreloadedValue(InputID); + CalleeArgInfo.getPreloadedValue(InputID); if (!OutgoingArg) continue; @@ -3983,13 +3953,13 @@ void SITargetLowering::passSpecialInputs( // packed. auto [OutgoingArg, ArgRC, Ty] = - CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X); + CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X); if (!OutgoingArg) std::tie(OutgoingArg, ArgRC, Ty) = - CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y); + CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y); if (!OutgoingArg) std::tie(OutgoingArg, ArgRC, Ty) = - CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z); + CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z); if (!OutgoingArg) return; @@ -4008,7 +3978,7 @@ void SITargetLowering::passSpecialInputs( const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z"); // If incoming ids are not packed we need to pack them. - if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX && + if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX && NeedWorkItemIDX) { if (Subtarget->getMaxWorkitemID(F, 0) != 0) { InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX); @@ -4017,7 +3987,7 @@ void SITargetLowering::passSpecialInputs( } } - if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY && + if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY && NeedWorkItemIDY && Subtarget->getMaxWorkitemID(F, 1) != 0) { SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY); Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y, @@ -4027,7 +3997,7 @@ void SITargetLowering::passSpecialInputs( : Y; } - if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ && + if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ && NeedWorkItemIDZ && Subtarget->getMaxWorkitemID(F, 2) != 0) { SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ); Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z, diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index af3226d4d944..25a28ec47191 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -109,7 +109,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, } else if (!isEntryFunction()) { if (CC != CallingConv::AMDGPU_Gfx && CC != CallingConv::AMDGPU_Gfx_WholeWave) - ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; + ArgInfo = AMDGPUFunctionArgInfo::FixedABIFunctionInfo; FrameOffsetReg = AMDGPU::SGPR33; StackPtrOffsetReg = AMDGPU::SGPR32; diff --git a/llvm/test/CodeGen/AMDGPU/cc-entry.ll b/llvm/test/CodeGen/AMDGPU/cc-entry.ll index 6a857ba8e0c2..f8aca120184f 100644 --- a/llvm/test/CodeGen/AMDGPU/cc-entry.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-entry.ll @@ -1,17 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck %s define amdgpu_kernel void @entry_fn() { ; CHECK-LABEL: entry_fn: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[8:9], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_sext_i32_i16 s5, s5 ; CHECK-NEXT: s_add_co_u32 s4, s4, entry_fn@gotpcrel32@lo+8 ; CHECK-NEXT: s_add_co_ci_u32 s5, s5, entry_fn@gotpcrel32@hi+16 +; CHECK-NEXT: v_mov_b32_e32 v31, v0 +; CHECK-NEXT: s_load_b64 s[12:13], s[4:5], 0x0 +; CHECK-NEXT: s_mov_b64 s[10:11], s[6:7] +; CHECK-NEXT: s_mov_b64 s[4:5], s[0:1] +; CHECK-NEXT: s_mov_b64 s[6:7], s[2:3] ; CHECK-NEXT: s_mov_b32 s32, 0 -; CHECK-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; CHECK-NEXT: s_wait_kmcnt 0x0 -; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[12:13] ; CHECK-NEXT: s_endpgm entry: call void @entry_fn() @@ -34,22 +39,17 @@ define void @caller() { ; CHECK-NEXT: s_mov_b32 exec_lo, s1 ; CHECK-NEXT: s_add_co_i32 s32, s32, 16 ; CHECK-NEXT: v_writelane_b32 v40, s0, 2 -; CHECK-NEXT: s_mov_b64 s[0:1], s[4:5] -; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0) -; CHECK-NEXT: s_sext_i32_i16 s5, s5 -; CHECK-NEXT: s_add_co_u32 s4, s4, entry_fn@gotpcrel32@lo+12 +; CHECK-NEXT: s_sext_i32_i16 s1, s1 +; CHECK-NEXT: s_add_co_u32 s0, s0, entry_fn@gotpcrel32@lo+12 ; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0) -; CHECK-NEXT: s_add_co_ci_u32 s5, s5, entry_fn@gotpcrel32@hi+24 -; CHECK-NEXT: v_mov_b32_e32 v0, v31 -; CHECK-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 +; CHECK-NEXT: s_add_co_ci_u32 s1, s1, entry_fn@gotpcrel32@hi+24 +; CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: s_mov_b64 s[2:3], s[6:7] -; CHECK-NEXT: s_mov_b64 s[6:7], s[10:11] ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_wait_kmcnt 0x0 -; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1] ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index b1d9d618302a..094ffe2b7971 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -36,9 +36,8 @@ ; GCN-O0-NEXT: amdgpu-lower-intrinsics ; GCN-O0-NEXT: cgscc(function(lower-switch ; GCN-O0-NEXT: lower-invoke -; GCN-O0-NEXT: unreachableblockelim)) -; GCN-O0-NEXT: require -; GCN-O0-NEXT: cgscc(function(amdgpu-unify-divergent-exit-nodes +; GCN-O0-NEXT: unreachableblockelim +; GCN-O0-NEXT: amdgpu-unify-divergent-exit-nodes ; GCN-O0-NEXT: fix-irreducible ; GCN-O0-NEXT: unify-loop-exits ; GCN-O0-NEXT: StructurizeCFGPass @@ -145,9 +144,8 @@ ; GCN-O2-NEXT: amdgpu-lower-intrinsics ; GCN-O2-NEXT: cgscc(function(lower-switch ; GCN-O2-NEXT: lower-invoke -; GCN-O2-NEXT: unreachableblockelim)) -; GCN-O2-NEXT: require -; GCN-O2-NEXT: cgscc(function(flatten-cfg +; GCN-O2-NEXT: unreachableblockelim +; GCN-O2-NEXT: flatten-cfg ; GCN-O2-NEXT: sink ; GCN-O2-NEXT: amdgpu-late-codegenprepare ; GCN-O2-NEXT: amdgpu-unify-divergent-exit-nodes @@ -317,9 +315,8 @@ ; GCN-O3-NEXT: amdgpu-lower-intrinsics ; GCN-O3-NEXT: cgscc(function(lower-switch ; GCN-O3-NEXT: lower-invoke -; GCN-O3-NEXT: unreachableblockelim)) -; GCN-O3-NEXT: require -; GCN-O3-NEXT: cgscc(function(flatten-cfg +; GCN-O3-NEXT: unreachableblockelim +; GCN-O3-NEXT: flatten-cfg ; GCN-O3-NEXT: sink ; GCN-O3-NEXT: amdgpu-late-codegenprepare ; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 2904ba604fb1..abb6ccc5faad 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -21,7 +21,6 @@ ; GCN-O0-NEXT:Library Function Lowering Analysis ; GCN-O0-NEXT:Assumption Cache Tracker ; GCN-O0-NEXT:Profile summary info -; GCN-O0-NEXT:Argument Register Usage Information Storage ; GCN-O0-NEXT:Create Garbage Collector Module Metadata ; GCN-O0-NEXT:Register Usage Information Storage ; GCN-O0-NEXT:Machine Branch Probability Analysis @@ -174,7 +173,6 @@ ; GCN-O1-NEXT:External Alias Analysis ; GCN-O1-NEXT:Type-Based Alias Analysis ; GCN-O1-NEXT:Scoped NoAlias Alias Analysis -; GCN-O1-NEXT:Argument Register Usage Information Storage ; GCN-O1-NEXT:Create Garbage Collector Module Metadata ; GCN-O1-NEXT:Machine Branch Probability Analysis ; GCN-O1-NEXT:Register Usage Information Storage @@ -467,7 +465,6 @@ ; GCN-O1-OPTS-NEXT:External Alias Analysis ; GCN-O1-OPTS-NEXT:Type-Based Alias Analysis ; GCN-O1-OPTS-NEXT:Scoped NoAlias Alias Analysis -; GCN-O1-OPTS-NEXT:Argument Register Usage Information Storage ; GCN-O1-OPTS-NEXT:Create Garbage Collector Module Metadata ; GCN-O1-OPTS-NEXT:Machine Branch Probability Analysis ; GCN-O1-OPTS-NEXT:Register Usage Information Storage @@ -787,7 +784,6 @@ ; GCN-O2-NEXT:External Alias Analysis ; GCN-O2-NEXT:Type-Based Alias Analysis ; GCN-O2-NEXT:Scoped NoAlias Alias Analysis -; GCN-O2-NEXT:Argument Register Usage Information Storage ; GCN-O2-NEXT:Create Garbage Collector Module Metadata ; GCN-O2-NEXT:Machine Branch Probability Analysis ; GCN-O2-NEXT:Register Usage Information Storage @@ -1112,7 +1108,6 @@ ; GCN-O3-NEXT:External Alias Analysis ; GCN-O3-NEXT:Type-Based Alias Analysis ; GCN-O3-NEXT:Scoped NoAlias Alias Analysis -; GCN-O3-NEXT:Argument Register Usage Information Storage ; GCN-O3-NEXT:Create Garbage Collector Module Metadata ; GCN-O3-NEXT:Machine Branch Probability Analysis ; GCN-O3-NEXT:Register Usage Information Storage