llvm-project/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
Dark Steve cc19f420b9
[AMDGPU][NPM] Port AMDGPUArgumentUsageInfo to NPM (#170886)
Port AMDGPUArgumentUsageInfo analysis to the NPM to fix suboptimal code
generation when NPM is enabled by default.

Previously, DAG.getPass() returned nullptr when using the NPM, so the
argument usage info was unavailable during ISel. This resulted in a
fallback to FixedABIFunctionInfo, which assumes all implicit arguments
are needed, generating unnecessary register setup code for entry
functions.

Fixes LLVM::CodeGen/AMDGPU/cc-entry.ll

Changes:
- Split AMDGPUArgumentUsageInfo into a data class and NPM analysis
wrapper
- Update SIISelLowering to use DAG.getMFAM() for NPM path
- Add RequireAnalysisPass in addPreISel() to ensure analysis
availability

This follows the same pattern used for PhysicalRegisterUsageInfo.
2025-12-08 20:38:00 +05:30

198 lines
8.5 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Identifier string for this file; it is also handed to INITIALIZE_PASS below
// as the pass argument.
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
// Registers the legacy wrapper pass under DEBUG_TYPE with a human-readable
// name.
// NOTE(review): the trailing `false, true` are presumably the macro's
// "CFG only" and "is analysis" flags - confirm against llvm/PassSupport.h.
INITIALIZE_PASS(AMDGPUArgumentUsageInfoWrapperLegacy, DEBUG_TYPE,
"Argument Register Usage Information Storage", false, true)
/// Writes a single-line, human-readable description of this descriptor.
///
/// An unset descriptor is printed as "<not set>". A set descriptor is printed
/// as either "Reg <register>" (formatted through printReg with \p TRI) or
/// "Stack offset <offset>", followed by " & <hex mask>" when the descriptor is
/// masked. The line is always terminated with a newline.
///
/// \param OS  stream that receives the text.
/// \param TRI register info forwarded to printReg.
void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  // Nothing to describe for a descriptor that was never assigned.
  if (!isSet()) {
    OS << "<not set>\n";
    return;
  }

  // Location: stack slot or register.
  if (!isRegister()) {
    OS << "Stack offset ";
    OS << getStackOffset();
  } else {
    OS << "Reg ";
    OS << printReg(getRegister(), TRI);
  }

  // Optional bit mask, printed in lower-case hex with a prefix.
  if (isMasked()) {
    OS << " & ";
    llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
  }

  OS << '\n';
}
// Storage for the legacy wrapper pass's static ID member.
// NOTE(review): presumably only the address is used as the pass identity, as
// is conventional for legacy passes - confirm.
char AMDGPUArgumentUsageInfoWrapperLegacy::ID = 0;
// Value-initialized argument info.
// NOTE(review): presumably meant for external functions, going by the name;
// it is not referenced in this file - confirm against the callers.
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
// Hardcoded registers from fixed function ABI
// This is the value lookupFuncArgInfo() returns for any function that has no
// entry in ArgInfoMap.
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
= AMDGPUFunctionArgInfo::fixedABILayout();
// TODO: Print preload kernargs?
/// Dumps the argument descriptors recorded for every function in ArgInfoMap.
///
/// For each entry this writes an "Arguments for <function name>" header line,
/// then one "<Field>: <descriptor>" item per descriptor, then a final '\n'.
/// Every label, including the three WorkItemID ones, ends in ": " so the dump
/// is formatted uniformly.
///
/// \param OS stream that receives the dump.
/// \param M  not used by this implementation.
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
  for (const auto &[Fn, Info] : ArgInfoMap) {
    OS << "Arguments for " << Fn->getName() << '\n'
       << " PrivateSegmentBuffer: " << Info.PrivateSegmentBuffer
       << " DispatchPtr: " << Info.DispatchPtr
       << " QueuePtr: " << Info.QueuePtr
       << " KernargSegmentPtr: " << Info.KernargSegmentPtr
       << " DispatchID: " << Info.DispatchID
       << " FlatScratchInit: " << Info.FlatScratchInit
       << " PrivateSegmentSize: " << Info.PrivateSegmentSize
       << " WorkGroupIDX: " << Info.WorkGroupIDX
       << " WorkGroupIDY: " << Info.WorkGroupIDY
       << " WorkGroupIDZ: " << Info.WorkGroupIDZ
       << " WorkGroupInfo: " << Info.WorkGroupInfo
       << " LDSKernelId: " << Info.LDSKernelId
       << " PrivateSegmentWaveByteOffset: "
       << Info.PrivateSegmentWaveByteOffset
       << " ImplicitBufferPtr: " << Info.ImplicitBufferPtr
       << " ImplicitArgPtr: " << Info.ImplicitArgPtr
       // These three labels previously lacked the ':' separator used above.
       << " WorkItemIDX: " << Info.WorkItemIDX
       << " WorkItemIDY: " << Info.WorkItemIDY
       << " WorkItemIDZ: " << Info.WorkItemIDZ
       << '\n';
  }
}
/// Decides whether this analysis result has to be dropped.
///
/// The decision is delegated to the preserved-analyses checker for
/// AMDGPUArgumentUsageAnalysis: the result survives only when
/// preservedWhenStateless() reports it as preserved.
///
/// \param M  not used by this implementation.
/// \param PA set of analyses preserved by the transformation that just ran.
/// \returns true when the result is invalidated, false when it is kept.
bool AMDGPUArgumentUsageInfo::invalidate(Module &M, const PreservedAnalyses &PA,
                                         ModuleAnalysisManager::Invalidator &) {
  auto Checker = PA.getChecker<AMDGPUArgumentUsageAnalysis>();
  const bool StillPreserved = Checker.preservedWhenStateless();
  return !StillPreserved;
}
/// Looks up how the preloaded value \p Value is described for this function.
///
/// \returns a tuple holding, in order:
///   - a pointer to the matching descriptor, or nullptr when that descriptor
///     is not set (always nullptr for the CLUSTER_WORKGROUP_* values),
///   - the register class associated with the value, and
///   - the low-level type of the value.
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
    AMDGPUFunctionArgInfo::PreloadedValue Value) const {
  using ResultTy =
      std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>;

  // Low-level types shared by several of the cases below.
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT ConstPtr64 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);

  // Used by the values that have no descriptor member in this switch.
  const ArgDescriptor *const NoDescriptor = nullptr;

  // Packs one result, mapping an unset descriptor to nullptr.
  const auto Describe = [](const ArgDescriptor &Desc,
                           const TargetRegisterClass *RC, LLT Ty) {
    return ResultTy(Desc ? &Desc : nullptr, RC, Ty);
  };

  switch (Value) {
  // 128-bit SGPR tuple.
  case PRIVATE_SEGMENT_BUFFER:
    return Describe(PrivateSegmentBuffer, &AMDGPU::SGPR_128RegClass,
                    LLT::fixed_vector(4, 32));

  // 64-bit SGPR pairs holding constant-address-space pointers.
  case IMPLICIT_BUFFER_PTR:
    return Describe(ImplicitBufferPtr, &AMDGPU::SGPR_64RegClass, ConstPtr64);
  case KERNARG_SEGMENT_PTR:
    return Describe(KernargSegmentPtr, &AMDGPU::SGPR_64RegClass, ConstPtr64);
  case IMPLICIT_ARG_PTR:
    return Describe(ImplicitArgPtr, &AMDGPU::SGPR_64RegClass, ConstPtr64);
  case DISPATCH_PTR:
    return Describe(DispatchPtr, &AMDGPU::SGPR_64RegClass, ConstPtr64);
  case QUEUE_PTR:
    return Describe(QueuePtr, &AMDGPU::SGPR_64RegClass, ConstPtr64);

  // 64-bit SGPR pairs holding plain scalars.
  case DISPATCH_ID:
    return Describe(DispatchID, &AMDGPU::SGPR_64RegClass, S64);
  case FLAT_SCRATCH_INIT:
    return Describe(FlatScratchInit, &AMDGPU::SGPR_64RegClass, S64);

  // 32-bit SGPR scalars.
  case WORKGROUP_ID_X:
    return Describe(WorkGroupIDX, &AMDGPU::SGPR_32RegClass, S32);
  case WORKGROUP_ID_Y:
    return Describe(WorkGroupIDY, &AMDGPU::SGPR_32RegClass, S32);
  case WORKGROUP_ID_Z:
    return Describe(WorkGroupIDZ, &AMDGPU::SGPR_32RegClass, S32);
  case LDS_KERNEL_ID:
    return Describe(LDSKernelId, &AMDGPU::SGPR_32RegClass, S32);
  case PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
    return Describe(PrivateSegmentWaveByteOffset, &AMDGPU::SGPR_32RegClass,
                    S32);
  case PRIVATE_SEGMENT_SIZE:
    return Describe(PrivateSegmentSize, &AMDGPU::SGPR_32RegClass, S32);

  // Cluster values: register class and type only, never a descriptor.
  case CLUSTER_WORKGROUP_ID_X:
  case CLUSTER_WORKGROUP_ID_Y:
  case CLUSTER_WORKGROUP_ID_Z:
  case CLUSTER_WORKGROUP_MAX_ID_X:
  case CLUSTER_WORKGROUP_MAX_ID_Y:
  case CLUSTER_WORKGROUP_MAX_ID_Z:
  case CLUSTER_WORKGROUP_MAX_FLAT_ID:
    return ResultTy(NoDescriptor, &AMDGPU::SGPR_32RegClass, S32);

  // 32-bit VGPR scalars.
  case WORKITEM_ID_X:
    return Describe(WorkItemIDX, &AMDGPU::VGPR_32RegClass, S32);
  case WORKITEM_ID_Y:
    return Describe(WorkItemIDY, &AMDGPU::VGPR_32RegClass, S32);
  case WORKITEM_ID_Z:
    return Describe(WorkItemIDZ, &AMDGPU::VGPR_32RegClass, S32);
  }
  llvm_unreachable("unexpected preloaded value type");
}
/// Builds the argument layout that uses a fixed, hardcoded register for each
/// implicit argument.
///
/// SGPR0-SGPR15 carry the scalar inputs set below. KernargSegmentPtr,
/// FlatScratchInit and PrivateSegmentSize are deliberately left unset. The
/// three work-item IDs share VGPR31, each occupying its own 10-bit field
/// selected by a mask.
///
/// \returns the populated layout.
AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
  // Width and mask of one packed work-item ID field.
  constexpr unsigned WorkItemIDBits = 10;
  constexpr unsigned WorkItemIDMask = 0x3ff;

  AMDGPUFunctionArgInfo Layout;

  Layout.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
  Layout.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
  Layout.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);

  // Do not pass kernarg segment pointer, only pass increment version in its
  // place.
  Layout.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
  Layout.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);

  // Skip FlatScratchInit/PrivateSegmentSize
  Layout.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
  Layout.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
  Layout.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
  Layout.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15);

  // X sits in bits [9:0], Y in bits [19:10], Z in bits [29:20] of VGPR31.
  Layout.WorkItemIDX =
      ArgDescriptor::createRegister(AMDGPU::VGPR31, WorkItemIDMask);
  Layout.WorkItemIDY = ArgDescriptor::createRegister(
      AMDGPU::VGPR31, WorkItemIDMask << WorkItemIDBits);
  Layout.WorkItemIDZ = ArgDescriptor::createRegister(
      AMDGPU::VGPR31, WorkItemIDMask << (2 * WorkItemIDBits));

  return Layout;
}
/// Returns the argument info recorded for \p F.
///
/// When ArgInfoMap has no entry for \p F, the shared FixedABIFunctionInfo
/// layout is returned instead.
const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
  const auto Entry = ArgInfoMap.find(&F);
  // Both operands are lvalues of the same type, so the result is a reference
  // to existing storage rather than a temporary.
  return Entry != ArgInfoMap.end() ? Entry->second : FixedABIFunctionInfo;
}
// Out-of-line definition of the analysis' static Key member.
// NOTE(review): presumably the new pass manager uses this object's address to
// identify the analysis - confirm against the AnalysisInfoMixin contract.
AnalysisKey AMDGPUArgumentUsageAnalysis::Key;
/// Produces the analysis result for a module.
///
/// The result is always a freshly value-initialized AMDGPUArgumentUsageInfo;
/// neither the module nor the analysis manager is consulted here.
AMDGPUArgumentUsageInfo
AMDGPUArgumentUsageAnalysis::run(Module &M, ModuleAnalysisManager &) {
  AMDGPUArgumentUsageInfo EmptyInfo{};
  return EmptyInfo;
}