[AMDGPU] Remove AMDGPUArgumentUsageInfo pass (#182490)

`AMDGPUArgumentUsageInfo` provided a per-function map. SelectionDAG's
`LowerFormalArguments` wrote each function's implicit argument register
layout into it, and `passSpecialInputs` read it back when lowering calls
to look up the callee's layout. This per-function map is
redundant for all non-entry callees, which already use the same
`FixedABIFunctionInfo` register layout.

GlobalISel already used `FixedABIFunctionInfo` unconditionally. This
change makes SelectionDAG do the same.
This commit is contained in:
Dark Steve 2026-02-23 18:47:01 +05:30 committed by GitHub
parent eac18e783f
commit 9e6a6be8a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 41 additions and 222 deletions

View File

@ -536,8 +536,6 @@ void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
ImmutablePass *createAMDGPUExternalAAWrapperPass();
void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
void initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(PassRegistry &);
ModulePass *createAMDGPUExportKernelRuntimeHandlesLegacyPass();
void initializeAMDGPUExportKernelRuntimeHandlesLegacyPass(PassRegistry &);
extern char &AMDGPUExportKernelRuntimeHandlesLegacyID;

View File

@ -11,17 +11,11 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
INITIALIZE_PASS(AMDGPUArgumentUsageInfoWrapperLegacy, DEBUG_TYPE,
"Argument Register Usage Information Storage", false, true)
void ArgDescriptor::print(raw_ostream &OS,
const TargetRegisterInfo *TRI) const {
if (!isSet()) {
@ -42,46 +36,9 @@ void ArgDescriptor::print(raw_ostream &OS,
OS << '\n';
}
char AMDGPUArgumentUsageInfoWrapperLegacy::ID = 0;
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
// Hardcoded registers from fixed function ABI
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
= AMDGPUFunctionArgInfo::fixedABILayout();
// TODO: Print preload kernargs?
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
for (const auto &FI : ArgInfoMap) {
OS << "Arguments for " << FI.first->getName() << '\n'
<< " PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
<< " DispatchPtr: " << FI.second.DispatchPtr
<< " QueuePtr: " << FI.second.QueuePtr
<< " KernargSegmentPtr: " << FI.second.KernargSegmentPtr
<< " DispatchID: " << FI.second.DispatchID
<< " FlatScratchInit: " << FI.second.FlatScratchInit
<< " PrivateSegmentSize: " << FI.second.PrivateSegmentSize
<< " WorkGroupIDX: " << FI.second.WorkGroupIDX
<< " WorkGroupIDY: " << FI.second.WorkGroupIDY
<< " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
<< " WorkGroupInfo: " << FI.second.WorkGroupInfo
<< " LDSKernelId: " << FI.second.LDSKernelId
<< " PrivateSegmentWaveByteOffset: "
<< FI.second.PrivateSegmentWaveByteOffset
<< " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
<< " ImplicitArgPtr: " << FI.second.ImplicitArgPtr
<< " WorkItemIDX " << FI.second.WorkItemIDX
<< " WorkItemIDY " << FI.second.WorkItemIDY
<< " WorkItemIDZ " << FI.second.WorkItemIDZ
<< '\n';
}
}
bool AMDGPUArgumentUsageInfo::invalidate(Module &M, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
auto PAC = PA.getChecker<AMDGPUArgumentUsageAnalysis>();
return !PAC.preservedWhenStateless();
}
const AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::FixedABIFunctionInfo =
AMDGPUFunctionArgInfo::fixedABILayout();
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
AMDGPUFunctionArgInfo::getPreloadedValue(
@ -180,18 +137,3 @@ AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
return AI;
}
const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
auto I = ArgInfoMap.find(&F);
if (I == ArgInfoMap.end())
return FixedABIFunctionInfo;
return I->second;
}
AnalysisKey AMDGPUArgumentUsageAnalysis::Key;
AMDGPUArgumentUsageInfo
AMDGPUArgumentUsageAnalysis::run(Module &M, ModuleAnalysisManager &) {
return AMDGPUArgumentUsageInfo();
}

View File

@ -10,16 +10,11 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include <variant>
namespace llvm {
class Function;
class LLT;
class raw_ostream;
class TargetRegisterClass;
@ -28,7 +23,6 @@ class TargetRegisterInfo;
struct ArgDescriptor {
private:
friend struct AMDGPUFunctionArgInfo;
friend class AMDGPUArgumentUsageInfo;
std::variant<std::monostate, MCRegister, unsigned> Val;
@ -168,69 +162,7 @@ struct AMDGPUFunctionArgInfo {
getPreloadedValue(PreloadedValue Value) const;
static AMDGPUFunctionArgInfo fixedABILayout();
};
class AMDGPUArgumentUsageInfo {
private:
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
public:
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
void print(raw_ostream &OS, const Module *M = nullptr) const;
void clear() { ArgInfoMap.clear(); }
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
ArgInfoMap[&F] = ArgInfo;
}
const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const;
bool invalidate(Module &M, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &Inv);
};
class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass {
std::unique_ptr<AMDGPUArgumentUsageInfo> AUIP;
public:
static char ID;
AMDGPUArgumentUsageInfoWrapperLegacy() : ImmutablePass(ID) {}
AMDGPUArgumentUsageInfo &getArgUsageInfo() { return *AUIP; }
const AMDGPUArgumentUsageInfo &getArgUsageInfo() const { return *AUIP; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
bool doInitialization(Module &M) override {
AUIP = std::make_unique<AMDGPUArgumentUsageInfo>();
return false;
}
bool doFinalization(Module &M) override {
AUIP->clear();
return false;
}
void print(raw_ostream &OS, const Module *M = nullptr) const override {
AUIP->print(OS, M);
}
};
class AMDGPUArgumentUsageAnalysis
: public AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis> {
friend AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis>;
static AnalysisKey Key;
public:
using Result = AMDGPUArgumentUsageInfo;
AMDGPUArgumentUsageInfo run(Module &M, ModuleAnalysisManager &);
};
} // end namespace llvm

View File

@ -790,8 +790,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
if (!Info.CB)
return true;
const AMDGPUFunctionArgInfo *CalleeArgInfo
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
const AMDGPUFunctionArgInfo &CalleeArgInfo =
AMDGPUFunctionArgInfo::FixedABIFunctionInfo;
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const AMDGPUFunctionArgInfo &CallerArgInfo = MFI->getArgInfo();
@ -841,7 +841,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
continue;
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
CalleeArgInfo.getPreloadedValue(InputID);
if (!OutgoingArg)
continue;
@ -888,13 +888,13 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
LLT ArgTy;
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
if (!OutgoingArg)
return false;
@ -917,7 +917,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
// If incoming ids are not packed we need to pack them.
// FIXME: Should consider known workgroup size to eliminate known 0 cases.
Register InputReg;
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX &&
NeedWorkItemIDX) {
if (ST.getMaxWorkitemID(MF.getFunction(), 0) != 0) {
InputReg = MRI.createGenericVirtualRegister(S32);
@ -929,7 +929,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
}
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY &&
NeedWorkItemIDY && ST.getMaxWorkitemID(MF.getFunction(), 1) != 0) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->buildLoadInputValue(Y, MIRBuilder, IncomingArgY,
@ -939,7 +939,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ &&
NeedWorkItemIDZ && ST.getMaxWorkitemID(MF.getFunction(), 2) != 0) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->buildLoadInputValue(Z, MIRBuilder, IncomingArgZ,

View File

@ -134,7 +134,6 @@ static SDValue stripExtractLoElt(SDValue In) {
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false,
false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfoWrapperLegacy)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
#ifdef EXPENSIVE_CHECKS
@ -238,7 +237,6 @@ bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
}
void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AMDGPUArgumentUsageInfoWrapperLegacy>();
AU.addRequired<UniformityInfoWrapperPass>();
#ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>();

View File

@ -13,12 +13,6 @@
// NOTE: NO INCLUDE GUARD DESIRED!
#ifndef MODULE_ANALYSIS
#define MODULE_ANALYSIS(NAME, CREATE_PASS)
#endif
MODULE_ANALYSIS("amdgpu-argument-usage", AMDGPUArgumentUsageAnalysis())
#undef MODULE_ANALYSIS
#ifndef MODULE_PASS
#define MODULE_PASS(NAME, CREATE_PASS)
#endif

View File

@ -17,7 +17,6 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUBarrierLatency.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
@ -642,7 +641,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPULowerExecSyncLegacyPass(*PR);
initializeAMDGPUSwLowerLDSLegacyPass(*PR);
initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR);
initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(*PR);
initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPUPromoteKernelArgumentsPass(*PR);
@ -2289,11 +2287,6 @@ void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(
void AMDGPUCodeGenPassBuilder::addPreISel(PassManagerWrapper &PMW) const {
// Require AMDGPUArgumentUsageAnalysis so that it's available during ISel.
flushFPMsToMPM(PMW);
addModulePass(RequireAnalysisPass<AMDGPUArgumentUsageAnalysis, Module>(),
PMW);
if (TM.getOptLevel() > CodeGenOptLevel::None) {
addFunctionPass(FlattenCFGPass(), PMW);
addFunctionPass(SinkingPass(), PMW);

View File

@ -35,7 +35,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/PseudoSourceValueManager.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/DiagnosticInfo.h"
@ -3668,19 +3667,6 @@ SDValue SITargetLowering::LowerFormalArguments(
if (IsEntryFunc)
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
if (DAG.getPass()) {
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfoWrapperLegacy>();
ArgUsageInfo.getArgUsageInfo().setFuncArgInfo(Fn, Info->getArgInfo());
} else if (auto *MFAM = DAG.getMFAM()) {
Module &M = *MF.getFunction().getParent();
auto *ArgUsageInfo =
MFAM->getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
.getCachedResult<AMDGPUArgumentUsageAnalysis>(M);
if (ArgUsageInfo)
ArgUsageInfo->setFuncArgInfo(Fn, Info->getArgInfo());
}
unsigned StackArgSize = CCInfo.getStackSize();
Info->setBytesInStackArgArea(StackArgSize);
@ -3890,24 +3876,8 @@ void SITargetLowering::passSpecialInputs(
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
const AMDGPUFunctionArgInfo *CalleeArgInfo =
&AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) {
if (DAG.getPass()) {
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfoWrapperLegacy>();
CalleeArgInfo =
&ArgUsageInfo.getArgUsageInfo().lookupFuncArgInfo(*CalleeFunc);
} else if (auto *MFAM = DAG.getMFAM()) {
Module &M = *DAG.getMachineFunction().getFunction().getParent();
auto *ArgUsageInfo =
MFAM->getResult<ModuleAnalysisManagerMachineFunctionProxy>(
DAG.getMachineFunction())
.getCachedResult<AMDGPUArgumentUsageAnalysis>(M);
if (ArgUsageInfo)
CalleeArgInfo = &ArgUsageInfo->lookupFuncArgInfo(*CalleeFunc);
}
}
const AMDGPUFunctionArgInfo &CalleeArgInfo =
AMDGPUFunctionArgInfo::FixedABIFunctionInfo;
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
@ -3934,7 +3904,7 @@ void SITargetLowering::passSpecialInputs(
continue;
const auto [OutgoingArg, ArgRC, ArgTy] =
CalleeArgInfo->getPreloadedValue(InputID);
CalleeArgInfo.getPreloadedValue(InputID);
if (!OutgoingArg)
continue;
@ -3983,13 +3953,13 @@ void SITargetLowering::passSpecialInputs(
// packed.
auto [OutgoingArg, ArgRC, Ty] =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
CalleeArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
if (!OutgoingArg)
return;
@ -4008,7 +3978,7 @@ void SITargetLowering::passSpecialInputs(
const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
// If incoming ids are not packed we need to pack them.
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo.WorkItemIDX &&
NeedWorkItemIDX) {
if (Subtarget->getMaxWorkitemID(F, 0) != 0) {
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
@ -4017,7 +3987,7 @@ void SITargetLowering::passSpecialInputs(
}
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo.WorkItemIDY &&
NeedWorkItemIDY && Subtarget->getMaxWorkitemID(F, 1) != 0) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
@ -4027,7 +3997,7 @@ void SITargetLowering::passSpecialInputs(
: Y;
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo.WorkItemIDZ &&
NeedWorkItemIDZ && Subtarget->getMaxWorkitemID(F, 2) != 0) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,

View File

@ -109,7 +109,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
} else if (!isEntryFunction()) {
if (CC != CallingConv::AMDGPU_Gfx &&
CC != CallingConv::AMDGPU_Gfx_WholeWave)
ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
ArgInfo = AMDGPUFunctionArgInfo::FixedABIFunctionInfo;
FrameOffsetReg = AMDGPU::SGPR33;
StackPtrOffsetReg = AMDGPU::SGPR32;

View File

@ -1,17 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck %s
define amdgpu_kernel void @entry_fn() {
; CHECK-LABEL: entry_fn:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_mov_b64 s[8:9], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_sext_i32_i16 s5, s5
; CHECK-NEXT: s_add_co_u32 s4, s4, entry_fn@gotpcrel32@lo+8
; CHECK-NEXT: s_add_co_ci_u32 s5, s5, entry_fn@gotpcrel32@hi+16
; CHECK-NEXT: v_mov_b32_e32 v31, v0
; CHECK-NEXT: s_load_b64 s[12:13], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[10:11], s[6:7]
; CHECK-NEXT: s_mov_b64 s[4:5], s[0:1]
; CHECK-NEXT: s_mov_b64 s[6:7], s[2:3]
; CHECK-NEXT: s_mov_b32 s32, 0
; CHECK-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_swappc_b64 s[30:31], s[12:13]
; CHECK-NEXT: s_endpgm
entry:
call void @entry_fn()
@ -34,22 +39,17 @@ define void @caller() {
; CHECK-NEXT: s_mov_b32 exec_lo, s1
; CHECK-NEXT: s_add_co_i32 s32, s32, 16
; CHECK-NEXT: v_writelane_b32 v40, s0, 2
; CHECK-NEXT: s_mov_b64 s[0:1], s[4:5]
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_getpc_b64 s[0:1]
; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
; CHECK-NEXT: s_sext_i32_i16 s5, s5
; CHECK-NEXT: s_add_co_u32 s4, s4, entry_fn@gotpcrel32@lo+12
; CHECK-NEXT: s_sext_i32_i16 s1, s1
; CHECK-NEXT: s_add_co_u32 s0, s0, entry_fn@gotpcrel32@lo+12
; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
; CHECK-NEXT: s_add_co_ci_u32 s5, s5, entry_fn@gotpcrel32@hi+24
; CHECK-NEXT: v_mov_b32_e32 v0, v31
; CHECK-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; CHECK-NEXT: s_add_co_ci_u32 s1, s1, entry_fn@gotpcrel32@hi+24
; CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: s_mov_b64 s[2:3], s[6:7]
; CHECK-NEXT: s_mov_b64 s[6:7], s[10:11]
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0

View File

@ -36,9 +36,8 @@
; GCN-O0-NEXT: amdgpu-lower-intrinsics
; GCN-O0-NEXT: cgscc(function(lower-switch
; GCN-O0-NEXT: lower-invoke
; GCN-O0-NEXT: unreachableblockelim))
; GCN-O0-NEXT: require<amdgpu-argument-usage>
; GCN-O0-NEXT: cgscc(function(amdgpu-unify-divergent-exit-nodes
; GCN-O0-NEXT: unreachableblockelim
; GCN-O0-NEXT: amdgpu-unify-divergent-exit-nodes
; GCN-O0-NEXT: fix-irreducible
; GCN-O0-NEXT: unify-loop-exits
; GCN-O0-NEXT: StructurizeCFGPass
@ -145,9 +144,8 @@
; GCN-O2-NEXT: amdgpu-lower-intrinsics
; GCN-O2-NEXT: cgscc(function(lower-switch
; GCN-O2-NEXT: lower-invoke
; GCN-O2-NEXT: unreachableblockelim))
; GCN-O2-NEXT: require<amdgpu-argument-usage>
; GCN-O2-NEXT: cgscc(function(flatten-cfg
; GCN-O2-NEXT: unreachableblockelim
; GCN-O2-NEXT: flatten-cfg
; GCN-O2-NEXT: sink
; GCN-O2-NEXT: amdgpu-late-codegenprepare
; GCN-O2-NEXT: amdgpu-unify-divergent-exit-nodes
@ -317,9 +315,8 @@
; GCN-O3-NEXT: amdgpu-lower-intrinsics
; GCN-O3-NEXT: cgscc(function(lower-switch
; GCN-O3-NEXT: lower-invoke
; GCN-O3-NEXT: unreachableblockelim))
; GCN-O3-NEXT: require<amdgpu-argument-usage>
; GCN-O3-NEXT: cgscc(function(flatten-cfg
; GCN-O3-NEXT: unreachableblockelim
; GCN-O3-NEXT: flatten-cfg
; GCN-O3-NEXT: sink
; GCN-O3-NEXT: amdgpu-late-codegenprepare
; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes

View File

@ -21,7 +21,6 @@
; GCN-O0-NEXT:Library Function Lowering Analysis
; GCN-O0-NEXT:Assumption Cache Tracker
; GCN-O0-NEXT:Profile summary info
; GCN-O0-NEXT:Argument Register Usage Information Storage
; GCN-O0-NEXT:Create Garbage Collector Module Metadata
; GCN-O0-NEXT:Register Usage Information Storage
; GCN-O0-NEXT:Machine Branch Probability Analysis
@ -174,7 +173,6 @@
; GCN-O1-NEXT:External Alias Analysis
; GCN-O1-NEXT:Type-Based Alias Analysis
; GCN-O1-NEXT:Scoped NoAlias Alias Analysis
; GCN-O1-NEXT:Argument Register Usage Information Storage
; GCN-O1-NEXT:Create Garbage Collector Module Metadata
; GCN-O1-NEXT:Machine Branch Probability Analysis
; GCN-O1-NEXT:Register Usage Information Storage
@ -467,7 +465,6 @@
; GCN-O1-OPTS-NEXT:External Alias Analysis
; GCN-O1-OPTS-NEXT:Type-Based Alias Analysis
; GCN-O1-OPTS-NEXT:Scoped NoAlias Alias Analysis
; GCN-O1-OPTS-NEXT:Argument Register Usage Information Storage
; GCN-O1-OPTS-NEXT:Create Garbage Collector Module Metadata
; GCN-O1-OPTS-NEXT:Machine Branch Probability Analysis
; GCN-O1-OPTS-NEXT:Register Usage Information Storage
@ -787,7 +784,6 @@
; GCN-O2-NEXT:External Alias Analysis
; GCN-O2-NEXT:Type-Based Alias Analysis
; GCN-O2-NEXT:Scoped NoAlias Alias Analysis
; GCN-O2-NEXT:Argument Register Usage Information Storage
; GCN-O2-NEXT:Create Garbage Collector Module Metadata
; GCN-O2-NEXT:Machine Branch Probability Analysis
; GCN-O2-NEXT:Register Usage Information Storage
@ -1112,7 +1108,6 @@
; GCN-O3-NEXT:External Alias Analysis
; GCN-O3-NEXT:Type-Based Alias Analysis
; GCN-O3-NEXT:Scoped NoAlias Alias Analysis
; GCN-O3-NEXT:Argument Register Usage Information Storage
; GCN-O3-NEXT:Create Garbage Collector Module Metadata
; GCN-O3-NEXT:Machine Branch Probability Analysis
; GCN-O3-NEXT:Register Usage Information Storage