From 34ee487775ae3b93f241e1bf8c8e97046d998654 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 25 Mar 2026 09:22:04 +0100 Subject: [PATCH] AMDGPU: Implement memsize forms of isLoadFromStackSlot/isStoreToStackSlot (#188264) Requested in #182673, though I'm not sure why this needs to be pushed into targets. The size can be taken from the machine mem operand generically. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34 ++++++++++++------- llvm/lib/Target/AMDGPU/SIInstrInfo.h | 23 ++++++++++--- .../si-lower-sgpr-spills-vgpr-lanes-usage.mir | 3 -- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5086c553da10..df2700d41489 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9748,8 +9748,8 @@ bool SIInstrInfo::isHighLatencyDef(int Opc) const { (isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc) || isFLAT(Opc)); } -Register SIInstrInfo::isStackAccess(const MachineInstr &MI, - int &FrameIndex) const { +Register SIInstrInfo::isStackAccess(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const { const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr); if (!Addr || !Addr->isFI()) return Register(); @@ -9758,41 +9758,51 @@ Register SIInstrInfo::isStackAccess(const MachineInstr &MI, (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); - return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); + + int VDataIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); + MemBytes = TypeSize::getFixed(getOpSize(MI.getOpcode(), VDataIdx)); + return MI.getOperand(VDataIdx).getReg(); } -Register SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, - int &FrameIndex) const { +Register SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const { const MachineOperand *Addr = getNamedOperand(MI, 
AMDGPU::OpName::addr); assert(Addr && Addr->isFI()); FrameIndex = Addr->getIndex(); - return getNamedOperand(MI, AMDGPU::OpName::data)->getReg(); + + int DataIdx = + AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::data); + MemBytes = TypeSize::getFixed(getOpSize(MI.getOpcode(), DataIdx)); + return MI.getOperand(DataIdx).getReg(); } Register SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, - int &FrameIndex) const { + int &FrameIndex, + TypeSize &MemBytes) const { if (!MI.mayLoad()) return Register(); if (isMUBUF(MI) || isVGPRSpill(MI)) - return isStackAccess(MI, FrameIndex); + return isStackAccess(MI, FrameIndex, MemBytes); if (isSGPRSpill(MI)) - return isSGPRStackAccess(MI, FrameIndex); + return isSGPRStackAccess(MI, FrameIndex, MemBytes); return Register(); } Register SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, - int &FrameIndex) const { + int &FrameIndex, + TypeSize &MemBytes) const { if (!MI.mayStore()) return Register(); if (isMUBUF(MI) || isVGPRSpill(MI)) - return isStackAccess(MI, FrameIndex); + return isStackAccess(MI, FrameIndex, MemBytes); if (isSGPRSpill(MI)) - return isSGPRStackAccess(MI, FrameIndex); + return isSGPRStackAccess(MI, FrameIndex, MemBytes); return Register(); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 93d28d22bfd1..cc0b0408bc09 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1556,13 +1556,28 @@ public: return get(pseudoToMCOpcode(Opcode)); } - Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const; - Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const; + Register isStackAccess(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const; + Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const; Register isLoadFromStackSlot(const MachineInstr &MI, - int &FrameIndex) const override; + int &FrameIndex) const override { + 
TypeSize MemBytes = TypeSize::getZero(); + return isLoadFromStackSlot(MI, FrameIndex, MemBytes); + } + + Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const override; + Register isStoreToStackSlot(const MachineInstr &MI, - int &FrameIndex) const override; + int &FrameIndex) const override { + TypeSize MemBytes = TypeSize::getZero(); + return isStoreToStackSlot(MI, FrameIndex, MemBytes); + } + + Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex, + TypeSize &MemBytes) const override; unsigned getInstBundleSize(const MachineInstr &MI) const; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir index 99621bccf06a..1ffef8e60d90 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir @@ -42,7 +42,6 @@ body: | ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4 ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0 ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 - ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr0_sgpr1 ; SGPR_SPILLED-NEXT: $sgpr1 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3 @@ -108,7 +107,6 @@ body: | ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4 ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0 ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 - ; SGPR_SPILLED-NEXT: 
[[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: $sgpr2 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1, implicit-def $sgpr2_sgpr3 ; SGPR_SPILLED-NEXT: $sgpr3 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 2 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]] @@ -171,7 +169,6 @@ body: | ; SGPR_SPILLED-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4 ; SGPR_SPILLED-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $vgpr0 ; SGPR_SPILLED-NEXT: $sgpr32 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 - ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3