diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 1402291539ff..bb8a80f811d4 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -496,6 +496,17 @@ public: SmallVector getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override; + + float + getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override { + // Double the spill weight scale factor of the Lo256 classes to prioritize + // them over classes which may occupy registers beyond v255. + return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) * + ((RC == &AMDGPU::VGPR_32_Lo256RegClass || + RC == &AMDGPU::VReg_64_Lo256_Align2RegClass) + ? 2.0 + : 1.0); + } }; namespace AMDGPU { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index abe12c17ae76..5cff5f2248b0 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -644,7 +644,7 @@ def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg1 // Identical to VGPR_32 except it only contains the low 256 (Lo256) registers. def VGPR_32_Lo256 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32, (add (sequence "VGPR%u", 0, 255))> { - let AllocationPriority = 0; + let AllocationPriority = !add(3, !mul(BaseClassPriority, BaseClassScaleFactor)); let GeneratePressureSet = 0; let Size = 32; let Weight = 1; diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll b/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll index 1ac3da3b930f..eafe54ebc98f 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll +++ b/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll @@ -1,9 +1,8 @@ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -; FIXME: Scale operands of WMMA are limited to low 256 VGPRs -; currently we are spilling it because all low VGPRs are occupied even though our budget is higher. 
+; Scale operands of WMMA are limited to low 256 VGPRs ; Make sure we do not spill scale operands because of the low 256 restriction. -; CHECK: ; ScratchSize: 12 +; CHECK: ; ScratchSize: 0 ; CHECK: ; Occupancy: 1 define amdgpu_kernel void @spill_scale_test(float %arg, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <16 x i32> %arg8, float %arg9, <16 x i32> %arg10, float %arg11, <16 x i8> %arg12) #0 {