llvm-project/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
Ivan Kosarev 21c1ba16ed
[TableGen] Complete the support for artificial registers (#183371)
Artificial registers were added in
eb0c510ecde667cd911682cc1e855f73f341d134
as a means of giving super-registers heavier weights than that
of their subregisters, even when they only contain a single
physical subregister.

Artifical registers thus do exist in code and participate in
register unit weight calculations, but are not supposed to be
available for register allocation.

This patch completes the support for artificial registers to:

- Ignore artificial registers when joining register unit uber
  sets. Artificial registers may be members of classes that
  together include registers and their sub-registers, making it
  impossible to compute normalised weights for uber sets they
  belong to.

  We have a use case downstream relying on this being supported,
  which allows to avoid introducing a large number of additional
  register classes.

- Not generate purely artificial register class intersections.
  It is critical not to have such classes, as the common LLVM
  codegen infrastructure will try to use them to constrain
  classes of virtual registers instead of producing COPYs
  whenever both the source and target register classes contain
  the same artificial registers.

- Not generate sub-classes where classes with the same
  non-artificial members already exist. This is mostly for
  convenience. For example, the HI16-capable subset of AMDGPU's
  AV_32 is VGPR_32, except VGPR_32 also contains the artificial
  staging registers. If the staging registers are not ignored,
  we'll end up having an additional generated register class,
  AV_32_with_hi16_in_VGPR_16, -- harmless, but also useless.

Eliminates a few inferred AMDGPU register classes:
    - VS_32_with_hi16
    - VS_32_Lo256_with_hi16
    - VS_32_Lo128_with_hi16
    - VRegOrLds_32_and_VS_32_Lo256
    - VRegOrLds_32_and_VS_32_Lo128
    - SRegOrLds_32_and_VRegOrLds_32

Causes no register class changes for other targets.
2026-03-04 13:33:26 +00:00

31 lines
2.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -stop-after=greedy,1 -o - %s | FileCheck -check-prefix=GCN %s
; Convert AV spills into VGPR spills by introducing appropriate copies in between.
define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
; GCN-LABEL: name: test_spill_av_class
; GCN: bb.0 (%ir-block.0):
; GCN-NEXT: liveins: $sgpr8_sgpr9
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
; GCN-NEXT: [[AV_MOV_1:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[AV_MOV_]], [[AV_MOV_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 1179658 /* regdef:VGPR_32 */, def undef %14.sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %24:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:VReg_64 */, %14
; GCN-NEXT: S_ENDPGM 0
%v0 = call i32 asm sideeffect "; def $0", "=v"()
%tmp = insertelement <2 x i32> poison, i32 %v0, i32 0
%mai = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %arg, i32 0, i32 0, i32 0)
store volatile <4 x i32> %mai, ptr addrspace(1) poison
call void asm sideeffect "; use $0", "v"(<2 x i32> %tmp);
ret void
}
declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }