llvm-project/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
Matt Arsenault 8b2ca766f0 AMDGPU: Reserve v32 if we may need to copy between AGPRs on gfx908
We need to guarantee cheap copies between AGPRs, and unfortunately
gfx908 cannot directly do this. Theoretically we could set the
scavenger up with an emergency spill slot, but it also feels
unreasonable to pay that cost for what was assumed to be a simple and
cheap copy. Pick a register that doesn't conflict with any ABI
registers.

This does not address the same issue when copying from SGPR to AGPR
for gfx90a (this coincidentally fixes it for gfx908), but that's less
interesting since the register allocator shouldn't be proactively
introducing such copies.

One edge case I'm worried about is respecting the VGPR budget implied
by amdgpu-waves-per-eu. If the theoretical upper bound of a function
is 32 VGPRs, this will force the actual count to be 33.

This is also broken if inline assembly uses/defs something in v32. The
coalescer will eliminate the intermediate vreg between the def and
use, and the introduced copy will clobber the user value.

(cherry picked from commit 3335784ac2d587ff4eac04586e189532ae8b2607)
2022-02-08 11:14:52 -05:00

237 lines
7.9 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s
---
# Register-allocation case for a 64-bit VGPR tuple (vreg_64).
# The live-ins $vgpr0_vgpr1 and $vgpr2 are copied into %0 and %1. Both stay
# live across the definition of %2: %0 is an operand of every memory
# instruction and %1 is only consumed by the final GLOBAL_STORE_DWORD.
# The directive below expects the GLOBAL_LOAD result to be rewritten to
# $vgpr3_vgpr4, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned VGPR tuples - confirm.
# GCN-LABEL: name: alloc_vgpr_64
# GFX908: $vgpr3_vgpr4 = GLOBAL_LOAD
name: alloc_vgpr_64
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 96-bit VGPR tuple (vreg_96).
# The live-ins $vgpr0_vgpr1 and $vgpr2 are copied into %0 and %1. Both stay
# live across the definition of %2: %0 is an operand of every memory
# instruction and %1 is only consumed by the final GLOBAL_STORE_DWORD.
# The directive below expects the GLOBAL_LOAD result to be rewritten to
# $vgpr3_vgpr4_vgpr5, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned VGPR tuples - confirm.
# GCN-LABEL: name: alloc_vgpr_96
# GFX908: $vgpr3_vgpr4_vgpr5 = GLOBAL_LOAD
name: alloc_vgpr_96
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 128-bit VGPR tuple (vreg_128).
# The live-ins $vgpr0_vgpr1 and $vgpr2 are copied into %0 and %1. Both stay
# live across the definition of %2: %0 is an operand of every memory
# instruction and %1 is only consumed by the final GLOBAL_STORE_DWORD.
# The directive below expects the GLOBAL_LOAD result to be rewritten to
# $vgpr3_vgpr4_vgpr5_vgpr6, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned VGPR tuples - confirm.
# GCN-LABEL: name: alloc_vgpr_128
# GFX908: $vgpr3_vgpr4_vgpr5_vgpr6 = GLOBAL_LOAD
name: alloc_vgpr_128
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 160-bit VGPR tuple (vreg_160).
# %2 is defined by IMAGE_LOAD_V5_V1, which takes %1 and an undef sgpr_256
# operand. Only the sub0_sub1_sub2_sub3 part of %2 is stored afterwards.
# %0 and %1 (copies of the live-ins $vgpr0_vgpr1 and $vgpr2) are still used
# after %2 is defined, so they are live across it.
# The directive below expects the IMAGE_LOAD result to be rewritten to
# $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned VGPR tuples - confirm.
# GCN-LABEL: name: alloc_vgpr_160
# GFX908: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMAGE_LOAD
name: alloc_vgpr_160
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 256-bit VGPR tuple (vreg_256).
# %3 is an IMPLICIT_DEF sgpr_256 that is copied into the vreg_256 %2. %2 is
# then used as an operand of IMAGE_SAMPLE_C_CL_O_V4_V8 (whose result %4 is
# never read) and its sub0_sub1_sub2_sub3 part is stored.
# %0 and %1 (copies of the live-ins $vgpr0_vgpr1 and $vgpr2) are still used
# after %2 is defined, so they are live across it.
# The directive below expects the destination of that COPY to be rewritten
# to $vgpr3..$vgpr10, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned VGPR tuples - confirm.
# GCN-LABEL: name: alloc_vgpr_256
# GFX908: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10 = COPY
name: alloc_vgpr_256
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256 = COPY %3:sgpr_256
%4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 512-bit VGPR tuple (vreg_512).
# %2 is an IMPLICIT_DEF whose sixteen 32-bit sub-registers are all read by
# four GLOBAL_STORE_DWORDX4 instructions, one 4-register slice each.
# %0 and %1 (copies of the live-ins $vgpr0_vgpr1 and $vgpr2) are still used
# after %2 is defined, so they are live across it.
# The directive below expects the IMPLICIT_DEF to be rewritten to
# $vgpr3..$vgpr18, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned VGPR tuples - confirm.
# GCN-LABEL: name: alloc_vgpr_512
# GFX908: $vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18 = IMPLICIT_DEF
name: alloc_vgpr_512
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 1024-bit VGPR tuple (vreg_1024).
# %2 is an IMPLICIT_DEF whose thirty-two 32-bit sub-registers are all read by
# eight GLOBAL_STORE_DWORDX4 instructions, one 4-register slice each.
# %0 and %1 (copies of the live-ins $vgpr0_vgpr1 and $vgpr2) are still used
# after %2 is defined, so they are live across it.
# Unlike the smaller VGPR cases in this file, the directive below expects the
# tuple to be $vgpr0..$vgpr31 rather than a range starting at $vgpr3.
# NOTE(review): that range overlaps the live-in registers, so %0 and %1
# presumably get assigned elsewhere. A 32-register tuple starting at $vgpr3
# would have to include $vgpr32, which is presumably reserved on gfx908 -
# confirm that this expectation matches current llc output.
# GCN-LABEL: name: alloc_vgpr_1024
# GFX908: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
name: alloc_vgpr_1024
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$vgpr2' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 64-bit AGPR tuple (areg_64).
# %3 is an IMPLICIT_DEF areg_64 that is copied into the vreg_64 %2 and then
# stored. The live-in $agpr0 is only copied into %1 after that store, so
# $agpr0 is still live while %3 is live.
# The directive below expects the IMPLICIT_DEF to be rewritten to
# $agpr1_agpr2, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned AGPR tuples - confirm.
# GCN-LABEL: name: alloc_agpr_64
# GFX908: $agpr1_agpr2 = IMPLICIT_DEF
name: alloc_agpr_64
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$agpr0' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $agpr0
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64 = COPY %3:areg_64
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 128-bit AGPR tuple (areg_128).
# %3 is an IMPLICIT_DEF areg_128 that is copied into the vreg_128 %2 and then
# stored. The live-in $agpr0 is only copied into %1 after that store, so
# $agpr0 is still live while %3 is live.
# The directive below expects the IMPLICIT_DEF to be rewritten to
# $agpr1_agpr2_agpr3_agpr4, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned AGPR tuples - confirm.
# GCN-LABEL: name: alloc_agpr_128
# GFX908: $agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
name: alloc_agpr_128
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$agpr0' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $agpr0
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128 = COPY %3:areg_128
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 512-bit AGPR tuple (areg_512).
# %3 is an IMPLICIT_DEF areg_512 that is copied into the vreg_512 %2, which
# is then stored in four 4-register slices. The live-in $agpr0 is only copied
# into %1 after those stores, so $agpr0 is still live while %3 is live.
# The directive below expects the IMPLICIT_DEF to be rewritten to
# $agpr1..$agpr16, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned AGPR tuples - confirm.
# GCN-LABEL: name: alloc_agpr_512
# GFX908: $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = IMPLICIT_DEF
name: alloc_agpr_512
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$agpr0' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $agpr0
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512 = COPY %3:areg_512
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
# Register-allocation case for a 1024-bit AGPR tuple (areg_1024).
# %3 is an IMPLICIT_DEF areg_1024 that is copied into the vreg_1024 %2, which
# is then stored in eight 4-register slices. The live-in $agpr0 is only copied
# into %1 after those stores, so $agpr0 is still live while %3 is live.
# The directive below expects the IMPLICIT_DEF to be rewritten to
# $agpr1..$agpr32, i.e. a tuple that starts at an odd register.
# NOTE(review): presumably this demonstrates that gfx908 does not require
# even-aligned AGPR tuples - confirm.
# GCN-LABEL: name: alloc_agpr_1024
# GFX908: $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32 = IMPLICIT_DEF
name: alloc_agpr_1024
tracksRegLiveness: true
liveins:
- { reg: '$vgpr0_vgpr1' }
- { reg: '$agpr0' }
body: |
bb.0:
liveins: $vgpr0_vgpr1, $agpr0
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024 = COPY %3:areg_1024
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...