Mark the memory operands of spill loads and stores as MOThreadPrivate so that these instructions are emitted with the `nv` bit set. Scratch memory used for spills is never shared with another thread; it is purely thread-local and therefore a good fit for the `nv` bit, which is controlled by the MOThreadPrivate flag.
44 lines
4.8 KiB
YAML
44 lines
4.8 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -run-pass prologepilog,machine-cp -o - %s | FileCheck -check-prefix=GFX942 %s

# Purpose: pin the expansion of the SI_SPILL_AV512_SAVE / SI_SPILL_AV512_RESTORE
# pseudos in the function below after the prologepilog and machine-cp passes
# on gfx942.
#
# What the expected output records:
#   * Every SCRATCH_STORE_* / SCRATCH_LOAD_* produced from the spill carries
#     the "amdgpu-thread-private" flag on its memory operand.
#   * 56 of the 64 spilled bytes go through scratch relative to $sgpr32
#     (three DWORDX4 accesses at offsets 0, 16, 32 and one DWORDX2 at 48).
#   * The remaining two dwords ($vgpr14, $vgpr15) are not stored to scratch;
#     they are moved into $agpr31 / $agpr30 with V_ACCVGPR_WRITE_B32_e64 and
#     read back with COPY.
#
# NOTE(review): presumably $agpr30/$agpr31 are picked because the IMPLICIT_DEFs
# keep $agpr16-$agpr29 occupied and $agpr0-$agpr15 is the restore destination;
# confirm against the spill lowering code before relying on this.
#
# The check lines are autogenerated; regenerate them with
# utils/update_mir_test_checks.py instead of editing them by hand.

--- |
  define amdgpu_kernel void @agpr_spill_copy() #0 { ret void }

  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
---
name: agpr_spill_copy
tracksRegLiveness: true
stack:
  # Single 64-byte spill slot; the s512 spill store/reload below target it.
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
  hasSpilledVGPRs: true
body: |
  bb.0:
    ; GFX942-LABEL: name: agpr_spill_copy
    ; GFX942: liveins: $agpr30, $agpr31
    ; GFX942-NEXT: {{ $}}
    ; GFX942-NEXT: renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF
    ; GFX942-NEXT: renamable $agpr28_agpr29 = IMPLICIT_DEF
    ; GFX942-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
    ; GFX942-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; GFX942-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
    ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr12_vgpr13, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 48, align 4, addrspace 5)
    ; GFX942-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
    ; GFX942-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
    ; GFX942-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
    ; GFX942-NEXT: $agpr15 = COPY $agpr30, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    ; GFX942-NEXT: $agpr14 = COPY $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    ; GFX942-NEXT: $agpr12_agpr13 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 48, align 4, addrspace 5)
    ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
    renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF
    renamable $agpr28_agpr29 = IMPLICIT_DEF
    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
    SI_SPILL_AV512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
    renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
    S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
...