Aaditya 1d6607d5ef
[AMDGPU] Remove alignment constraint from spill pseudos (#177317)
Spill pseudo opcodes don't require target reg class alignment
constraint.
For targets which do have alignment constraints, lower the spills to
32-bit accesses.
Update the machine verifier accordingly.
Sgpr spill pseudos didn't enforce alignment constraints.
Modify vgpr spills reg class to not enforce them either.
2026-03-09 09:33:23 +05:30

451 lines
30 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX900 %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX942 %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX12,GFX1200 %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX12,GFX1250 %s
---
name: spill_v32
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0
; GFX900-LABEL: name: spill_v32
; GFX900: liveins: $vgpr0
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: S_NOP 0, implicit $vgpr0
;
; GFX942-LABEL: name: spill_v32
; GFX942: liveins: $vgpr0
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX942-NEXT: S_NOP 0, implicit $vgpr0
;
; GFX12-LABEL: name: spill_v32
; GFX12: liveins: $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX12-NEXT: S_NOP 0, implicit $vgpr0
SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
S_NOP 0, implicit $vgpr0
...
---
name: spill_v32_kill
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0
; GFX900-LABEL: name: spill_v32_kill
; GFX900: liveins: $vgpr0
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
;
; GFX942-LABEL: name: spill_v32_kill
; GFX942: liveins: $vgpr0
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
;
; GFX12-LABEL: name: spill_v32_kill
; GFX12: liveins: $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
...
---
name: spill_v64
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX900-LABEL: name: spill_v64
; GFX900: liveins: $vgpr0_vgpr1
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1
;
; GFX942-LABEL: name: spill_v64
; GFX942: liveins: $vgpr0_vgpr1
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
; GFX942-NEXT: S_NOP 0, implicit $vgpr0_vgpr1
;
; GFX12-LABEL: name: spill_v64
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
; GFX12-NEXT: S_NOP 0, implicit $vgpr0_vgpr1
SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
S_NOP 0, implicit $vgpr0_vgpr1
...
---
name: spill_v64_kill
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0_vgpr1
; GFX900-LABEL: name: spill_v64_kill
; GFX900: liveins: $vgpr0_vgpr1
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
;
; GFX942-LABEL: name: spill_v64_kill
; GFX942: liveins: $vgpr0_vgpr1
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
;
; GFX12-LABEL: name: spill_v64_kill
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
...
# Make sure there's no verifier error on the undef spill component when the value is killed.
---
name: spill_v64_undef_sub1_killed
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0
; GFX900-LABEL: name: spill_v64_undef_sub1_killed
; GFX900: liveins: $vgpr0
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
;
; GFX942-LABEL: name: spill_v64_undef_sub1_killed
; GFX942: liveins: $vgpr0
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
;
; GFX12-LABEL: name: spill_v64_undef_sub1_killed
; GFX12: liveins: $vgpr0
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
...
---
name: spill_v64_undef_sub0_killed
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr1
; GFX900-LABEL: name: spill_v64_undef_sub0_killed
; GFX900: liveins: $vgpr1
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
;
; GFX942-LABEL: name: spill_v64_undef_sub0_killed
; GFX942: liveins: $vgpr1
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
;
; GFX12-LABEL: name: spill_v64_undef_sub0_killed
; GFX12: liveins: $vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
...
---
name: spill_v128_kill
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-LABEL: name: spill_v128_kill
; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
;
; GFX942-LABEL: name: spill_v128_kill
; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
;
; GFX12-LABEL: name: spill_v128_kill
; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
...
---
name: spill_v32_undef
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
; CHECK-LABEL: name: spill_v32_undef
; CHECK: S_NOP 0, implicit undef $vgpr0
SI_SPILL_V32_SAVE undef $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
S_NOP 0, implicit undef $vgpr0
...
---
name: spill_v64_undef
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 8, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
; CHECK-LABEL: name: spill_v64_undef
; CHECK: S_NOP 0, implicit undef $vgpr0_vgpr1
SI_SPILL_V64_SAVE undef $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
S_NOP 0, implicit undef $vgpr0_vgpr1
...
---
name: spill_v128_kill_unaligned
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX900-LABEL: name: spill_v128_kill_unaligned
; GFX900: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
;
; GFX942-LABEL: name: spill_v128_kill_unaligned
; GFX942: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
;
; GFX1200-LABEL: name: spill_v128_kill_unaligned
; GFX1200: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX1200-NEXT: {{ $}}
; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr1_vgpr2_vgpr3_vgpr4, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
;
; GFX1250-LABEL: name: spill_v128_kill_unaligned
; GFX1250: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX1250-NEXT: {{ $}}
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
SI_SPILL_V128_SAVE killed $vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
...
---
name: spill_v128_unaligned
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX900-LABEL: name: spill_v128_unaligned
; GFX900: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
;
; GFX942-LABEL: name: spill_v128_unaligned
; GFX942: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
;
; GFX1200-LABEL: name: spill_v128_unaligned
; GFX1200: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX1200-NEXT: {{ $}}
; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr1_vgpr2_vgpr3_vgpr4, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
;
; GFX1250-LABEL: name: spill_v128_unaligned
; GFX1250: liveins: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX1250-NEXT: {{ $}}
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
SI_SPILL_V128_SAVE $vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
...
---
name: spill_v256_aligned
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-LABEL: name: spill_v256_aligned
; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
;
; GFX942-LABEL: name: spill_v256_aligned
; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
;
; GFX12-LABEL: name: spill_v256_aligned
; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
; GFX12-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
SI_SPILL_V256_SAVE $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
...
---
name: spill_v256_unaligned
tracksRegLiveness: true
stack:
- { id: 0, type: spill-slot, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
body: |
bb.0:
liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX900-LABEL: name: spill_v256_unaligned
; GFX900: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
;
; GFX942-LABEL: name: spill_v256_unaligned
; GFX942: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX942-NEXT: {{ $}}
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr6, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr7, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr8, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
;
; GFX1200-LABEL: name: spill_v256_unaligned
; GFX1200: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX1200-NEXT: {{ $}}
; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr1_vgpr2_vgpr3_vgpr4, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr5_vgpr6_vgpr7_vgpr8, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
;
; GFX1250-LABEL: name: spill_v256_unaligned
; GFX1250: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
; GFX1250-NEXT: {{ $}}
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr6, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr7, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr8, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
SI_SPILL_V256_SAVE $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
...