
Extension of https://reviews.llvm.org/D141101 to remove the define flag from the last stack memory access. This fixes the case where COPY instructions are used for some of the stack restoration, but those copies get optimized away during the machine-cp pass.

Prior to this change, it was possible to produce the following code:

    $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_ST 64, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.17, align 4, addrspace 5)
    $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_ST 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.17 + 16, align 4, addrspace 5)
    $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_ST 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.17 + 32, align 4, addrspace 5)
    $agpr31 = COPY $agpr112, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
    $agpr30 = COPY $agpr208, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
    $agpr28_agpr29 = SCRATCH_LOAD_DWORDX2_ST 112, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s64) from %stack.17 + 48, align 4, addrspace 5)

where `$agpr30 = COPY $agpr208` would be optimized away by the `machine-cp` pass. Instead, the last load is now emitted as:

    $agpr28_agpr29 = SCRATCH_LOAD_DWORDX2_ST 112, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.17 + 48, align 4, addrspace 5)

Fixes #131386.

I made the simple fix, but I'm not completely comfortable with this change, since the reason for the previous inclusion of `IsLastSubReg` is unclear to me. @krzysz00
145 lines
12 KiB
YAML
145 lines
12 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -start-before=greedy,1 -stop-after=prologepilog -verify-machineinstrs -verify-regalloc -o - %s | FileCheck --check-prefixes=GCN %s
|
|
|
|
# The VGPR pair spilled and restored around the callsite is used in the next basic block.
|
|
#
|
|
# The AMDGPU target spill hooks storeRegToStackSlot/loadRegFromStackSlot emit a single spill pseudo
|
|
# per register spill. This works around a limitation in the inline spiller, which updates the
|
|
# LiveIntervals incorrectly when a spill is lowered into multiple instructions.
|
|
# AV spills were previously handled by converting them into equivalent VGPR spills with added copies.
|
|
# The resulting multiple instructions (a copy + VGPR spill pseudo) introduced an incorrect live range
|
|
# that caused a crash during RA. This was fixed by introducing AV* spill pseudos, which ensure a
|
|
# single instruction per spill; with that fix, this test compiles successfully.
|
|
|
|
# Properties of the MIR function under test. The RUN line above starts llc at the
# greedy register allocator and stops after prologepilog, so this document supplies
# the function state those passes consume.
---
|
|
name: test_av_spill_cross_bb_usage
|
|
tracksRegLiveness: true
|
|
frameInfo:
|
|
adjustsStack: true
|
|
# One pre-declared 4-byte spill slot (id 0). The input body below never references
# %stack.0; the slots named in the CHECK lines are %stack.1 through %stack.15.
stack:
|
|
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
|
|
# These two registers show up in the expected output as the operands of every
# BUFFER_STORE_DWORD_OFFSET / BUFFER_LOAD_DWORD_OFFSET spill and reload.
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
body: |
|
|
; GCN-LABEL: name: test_av_spill_cross_bb_usage
|
|
; GCN: bb.0:
|
|
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; GCN-NEXT: liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $sgpr30_sgpr31
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
|
|
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
|
|
; GCN-NEXT: renamable $vgpr44 = COPY $vgpr13, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr43 = COPY $vgpr12, implicit $exec
|
|
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit undef $scc
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: successors: %bb.2(0x80000000)
|
|
; GCN-NEXT: liveins: $exec, $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr40, $sgpr30_sgpr31, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000F, $vgpr41_vgpr42:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F, $vgpr45_vgpr46:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: renamable $vgpr57 = COPY $vgpr9, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr56 = COPY $vgpr8, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr59 = COPY $vgpr7, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr58 = COPY $vgpr6, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr61 = COPY $vgpr5, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr60 = COPY $vgpr4, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr42 = COPY $vgpr3, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr41 = COPY $vgpr2, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
|
|
; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
|
|
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
|
|
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
|
|
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
|
|
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.2 + 4, addrspace 5)
|
|
; GCN-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
|
|
; GCN-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5)
|
|
; GCN-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
|
|
; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2, addrspace 5)
|
|
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.2 + 4, addrspace 5)
|
|
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: liveins: $vgpr40, $vgpr14_vgpr15:0x000000000000000F, $vgpr43_vgpr44:0x000000000000000F
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
|
|
; GCN-NEXT: FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
; GCN-NEXT: FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
; GCN-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
|
|
; GCN-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
|
|
; GCN-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
|
|
; GCN-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
|
|
; GCN-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
|
|
; GCN-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
|
|
; GCN-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
|
|
; GCN-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
|
|
; GCN-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
|
|
; GCN-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
|
|
; GCN-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
|
|
; GCN-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
|
|
; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
|
; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
|
|
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
|
; GCN-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
|
|
; Input entry block. Packs the incoming $vgpr0-$vgpr15 into eight 64-bit virtual
; registers, two COPYs per register (sub1 first, carrying the `undef` flag, then sub0):
;   %0 = $vgpr15:$vgpr14   %1 = $vgpr13:$vgpr12   %2 = $vgpr11:$vgpr10   %3 = $vgpr9:$vgpr8
;   %4 = $vgpr7:$vgpr6     %5 = $vgpr5:$vgpr4     %6 = $vgpr3:$vgpr2     %7 = $vgpr1:$vgpr0
; All eight values are consumed only in later blocks (bb.1 and bb.2).
bb.0:
|
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $sgpr30, $sgpr31, $sgpr30_sgpr31
|
|
|
|
undef %0.sub1:vreg_64 = COPY $vgpr15
|
|
%0.sub0:vreg_64 = COPY $vgpr14
|
|
undef %1.sub1:vreg_64 = COPY $vgpr13
|
|
%1.sub0:vreg_64 = COPY $vgpr12
|
|
undef %2.sub1:vreg_64 = COPY $vgpr11
|
|
%2.sub0:vreg_64 = COPY $vgpr10
|
|
undef %3.sub1:vreg_64 = COPY $vgpr9
|
|
%3.sub0:vreg_64 = COPY $vgpr8
|
|
undef %4.sub1:vreg_64 = COPY $vgpr7
|
|
%4.sub0:vreg_64 = COPY $vgpr6
|
|
undef %5.sub1:vreg_64 = COPY $vgpr5
|
|
%5.sub0:vreg_64 = COPY $vgpr4
|
|
undef %6.sub1:vreg_64 = COPY $vgpr3
|
|
%6.sub0:vreg_64 = COPY $vgpr2
|
|
undef %7.sub1:vreg_64 = COPY $vgpr1
|
|
%7.sub0:vreg_64 = COPY $vgpr0
|
|
; The branch condition reads an undef $scc: the test only needs both successors
; (bb.2 directly, or bb.1 with the call) to exist, not a meaningful condition.
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
|
|
S_BRANCH %bb.1
|
|
|
|
; Input call block. It contains the SI_CALL (with the csr_amdgpu register mask) that
; the values defined in bb.0 must survive:
;   - %5, %6, %7 feed the V_FMA_F64 after the call; %2, %3, %4 feed the stores after it.
;   - %0 and %1 are not referenced here but are used in bb.2, so they are live through
;     this block as well.
; In the expected output, %0 is the pair the header comment describes: it is assigned
; $vgpr14_vgpr15, stored to %stack.1 before SI_CALL and reloaded right after it, then
; used in bb.2. %2 is handled the same way via %stack.2, reloaded into $vgpr0_vgpr1.
; The block has no terminator; the CHECK lines list %bb.2 as its only successor.
bb.1:
|
|
liveins: $vgpr40, $sgpr30, $sgpr31, $sgpr30_sgpr31
|
|
|
|
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
; Call target is deliberately an IMPLICIT_DEF: only the call's clobbers matter here.
renamable $sgpr16_sgpr17 = IMPLICIT_DEF
|
|
; Save $sgpr30 and $sgpr31 into $vgpr40 before the call (the immediates 0 and 1 are
; presumably the destination lane indices - confirm against SI_SPILL_S32_TO_VGPR).
$vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
|
|
$vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
|
|
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
|
|
%8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
|
|
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
|
|
FLAT_STORE_DWORDX2 %4, %8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
FLAT_STORE_DWORDX2 %2, %3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
|
|
; Input exit block, reached from bb.0 via its conditional branch and from bb.1 by
; fall-through. It holds the cross-block uses this test is about: %1 and %0 are
; defined in bb.0 and first used here, after the call in bb.1.
bb.2:
|
|
liveins: $vgpr40
|
|
|
|
%9:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
|
|
; The first store's address operand is an undef virtual register (%10); only its
; data operand %1 is a real use.
FLAT_STORE_DWORDX2 undef %10:vreg_64, %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
FLAT_STORE_DWORDX2 %9, %0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
|
|
S_SETPC_B64_return undef $sgpr30_sgpr31
|
|
...
|