llvm-project/llvm/test/CodeGen/AMDGPU/spill-before-exec2.mir
Luo Yuanke 9bae84b017
[RegAlloc] Relax the split constraint on MBB prolog (#168259)
https://reviews.llvm.org/D52052 prevents register splitting in an MBB
whose prolog instructions define the exec register (the mask register
that activates the threads of a warp on a GPU). That constraint seems too
strict, because 1) if the split is allowed, the result may fit the free live
range of a physical register, so no spill happens; 2) the register class of
the register being split may differ from that of the register defined in the
prolog, in which case there is no interference with the register defined in
the prolog.
The current code has another small issue. MBB->getFirstNonDebugInstr()
only skips debug instructions, but SA->getFirstSplitPoint(Number) also skips
label and PHI instructions. This causes some MBBs that contain label
instructions to be treated as having a prolog.
This patch relaxes the split constraint on MBBs with a prolog by checking
whether the register defined in the prolog has a common register class with
the register being split. It allows the split if the register defined in the
prolog is a physical register or there is no common register class.

---------

Co-authored-by: Yuanke Luo <ykluo@birentech.com>
2025-11-29 07:27:19 +08:00

168 lines
9.2 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -run-pass=greedy -o - %s | FileCheck %s
---
# Check that spill save/restore code is inserted after the $exec mask is defined.
name: foo
tracksRegLiveness: true
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
body: |
; CHECK-LABEL: name: foo
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr102_sgpr103
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5)
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5)
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.3, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.3, align 4, addrspace 5)
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.4, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.4, align 4, addrspace 5)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY20:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: [[COPY21:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
; CHECK-NEXT: S_BRANCH %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; CHECK-NEXT: liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_1:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_1]], implicit-def $scc
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; CHECK-NEXT: S_BRANCH %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: liveins: $sgpr102_sgpr103
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_1:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_1]], implicit-def $scc
; CHECK-NEXT: S_BRANCH %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: $exec = IMPLICIT_DEF
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY1]].sub0_sub1, [[SI_SPILL_S128_RESTORE]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE1:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE2:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5)
; CHECK-NEXT: S_CMP_EQ_U64 [[SI_SPILL_S128_RESTORE1]].sub0_sub1, [[SI_SPILL_S128_RESTORE2]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE3:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.3, align 4, addrspace 5)
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE4:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.4, align 4, addrspace 5)
; CHECK-NEXT: S_CMP_EQ_U64 [[SI_SPILL_S128_RESTORE3]].sub0_sub1, [[SI_SPILL_S128_RESTORE4]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY2]].sub0_sub1, [[COPY3]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY4]].sub0_sub1, [[COPY5]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY6]].sub0_sub1, [[COPY7]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY8]].sub0_sub1, [[COPY9]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY10]].sub0_sub1, [[COPY11]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY12]].sub0_sub1, [[COPY13]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY14]].sub0_sub1, [[COPY15]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY16]].sub0_sub1, [[COPY17]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY18]].sub0_sub1, [[COPY19]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY20]].sub0_sub1, [[COPY21]].sub2_sub3, implicit-def $scc
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit [[S_OR_SAVEEXEC_B64_1]], implicit $vgpr0
bb.0:
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
; Entry block: defines %0 (sreg_64) and %1 (sgpr_128) from live-in SGPRs, then
; copies %1 into %2..%26. None of %1..%26 is read again until bb.4, so all 26
; sgpr_128 values stay live across bb.1-bb.3.
; NOTE(review): the number of copies appears chosen so that the allocator has
; to spill some of them (the CHECK lines expect five SI_SPILL_S128_SAVEs) --
; confirm before adding or removing copies.
%0:sreg_64 = COPY $sgpr102_sgpr103
%1:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
%2:sgpr_128 = COPY %1
%3:sgpr_128 = COPY %1
%4:sgpr_128 = COPY %1
%5:sgpr_128 = COPY %1
%6:sgpr_128 = COPY %1
%7:sgpr_128 = COPY %1
%8:sgpr_128 = COPY %1
%9:sgpr_128 = COPY %1
%10:sgpr_128 = COPY %1
%11:sgpr_128 = COPY %1
%12:sgpr_128 = COPY %1
%13:sgpr_128 = COPY %1
%14:sgpr_128 = COPY %1
%15:sgpr_128 = COPY %1
%16:sgpr_128 = COPY %1
%17:sgpr_128 = COPY %1
%18:sgpr_128 = COPY %1
%19:sgpr_128 = COPY %1
%20:sgpr_128 = COPY %1
%21:sgpr_128 = COPY %1
%22:sgpr_128 = COPY %1
%23:sgpr_128 = COPY %1
%24:sgpr_128 = COPY %1
%25:sgpr_128 = COPY %1
%26:sgpr_128 = COPY %1
S_BRANCH %bb.1
bb.1:
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
; Redefines %0 and $exec from $sgpr96_sgpr97, then rewrites $exec again with
; the XOR terminator. Control leaves via the conditional branch to bb.3 or the
; unconditional branch to bb.2.
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:
liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
; Same $exec save/XOR pattern as bb.1, sourced from $sgpr98_sgpr99. Control
; leaves via the conditional branch to bb.3 or the unconditional branch to
; bb.4.
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.4
bb.3:
liveins: $sgpr102_sgpr103
; Same $exec save/XOR pattern, sourced from $sgpr102_sgpr103; always branches
; to bb.4.
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
S_BRANCH %bb.4
bb.4:
; $exec is defined by the first instruction of this block, ahead of every use
; of %1..%26 below. The CHECK lines expect each SI_SPILL_S128_RESTORE to appear
; after this $exec definition, never before it.
$exec = IMPLICIT_DEF
; Each compare reads one 64-bit half from two different sgpr_128 values, so
; every value defined in bb.0 (%1..%26) has a use here.
S_CMP_EQ_U64 %1.sub0_sub1, %2.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %3.sub0_sub1, %4.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %5.sub0_sub1, %6.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %7.sub0_sub1, %8.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %9.sub0_sub1, %10.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %11.sub0_sub1, %12.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %13.sub0_sub1, %14.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %15.sub0_sub1, %16.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %17.sub0_sub1, %18.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %19.sub0_sub1, %20.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %21.sub0_sub1, %22.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %23.sub0_sub1, %24.sub2_sub3, implicit-def $scc
S_CMP_EQ_U64 %25.sub0_sub1, %26.sub2_sub3, implicit-def $scc
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
; The implicit use of %0 keeps it live up to the return.
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %0, implicit $vgpr0
...