https://reviews.llvm.org/D52052 prevents register splitting on an MBB that has prolog instructions defining the exec register (or the mask register that activates the threads of a warp on a GPU). The constraint seems too strict, because 1) if the split is allowed, it may fit into the free live range of a physical register, and no spill will happen; 2) the register class of the register being split may not be the same as that of the register defined in the prolog, so there is no interference with the register defined in the prolog. The current code has another small issue: MBB->getFirstNonDebugInstr() only skips debug instructions, but SA->getFirstSplitPoint(Number) also skips label and phi instructions. This causes some MBBs with a label instruction to be treated as having a prolog. This patch relaxes the split constraint on MBBs with a prolog by checking whether the register defined in the prolog has a common register class with the register being split. It allows the split if the register defined in the prolog is a physical register or there is no common register class. --------- Co-authored-by: Yuanke Luo <ykluo@birentech.com>
168 lines
9.2 KiB
YAML
168 lines
9.2 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
|
# RUN: llc -mtriple=amdgcn-- -verify-machineinstrs -run-pass=greedy -o - %s | FileCheck %s
|
|
|
|
---
|
|
# Check that the spill save/restore is inserted after the $exec mask is defined.
|
|
|
|
name: foo
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: $sgpr32
|
|
body: |
|
|
; CHECK-LABEL: name: foo
|
|
; CHECK: bb.0:
|
|
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
|
; CHECK-NEXT: liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: dead [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr102_sgpr103
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
|
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5)
|
|
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5)
|
|
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.2, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.2, align 4, addrspace 5)
|
|
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.3, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.3, align 4, addrspace 5)
|
|
; CHECK-NEXT: SI_SPILL_S128_SAVE [[COPY1]], %stack.4, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.4, align 4, addrspace 5)
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY19:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY20:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: [[COPY21:%[0-9]+]]:sgpr_128 = COPY [[COPY1]]
|
|
; CHECK-NEXT: S_BRANCH %bb.1
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.1:
|
|
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
|
; CHECK-NEXT: liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
|
|
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
|
; CHECK-NEXT: S_BRANCH %bb.2
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.2:
|
|
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
|
|
; CHECK-NEXT: liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_1:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_1]], implicit-def $scc
|
|
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
|
|
; CHECK-NEXT: S_BRANCH %bb.4
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.3:
|
|
; CHECK-NEXT: successors: %bb.4(0x80000000)
|
|
; CHECK-NEXT: liveins: $sgpr102_sgpr103
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[S_OR_SAVEEXEC_B64_1:%[0-9]+]]:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_OR_SAVEEXEC_B64_1]], implicit-def $scc
|
|
; CHECK-NEXT: S_BRANCH %bb.4
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.4:
|
|
; CHECK-NEXT: $exec = IMPLICIT_DEF
|
|
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5)
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY1]].sub0_sub1, [[SI_SPILL_S128_RESTORE]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE1:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
|
|
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE2:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.2, align 4, addrspace 5)
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[SI_SPILL_S128_RESTORE1]].sub0_sub1, [[SI_SPILL_S128_RESTORE2]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE3:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.3, align 4, addrspace 5)
|
|
; CHECK-NEXT: [[SI_SPILL_S128_RESTORE4:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.4, align 4, addrspace 5)
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[SI_SPILL_S128_RESTORE3]].sub0_sub1, [[SI_SPILL_S128_RESTORE4]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY2]].sub0_sub1, [[COPY3]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY4]].sub0_sub1, [[COPY5]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY6]].sub0_sub1, [[COPY7]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY8]].sub0_sub1, [[COPY9]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY10]].sub0_sub1, [[COPY11]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY12]].sub0_sub1, [[COPY13]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY14]].sub0_sub1, [[COPY15]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY16]].sub0_sub1, [[COPY17]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY18]].sub0_sub1, [[COPY19]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: S_CMP_EQ_U64 [[COPY20]].sub0_sub1, [[COPY21]].sub2_sub3, implicit-def $scc
|
|
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit [[S_OR_SAVEEXEC_B64_1]], implicit $vgpr0
|
|
bb.0:
|
|
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103
|
|


; Entry block. %0 and %1 are copied from live-in SGPRs; %2-%26 are 25 further
; sgpr_128 copies of %1. Every one of %1-%26 is read again in bb.4.
; NOTE(review): presumably this many simultaneously live 128-bit SGPR values
; is what forces the greedy allocator to split/spill - confirm.
%0:sreg_64 = COPY $sgpr102_sgpr103
|
|
%1:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
|
|
%2:sgpr_128 = COPY %1
|
|
%3:sgpr_128 = COPY %1
|
|
%4:sgpr_128 = COPY %1
|
|
%5:sgpr_128 = COPY %1
|
|
%6:sgpr_128 = COPY %1
|
|
%7:sgpr_128 = COPY %1
|
|
%8:sgpr_128 = COPY %1
|
|
%9:sgpr_128 = COPY %1
|
|
%10:sgpr_128 = COPY %1
|
|
%11:sgpr_128 = COPY %1
|
|
%12:sgpr_128 = COPY %1
|
|
%13:sgpr_128 = COPY %1
|
|
%14:sgpr_128 = COPY %1
|
|
%15:sgpr_128 = COPY %1
|
|
%16:sgpr_128 = COPY %1
|
|
%17:sgpr_128 = COPY %1
|
|
%18:sgpr_128 = COPY %1
|
|
%19:sgpr_128 = COPY %1
|
|
%20:sgpr_128 = COPY %1
|
|
%21:sgpr_128 = COPY %1
|
|
%22:sgpr_128 = COPY %1
|
|
%23:sgpr_128 = COPY %1
|
|
%24:sgpr_128 = COPY %1
|
|
%25:sgpr_128 = COPY %1
|
|
%26:sgpr_128 = COPY %1
|
|
S_BRANCH %bb.1
|
|
|
|
bb.1:
|
|
liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103
|
|


; The first instruction of this block redefines %0 and implicitly defines
; $exec (and $scc), so the block starts with an $exec-defining instruction.
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
; $exec is then overwritten from the current $exec and %0.
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
|
; Conditional branch to bb.3, otherwise fall through to the branch to bb.2.
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
|
S_BRANCH %bb.2
|
|
|
|
bb.2:
|
|
liveins: $sgpr98_sgpr99, $sgpr102_sgpr103
|
|


; Same shape as bb.1, but sourced from $sgpr98_sgpr99: the first instruction
; redefines %0 and implicitly defines $exec (and $scc).
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
; $exec is then overwritten from the current $exec and %0.
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
|
; Conditional branch to bb.3, otherwise branch to bb.4.
S_CBRANCH_EXECZ %bb.3, implicit $exec
|
|
S_BRANCH %bb.4
|
|
|
|
bb.3:
|
|
liveins: $sgpr102_sgpr103
|
|


; The first instruction redefines %0 from $sgpr102_sgpr103 and implicitly
; defines $exec (and $scc).
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec
|
|
; $exec is then overwritten from the current $exec and %0.
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
|
|
; Unconditional branch to bb.4 (no EXECZ check in this block).
S_BRANCH %bb.4
|
|
|
|
bb.4:
|
|
; $exec is redefined before any other instruction in this block.
$exec = IMPLICIT_DEF
|
|
; The 13 compares below read %1-%26 pairwise: sub0_sub1 of the odd-numbered
; register and sub2_sub3 of the following even-numbered one. This is the only
; place the copies made in bb.0 are used.
S_CMP_EQ_U64 %1.sub0_sub1, %2.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %3.sub0_sub1, %4.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %5.sub0_sub1, %6.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %7.sub0_sub1, %8.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %9.sub0_sub1, %10.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %11.sub0_sub1, %12.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %13.sub0_sub1, %14.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %15.sub0_sub1, %16.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %17.sub0_sub1, %18.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %19.sub0_sub1, %20.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %21.sub0_sub1, %22.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %23.sub0_sub1, %24.sub2_sub3, implicit-def $scc
|
|
S_CMP_EQ_U64 %25.sub0_sub1, %26.sub2_sub3, implicit-def $scc
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
; The return reads %0 and $vgpr0 as implicit operands.
S_SETPC_B64_return undef $sgpr30_sgpr31, implicit %0, implicit $vgpr0
|
|
...
|