The greedy register allocator prefers to move a constrained live range into a larger allocatable class over spilling it. This patch defines the necessary superclasses for vector registers. For subtargets that support copies between VGPRs and AGPRs, the vector register spills during regalloc now become just copies. Reviewed By: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D109301
73 lines
2.8 KiB
YAML
73 lines
2.8 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
|
|
|
|
# Initially %2 starts out with 2 subranges (one for sub0, and one for
|
|
# the rest of the lanes). After %2 is split, after refineSubRanges the
|
|
# newly created register has a different set of lane masks since the
|
|
# copy bundle uses 2 different defs to cover the register. This was
|
|
# fixed by doing refineSubRanges after all the COPYs have been inserted.
|
|
|
|
---
# MIR function exercised by the RUN line of this file (llc with
# -run-pass=greedy and -verify-regalloc on gfx90a).
#
# Shape of the input body (the non-CHECK instructions at the end):
#   * %2 is fully defined by a COPY of %1 in bb.0,
#   * only %2.sub0 is redefined (IMPLICIT_DEF) and read in bb.3,
#   * the whole of %2 is read in bb.4,
#   * the whole of %2 is redefined (IMPLICIT_DEF) in bb.5.
# The input block labels skip bb.2 (bb.0, bb.1, bb.3 .. bb.6), while the
# CHECK lines expect the blocks to be printed as bb.0 .. bb.5.
#
# The "; CHECK" lines are autogenerated (see the NOTE at the top of the
# file), so they should presumably be regenerated with
# utils/update_mir_test_checks.py rather than edited by hand.
|
|
name: subrange_for_this_mask_not_found
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
|
stackPtrOffsetReg: '$sgpr32'
|
|
occupancy: 7
|
|
body: |
|
|
; CHECK-LABEL: name: subrange_for_this_mask_not_found
|
|
; CHECK: bb.0:
|
|
; CHECK: successors: %bb.1(0x80000000)
|
|
; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; CHECK: [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF
|
|
; CHECK: [[COPY:%[0-9]+]]:av_1024_align2 = COPY [[DEF1]]
|
|
; CHECK: bb.1:
|
|
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; CHECK: S_NOP 0, implicit [[DEF1]]
|
|
; CHECK: S_NOP 0, implicit [[DEF1]]
|
|
; CHECK: [[DEF2:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF
|
|
; CHECK: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
|
|
; CHECK: bb.2:
|
|
; CHECK: successors: %bb.3(0x80000000)
|
|
; CHECK: undef %5.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 {
|
|
; CHECK: internal %5.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
|
|
; CHECK: }
|
|
; CHECK: %5.sub0:av_1024_align2 = IMPLICIT_DEF
|
|
; CHECK: S_NOP 0, implicit %5.sub0
|
|
; CHECK: bb.3:
|
|
; CHECK: successors: %bb.4(0x80000000)
|
|
; CHECK: S_NOP 0, implicit %5
|
|
; CHECK: bb.4:
|
|
; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000)
|
|
; CHECK: [[DEF2:%[0-9]+]]:av_1024_align2 = IMPLICIT_DEF
|
|
; CHECK: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
|
|
; CHECK: bb.5:
|
|
; CHECK: undef %3.sub0:vreg_1024_align2 = COPY [[DEF]]
|
|
; CHECK: S_NOP 0, implicit %3
|
|
bb.0:
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:vreg_1024_align2 = IMPLICIT_DEF
|
|
%2:vreg_1024_align2 = COPY %1
|
|
|
|
bb.1:
|
|
S_NOP 0, implicit %1
|
|
S_NOP 0, implicit %1
|
|
%1:vreg_1024_align2 = IMPLICIT_DEF
|
|
S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
|
|
|
|
bb.3:
|
|
%2.sub0:vreg_1024_align2 = IMPLICIT_DEF
|
|
S_NOP 0, implicit %2.sub0
|
|
|
|
bb.4:
|
|
S_NOP 0, implicit %2
|
|
|
|
bb.5:
|
|
%2:vreg_1024_align2 = IMPLICIT_DEF
|
|
S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
|
|
|
|
bb.6:
|
|
undef %4.sub0:vreg_1024_align2 = COPY %0
|
|
S_NOP 0, implicit %4
|
|
...
|