RegisterCoalescer: Do not introduce uses of empty register classes (#161809)
Query RegisterClassInfo to check whether any registers of the new class are actually available for allocation, and do not coalesce if none are. Currently AMDGPU overrides shouldCoalesce to avoid this situation. The target hook does not have access to the dynamic register class counts, but ideally the target hook would only be used for profitability concerns. The new test's output doesn't change, due to the AMDGPU shouldCoalesce override, but the function would be unallocatable if we dropped the override and switched to the default implementation. The existing limit-coalesce.mir already tests the behavior of this override, but the override is too conservative and isn't checking the case where the new class is unallocatable. Add this check so the override can be relaxed.
This commit is contained in:
parent
be9e747d49
commit
067a11015d
@ -2051,6 +2051,12 @@ bool RegisterCoalescer::joinCopy(
|
||||
}
|
||||
|
||||
if (CP.getNewRC()) {
|
||||
if (RegClassInfo.getNumAllocatableRegs(CP.getNewRC()) == 0) {
|
||||
LLVM_DEBUG(dbgs() << "\tNo " << TRI->getRegClassName(CP.getNewRC())
|
||||
<< " are available for allocation\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto SrcRC = MRI->getRegClass(CP.getSrcReg());
|
||||
auto DstRC = MRI->getRegClass(CP.getDstReg());
|
||||
unsigned SrcIdx = CP.getSrcIdx();
|
||||
|
||||
@ -0,0 +1,27 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
|
||||
|
||||
; Make sure the coalescer doesn't introduce any uses of
|
||||
; vreg_1024. None are available to allocate with the register budget
|
||||
; of this function.
|
||||
|
||||
define void @no_introduce_vreg_1024() #0 {
|
||||
; CHECK-LABEL: no_introduce_vreg_1024:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; def v[0:7]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: v_mov_b32_e32 v9, v0
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ; use v[0:15]
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%tuple = call <8 x i32> asm sideeffect "; def $0","=v"()
|
||||
%sub0 = extractelement <8 x i32> %tuple, i32 0
|
||||
%insert = insertelement <16 x i32> poison, i32 %sub0, i32 9
|
||||
call void asm sideeffect "; use $0","v"(<16 x i32> %insert)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }
|
||||
@ -0,0 +1,34 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s
|
||||
|
||||
# The register budget for this function does not permit using 1024-bit
|
||||
# registers. The coalescer should not introduce a 1024-bit virtual
|
||||
# register which will fail to allocate.
|
||||
|
||||
--- |
|
||||
define void @no_introduce_vreg_1024() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
|
||||
...
|
||||
---
|
||||
name: no_introduce_vreg_1024
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
occupancy: 10
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
|
||||
; CHECK-LABEL: name: no_introduce_vreg_1024
|
||||
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub9:vreg_512 = COPY [[COPY]].sub0
|
||||
; CHECK-NEXT: SI_RETURN implicit [[COPY1]]
|
||||
%0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
|
||||
undef %1.sub9:vreg_512 = COPY %0.sub0
|
||||
SI_RETURN implicit %1
|
||||
|
||||
...
|
||||
Loading…
x
Reference in New Issue
Block a user