This aims to fix the issue that caused https://reviews.llvm.org/D106408 to be reverted. CalcSpillWeights will reduce the weight of an interval by half if it's considered rematerializable, so that it will be evicted before others. It decides this by checking TII.isTriviallyReMaterializable. However, rematerialization may still fail if any of the defining MI's uses aren't available at the locations where it needs to be rematerialized. LiveRangeEdit::canRematerializeAt calls allUsesAvailableAt to check this, but CalcSpillWeights doesn't, so the two diverge. This fixes it by also checking allUsesAvailableAt in CalcSpillWeights. In practice this causes zero changes on AArch64/X86-64/RISC-V as measured on llvm-test-suite, but it prevents weights from being perturbed in an upcoming patch which enables more rematerialization by re-attempting https://reviews.llvm.org/D106408
283 lines
11 KiB
YAML
283 lines
11 KiB
YAML
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-vgpr-index-mode -run-pass=greedy -stress-regalloc=16 -o - %s | FileCheck -check-prefixes=GCN %s
|
|
|
|
# An interval for a register that was partially defined was split, creating
|
|
# a new use (a COPY) which was reached by the undef point. In particular,
|
|
# there was a subrange of the new register which was reached by an "undef"
|
|
# point. When the code in extendSegmentsToUses verified value numbers between
|
|
# the new and the old live ranges, it did not account for this kind of a
|
|
# situation and asserted expecting the old value to exist. For a PHI node
|
|
# it is legal to have a missing predecessor value as long as the end of
|
|
# the predecessor is jointly dominated by the undefs.
|
|
#
|
|
# A simplified form of this can be illustrated as
|
|
#
|
|
# bb.1:
|
|
# %0:vreg_64 = IMPLICIT_DEF
|
|
# ...
|
|
# S_CBRANCH_SCC1 %bb.2, implicit $vcc
|
|
# S_BRANCH %bb.3
|
|
#
|
|
# bb.2:
|
|
# ; predecessors: %bb.1, %bb.4
|
|
# dead %1:vreg_64 = COPY %0:vreg_64 ; This is the point of the inserted split
|
|
# ...
|
|
# S_BRANCH %bb.5
|
|
#
|
|
# bb.3:
|
|
# ; predecessors: %bb.1
|
|
# undef %0.sub0:vreg_64 = COPY %123:sreg_32 ; undef point for %0.sub1
|
|
# ...
|
|
# S_BRANCH %bb.4
|
|
#
|
|
# bb.4:
|
|
# ; predecessors: %bb.3
|
|
# ...
|
|
# S_BRANCH %bb.2
|
|
#
|
|
# This test exposes this scenario, which previously caused an assertion failure.
|
|
|
|
---
|
|
name: _amdgpu_ps_main
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
|
stackPtrOffsetReg: $sgpr32
|
|
liveins:
|
|
- { reg: '$vgpr2', virtual-reg: '%0' }
|
|
- { reg: '$vgpr3', virtual-reg: '%1' }
|
|
- { reg: '$vgpr4', virtual-reg: '%2' }
|
|
body: |
|
|
bb.0:
|
|
successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
liveins: $vgpr2, $vgpr3, $vgpr4
|
|
%2:vgpr_32 = COPY $vgpr4
|
|
%1:vgpr_32 = COPY $vgpr3
|
|
%0:vgpr_32 = COPY $vgpr2
|
|
S_CBRANCH_SCC0 %bb.2, implicit undef $scc
|
|
|
|
bb.1:
|
|
successors: %bb.5(0x80000000)
|
|
undef %3.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%3.sub1:vreg_128 = COPY %3.sub0
|
|
%3.sub2:vreg_128 = COPY %3.sub0
|
|
S_BRANCH %bb.5
|
|
|
|
bb.2:
|
|
successors: %bb.3(0x40000000), %bb.4(0x40000000)
|
|
S_CBRANCH_SCC0 %bb.4, implicit undef $scc
|
|
|
|
bb.3:
|
|
successors: %bb.5(0x80000000)
|
|
undef %3.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%3.sub1:vreg_128 = COPY %3.sub0
|
|
S_BRANCH %bb.5
|
|
|
|
bb.4:
|
|
successors: %bb.5(0x80000000)
|
|
%3:vreg_128 = IMPLICIT_DEF
|
|
|
|
bb.5:
|
|
successors: %bb.6(0x40000000), %bb.22(0x40000000)
|
|
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
S_CBRANCH_SCC1 %bb.22, implicit undef $scc
|
|
S_BRANCH %bb.6
|
|
|
|
bb.6:
|
|
successors: %bb.8(0x40000000), %bb.11(0x40000000)
|
|
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
dead %6:vgpr_32 = V_MUL_F32_e32 0, undef %7:vgpr_32, implicit $mode, implicit $exec
|
|
dead %8:vgpr_32 = V_MUL_F32_e32 0, %2, implicit $mode, implicit $exec
|
|
undef %9.sub1:vreg_64 = V_MUL_F32_e32 0, %1, implicit $mode, implicit $exec
|
|
undef %10.sub0:vreg_128 = V_MUL_F32_e32 0, %0, implicit $mode, implicit $exec
|
|
undef %11.sub0:sgpr_256 = S_MOV_B32 0
|
|
%11.sub1:sgpr_256 = COPY %11.sub0
|
|
%11.sub2:sgpr_256 = COPY %11.sub0
|
|
%11.sub3:sgpr_256 = COPY %11.sub0
|
|
%11.sub4:sgpr_256 = COPY %11.sub0
|
|
%11.sub5:sgpr_256 = COPY %11.sub0
|
|
%11.sub6:sgpr_256 = COPY %11.sub0
|
|
%11.sub7:sgpr_256 = COPY %11.sub0
|
|
%12:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %9, %11, undef %13:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4)
|
|
%14:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
|
%15:vreg_128 = IMPLICIT_DEF
|
|
S_CBRANCH_SCC1 %bb.8, implicit undef $scc
|
|
S_BRANCH %bb.11
|
|
|
|
bb.7:
|
|
successors: %bb.13(0x80000000)
|
|
|
|
; GCN-LABEL: bb.7:
|
|
; GCN: undef %{{.+}}.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%15.sub1:vreg_128 = COPY %15.sub0
|
|
%15.sub2:vreg_128 = COPY %15.sub0
|
|
%5:vgpr_32 = IMPLICIT_DEF
|
|
S_BRANCH %bb.13
|
|
|
|
bb.8:
|
|
successors: %bb.9(0x40000000), %bb.10(0x40000000)
|
|
S_CBRANCH_SCC0 %bb.10, implicit undef $scc
|
|
|
|
bb.9:
|
|
successors: %bb.12(0x80000000)
|
|
|
|
; GCN-LABEL: bb.9:
|
|
; GCN: undef %{{.+}}.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
undef %15.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec
|
|
%15.sub1:vreg_128 = COPY %15.sub0
|
|
%15.sub2:vreg_128 = COPY %15.sub0
|
|
S_BRANCH %bb.12
|
|
|
|
bb.10:
|
|
successors: %bb.12(0x80000000)
|
|
|
|
; GCN-LABEL: bb.10:
|
|
; GCN: undef %{{.+}}.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
|
|
|
|
undef %15.sub0:vreg_128 = V_MOV_B32_e32 2143289344, implicit $exec
|
|
%15.sub1:vreg_128 = COPY %15.sub0
|
|
%15.sub2:vreg_128 = COPY %15.sub0
|
|
S_BRANCH %bb.12
|
|
|
|
bb.11:
|
|
successors: %bb.7(0x40000000), %bb.13(0x40000000)
|
|
%16:sreg_64 = V_CMP_NE_U32_e64 0, %14, implicit $exec
|
|
%17:sreg_64 = S_AND_B64 $exec, %16, implicit-def dead $scc
|
|
$vcc = COPY %17
|
|
S_CBRANCH_VCCNZ %bb.7, implicit $vcc
|
|
S_BRANCH %bb.13
|
|
|
|
bb.12:
|
|
successors: %bb.11(0x80000000)
|
|
%14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%5:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
|
S_BRANCH %bb.11
|
|
|
|
bb.13:
|
|
successors: %bb.15(0x40000000), %bb.14(0x40000000)
|
|
|
|
%18:vgpr_32 = V_MAD_F32_e64 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $mode, implicit $exec
|
|
%19:vgpr_32 = V_MAD_F32_e64 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%20:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sgpr_128, 1040, 0 :: (dereferenceable invariant load (s128))
|
|
%22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $mode, implicit $exec
|
|
%23:vgpr_32 = V_MAD_F32_e64 0, %18, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%24:vgpr_32 = COPY %20.sub3
|
|
%25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $mode, implicit $exec
|
|
%26:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sgpr_128, 1056, 0 :: (dereferenceable invariant load (s128))
|
|
%28:vgpr_32 = V_MAD_F32_e64 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $mode, implicit $exec
|
|
%30:vgpr_32 = V_RCP_F32_e32 %29, implicit $mode, implicit $exec
|
|
%25:vgpr_32 = V_MAC_F32_e32 0, %18, %25, implicit $mode, implicit $exec
|
|
%31:vgpr_32 = V_MAD_F32_e64 0, target-flags(amdgpu-gotprel) 0, 0, %12.sub0, 0, %24, 0, 0, implicit $mode, implicit $exec
|
|
%32:vgpr_32 = V_ADD_F32_e32 %25, %31, implicit $mode, implicit $exec
|
|
%33:vgpr_32 = V_MUL_F32_e32 %22, %30, implicit $mode, implicit $exec
|
|
%34:vgpr_32 = V_MUL_F32_e32 %23, %30, implicit $mode, implicit $exec
|
|
%35:vgpr_32 = V_MUL_F32_e32 %32, %30, implicit $mode, implicit $exec
|
|
%36:vgpr_32 = V_MUL_F32_e32 0, %34, implicit $mode, implicit $exec
|
|
%36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $mode, implicit $exec
|
|
%37:vgpr_32 = V_MAD_F32_e64 0, %35, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%38:sreg_64_xexec = V_CMP_NE_U32_e64 0, %5, implicit $exec
|
|
%39:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %38, implicit $exec
|
|
V_CMP_NE_U32_e32 1, %39, implicit-def $vcc, implicit $exec
|
|
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
|
|
%40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $mode, implicit $exec
|
|
S_CBRANCH_VCCZ %bb.15, implicit $vcc
|
|
|
|
bb.14:
|
|
successors: %bb.17(0x80000000)
|
|
S_BRANCH %bb.17
|
|
|
|
bb.15:
|
|
successors: %bb.16(0x40000000), %bb.18(0x40000000)
|
|
%41:vgpr_32 = V_MAD_F32_e64 0, %40, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%42:sreg_64 = V_CMP_LE_F32_e64 0, 0, 0, %41, 0, implicit $mode, implicit $exec
|
|
%43:sreg_64 = V_CMP_GE_F32_e64 0, 1065353216, 0, %41, 0, implicit $mode, implicit $exec
|
|
%44:sreg_64 = S_AND_B64 %43, %43, implicit-def dead $scc
|
|
%45:sreg_64 = S_AND_B64 %42, %42, implicit-def dead $scc
|
|
%46:sreg_64 = S_AND_B64 %45, %44, implicit-def dead $scc
|
|
%47:sreg_64 = COPY $exec, implicit-def $exec
|
|
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
|
|
$exec = S_MOV_B64_term %48
|
|
S_CBRANCH_EXECZ %bb.18, implicit $exec
|
|
S_BRANCH %bb.16
|
|
|
|
bb.16:
|
|
successors: %bb.18(0x80000000)
|
|
S_BRANCH %bb.18
|
|
|
|
bb.17:
|
|
successors: %bb.21(0x40000000), %bb.23(0x40000000)
|
|
%49:sreg_64 = V_CMP_NE_U32_e64 0, %5, implicit $exec
|
|
%50:sreg_64 = S_AND_B64 $exec, %49, implicit-def dead $scc
|
|
%51:vreg_128 = IMPLICIT_DEF
|
|
$vcc = COPY %50
|
|
S_CBRANCH_VCCNZ %bb.21, implicit $vcc
|
|
S_BRANCH %bb.23
|
|
|
|
bb.18:
|
|
successors: %bb.20(0x40000000), %bb.19(0x40000000)
|
|
$exec = S_OR_B64 $exec, %47, implicit-def $scc
|
|
%52:vgpr_32 = V_MAD_F32_e64 0, %3.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 1, %3.sub0, 0, 0, implicit $mode, implicit $exec
|
|
%53:vgpr_32 = V_MUL_F32_e32 -2147483648, %3.sub1, implicit $mode, implicit $exec
|
|
%53:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel32-hi) 1065353216, %3.sub2, %53, implicit $mode, implicit $exec
|
|
%54:vgpr_32 = V_MUL_F32_e32 %53, %53, implicit $mode, implicit $exec
|
|
%54:vgpr_32 = V_MAC_F32_e32 %52, %52, %54, implicit $mode, implicit $exec
|
|
%55:vgpr_32 = V_SQRT_F32_e32 %54, implicit $mode, implicit $exec
|
|
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%56:vgpr_32 = V_MOV_B32_e32 981668463, implicit $exec
|
|
%57:sreg_64 = V_CMP_NGT_F32_e64 0, %55, 0, %56, 0, implicit $mode, implicit $exec
|
|
%58:sreg_64 = S_AND_B64 $exec, %57, implicit-def dead $scc
|
|
$vcc = COPY %58
|
|
S_CBRANCH_VCCZ %bb.20, implicit $vcc
|
|
|
|
bb.19:
|
|
successors: %bb.17(0x80000000)
|
|
S_BRANCH %bb.17
|
|
|
|
bb.20:
|
|
successors: %bb.17(0x80000000)
|
|
S_BRANCH %bb.17
|
|
|
|
bb.21:
|
|
successors: %bb.23(0x80000000)
|
|
%59:sreg_32 = S_MOV_B32 0
|
|
undef %51.sub0:vreg_128 = COPY %59
|
|
S_BRANCH %bb.23
|
|
|
|
bb.22:
|
|
successors: %bb.24(0x80000000)
|
|
S_BRANCH %bb.24
|
|
|
|
bb.23:
|
|
successors: %bb.22(0x80000000)
|
|
undef %60.sub1:vreg_64 = V_CVT_I32_F32_e32 %1, implicit $mode, implicit $exec
|
|
%60.sub0:vreg_64 = V_CVT_I32_F32_e32 %0, implicit $mode, implicit $exec
|
|
undef %61.sub0:sgpr_256 = S_MOV_B32 0
|
|
%61.sub1:sgpr_256 = COPY %61.sub0
|
|
%61.sub2:sgpr_256 = COPY %61.sub0
|
|
%61.sub3:sgpr_256 = COPY %61.sub0
|
|
%61.sub4:sgpr_256 = COPY %61.sub0
|
|
%61.sub5:sgpr_256 = COPY %61.sub0
|
|
%61.sub6:sgpr_256 = COPY %61.sub0
|
|
%61.sub7:sgpr_256 = COPY %61.sub0
|
|
%62:vgpr_32 = V_MOV_B32_e32 1033100696, implicit $exec
|
|
%63:vgpr_32 = V_MUL_F32_e32 1060575065, %15.sub1, implicit $mode, implicit $exec
|
|
%63:vgpr_32 = V_MAC_F32_e32 1046066128, %15.sub0, %63, implicit $mode, implicit $exec
|
|
%64:vgpr_32 = IMAGE_LOAD_V1_V2 %60, %61, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4)
|
|
%64:vgpr_32 = V_MAC_F32_e32 target-flags(amdgpu-gotprel) 0, %51.sub0, %64, implicit $mode, implicit $exec
|
|
%65:vgpr_32 = V_MUL_F32_e32 0, %64, implicit $mode, implicit $exec
|
|
%66:vgpr_32 = V_MUL_F32_e32 0, %65, implicit $mode, implicit $exec
|
|
%67:vgpr_32 = V_MAD_F32_e64 0, %66, 0, %62, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%63:vgpr_32 = V_MAC_F32_e32 %15.sub2, %62, %63, implicit $mode, implicit $exec
|
|
%4:vgpr_32 = V_ADD_F32_e32 %63, %67, implicit $mode, implicit $exec
|
|
S_BRANCH %bb.22
|
|
|
|
bb.24:
|
|
%68:vgpr_32 = V_MUL_F32_e32 0, %4, implicit $mode, implicit $exec
|
|
%69:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %70:vgpr_32, 0, %68, 0, 0, implicit $mode, implicit $exec
|
|
EXP 0, undef %71:vgpr_32, %69, undef %72:vgpr_32, undef %73:vgpr_32, -1, -1, 15, implicit $exec
|
|
S_ENDPGM 0
|
|
...
|