
If we have a v_mov_b32 or v_accvgpr_write_b32 with an inline immediate, replace it with a pseudo which writes to the combined AV_* class. This relaxes the operand constraints, which will allow the allocator to inflate the register class to AV_* to potentially avoid spilling. The allocator does not know how to replace an instruction to enable the change of register class. I originally tried to do this by changing all of the places we introduce v_mov_b32 with immediate, but it's along tail of niche cases that require manual updating. Plus we can restrict this to only run on functions where we know we will be allocating AGPRs.
183 lines
13 KiB
YAML
183 lines
13 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -run-pass si-fold-operands %s -o - | FileCheck %s
|
|
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=COALESCE
|
|
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=GFX908-COALESCE
|
|
|
|
...
|
|
---
|
|
name: test
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; CHECK-LABEL: name: test
|
|
; CHECK: bb.0:
|
|
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; CHECK-NEXT: liveins: $sgpr4_sgpr5
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
|
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: S_BITCMP0_B32 killed [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
|
|
; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.1:
|
|
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
|
; CHECK-NEXT: S_BRANCH %bb.3
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.2:
|
|
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_4:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
|
|
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_4]], %subreg.sub3
|
|
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
|
|
; CHECK-NEXT: [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_16X16X16F16_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], killed [[V_MFMA_F32_16X16X16F16_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_16X16X16F16_e64_2:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], killed [[V_MFMA_F32_16X16X16F16_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MFMA_F32_16X16X16F16_e64_3:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], killed [[V_MFMA_F32_16X16X16F16_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MFMA_F32_16X16X16F16_e64_3]].sub0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.3:
|
|
; CHECK-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_]], %bb.1, [[V_MFMA_F32_16X16X16F16_e64_3]].sub0, %bb.2
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
|
|
; CHECK-NEXT: [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_PACK_B32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed [[V_CVT_F16_F32_e64_]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_PACK_B32_F16_e64_]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
|
|
; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
|
|
; CHECK-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE2]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
;
|
|
; COALESCE-LABEL: name: test
|
|
; COALESCE: bb.0:
|
|
; COALESCE-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; COALESCE-NEXT: liveins: $sgpr4_sgpr5
|
|
; COALESCE-NEXT: {{ $}}
|
|
; COALESCE-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
|
|
; COALESCE-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
|
; COALESCE-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
|
|
; COALESCE-NEXT: S_BITCMP0_B32 [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
|
|
; COALESCE-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
|
|
; COALESCE-NEXT: {{ $}}
|
|
; COALESCE-NEXT: bb.1:
|
|
; COALESCE-NEXT: successors: %bb.3(0x80000000)
|
|
; COALESCE-NEXT: {{ $}}
|
|
; COALESCE-NEXT: undef [[AV_MOV_:%[0-9]+]].sub0:areg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
|
; COALESCE-NEXT: S_BRANCH %bb.3
|
|
; COALESCE-NEXT: {{ $}}
|
|
; COALESCE-NEXT: bb.2:
|
|
; COALESCE-NEXT: successors: %bb.3(0x80000000)
|
|
; COALESCE-NEXT: {{ $}}
|
|
; COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; COALESCE-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B32_]].sub0_sub1
|
|
; COALESCE-NEXT: [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; COALESCE-NEXT: [[V_MFMA_F32_16X16X16F16_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; COALESCE-NEXT: [[V_MFMA_F32_16X16X16F16_e64_2:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; COALESCE-NEXT: [[AV_MOV_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; COALESCE-NEXT: {{ $}}
|
|
; COALESCE-NEXT: bb.3:
|
|
; COALESCE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub0
|
|
; COALESCE-NEXT: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY2]], implicit $mode, implicit $exec
|
|
; COALESCE-NEXT: undef [[V_PACK_B32_F16_e64_:%[0-9]+]].sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, [[V_CVT_F16_F32_e32_]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; COALESCE-NEXT: [[V_PACK_B32_F16_e64_:%[0-9]+]].sub1:vreg_64_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
|
; COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; COALESCE-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[V_PACK_B32_F16_e64_]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
|
|
; COALESCE-NEXT: S_ENDPGM 0
|
|
;
|
|
; GFX908-COALESCE-LABEL: name: test
|
|
; GFX908-COALESCE: bb.0:
|
|
; GFX908-COALESCE-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
|
; GFX908-COALESCE-NEXT: liveins: $sgpr4_sgpr5
|
|
; GFX908-COALESCE-NEXT: {{ $}}
|
|
; GFX908-COALESCE-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
|
|
; GFX908-COALESCE-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
|
; GFX908-COALESCE-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
|
|
; GFX908-COALESCE-NEXT: S_BITCMP0_B32 [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
|
|
; GFX908-COALESCE-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
|
|
; GFX908-COALESCE-NEXT: {{ $}}
|
|
; GFX908-COALESCE-NEXT: bb.1:
|
|
; GFX908-COALESCE-NEXT: successors: %bb.3(0x80000000)
|
|
; GFX908-COALESCE-NEXT: {{ $}}
|
|
; GFX908-COALESCE-NEXT: undef [[AV_MOV_:%[0-9]+]].sub0:areg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
|
; GFX908-COALESCE-NEXT: S_BRANCH %bb.3
|
|
; GFX908-COALESCE-NEXT: {{ $}}
|
|
; GFX908-COALESCE-NEXT: bb.2:
|
|
; GFX908-COALESCE-NEXT: successors: %bb.3(0x80000000)
|
|
; GFX908-COALESCE-NEXT: {{ $}}
|
|
; GFX908-COALESCE-NEXT: undef [[AV_MOV_1:%[0-9]+]].sub0:areg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
|
; GFX908-COALESCE-NEXT: [[AV_MOV_1:%[0-9]+]].sub1:areg_128_align2 = COPY [[AV_MOV_1]].sub0
|
|
; GFX908-COALESCE-NEXT: [[AV_MOV_1:%[0-9]+]].sub2:areg_128_align2 = COPY [[AV_MOV_1]].sub0
|
|
; GFX908-COALESCE-NEXT: [[AV_MOV_1:%[0-9]+]].sub3:areg_128_align2 = COPY [[AV_MOV_1]].sub0
|
|
; GFX908-COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; GFX908-COALESCE-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B32_]].sub0_sub1
|
|
; GFX908-COALESCE-NEXT: [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[AV_MOV_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GFX908-COALESCE-NEXT: [[V_MFMA_F32_16X16X16F16_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GFX908-COALESCE-NEXT: [[V_MFMA_F32_16X16X16F16_e64_2:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GFX908-COALESCE-NEXT: [[AV_MOV_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GFX908-COALESCE-NEXT: {{ $}}
|
|
; GFX908-COALESCE-NEXT: bb.3:
|
|
; GFX908-COALESCE-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]].sub0
|
|
; GFX908-COALESCE-NEXT: [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY2]], implicit $mode, implicit $exec
|
|
; GFX908-COALESCE-NEXT: undef [[V_PACK_B32_F16_e64_:%[0-9]+]].sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, [[V_CVT_F16_F32_e32_]], 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GFX908-COALESCE-NEXT: [[V_PACK_B32_F16_e64_:%[0-9]+]].sub1:vreg_64_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
|
|
; GFX908-COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; GFX908-COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; GFX908-COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
|
|
; GFX908-COALESCE-NEXT: BUFFER_STORE_DWORDX2_OFFSET_exact [[V_PACK_B32_F16_e64_]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
|
|
; GFX908-COALESCE-NEXT: S_ENDPGM 0
|
|
bb.0:
|
|
successors: %bb.2, %bb.1
|
|
liveins: $sgpr4_sgpr5
|
|
|
|
%0:sgpr_64(p4) = COPY $sgpr4_sgpr5
|
|
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
|
|
%2:sgpr_32 = S_MOV_B32 0
|
|
S_BITCMP0_B32 killed %1, 0, implicit-def $scc
|
|
S_CBRANCH_SCC0 %bb.2, implicit $scc
|
|
|
|
bb.1:
|
|
successors: %bb.3
|
|
|
|
%3:sgpr_32 = COPY %2
|
|
%4:vgpr_32 = COPY %3, implicit $exec
|
|
S_BRANCH %bb.3
|
|
|
|
bb.2:
|
|
successors: %bb.3
|
|
|
|
%5:sgpr_32 = S_MOV_B32 0
|
|
%6:vgpr_32 = COPY %5
|
|
%7:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
|
|
%8:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
|
|
%9:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
|
|
%10:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
|
|
%11:areg_128_align2 = REG_SEQUENCE %7, %subreg.sub0, %8, %subreg.sub1, %9, %subreg.sub2, %10, %subreg.sub3
|
|
%12:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %5, %subreg.sub1
|
|
%13:vreg_64_align2 = COPY %12
|
|
%14:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %11, 0, 0, 0, implicit $mode, implicit $exec
|
|
%15:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %14, 0, 0, 0, implicit $mode, implicit $exec
|
|
%16:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %15, 0, 0, 0, implicit $mode, implicit $exec
|
|
%17:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%18:vgpr_32 = COPY %17.sub0
|
|
%19:vgpr_32 = COPY %18
|
|
|
|
bb.3:
|
|
%20:vgpr_32 = PHI %4, %bb.1, %19, %bb.2
|
|
%21:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, %20, 0, 0, implicit $mode, implicit $exec
|
|
%22:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed %21, 0, %2, 0, 0, implicit $mode, implicit $exec
|
|
%23:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
%24:vreg_64_align2 = REG_SEQUENCE %22, %subreg.sub0, killed %23, %subreg.sub1
|
|
%25:sgpr_128 = REG_SEQUENCE %2, %subreg.sub0, %2, %subreg.sub1, %2, %subreg.sub2, %2, %subreg.sub3
|
|
%26:vreg_64_align2 = COPY %24
|
|
BUFFER_STORE_DWORDX2_OFFSET_exact killed %26, killed %25, %2, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
|
|
S_ENDPGM 0
|
|
|
|
...
|