llvm-project/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir
Carl Ritson ef949ecba5 [MachineSink] Use SkipPHIsAndLabels for sink insertion points
For AMDGPU the insertion point for a block may not be the first
non-PHI instruction.  This happens when a block contains EXEC
mask manipulation related to control flow (converging lanes).

Use SkipPHIsAndLabels to determine the block insertion point
so that the target can skip any block prologue instructions.

Reviewed By: rampitec, ruiling

Differential Revision: https://reviews.llvm.org/D119399
2022-02-16 12:44:22 +09:00

123 lines
5.2 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX10 %s
# Test that the MachineSink pass respects block prologues when sinking instructions.
# Specifically, an instruction must not be sunk above the exec mask manipulation
# at the start of the destination block.
---
name: _amdgpu_hs_main
alignment: 1
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  ; GFX10-LABEL: name: _amdgpu_hs_main
  ; GFX10: bb.0:
  ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
  ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[DEF]], 8, 5, implicit $exec
  ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
  ; GFX10-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_1]], implicit $exec
  ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc
  ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc
  ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc
  ; GFX10-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
  ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GFX10-NEXT: S_BRANCH %bb.1
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: bb.1:
  ; GFX10-NEXT: successors: %bb.2(0x80000000)
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
  ; GFX10-NEXT: S_BRANCH %bb.2
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: bb.2:
  ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_1]], implicit-def $scc
  ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
  ; GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 31
  ; GFX10-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_2]], implicit $exec
  ; GFX10-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc
  ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc
  ; GFX10-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc
  ; GFX10-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
  ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
  ; GFX10-NEXT: S_BRANCH %bb.3
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: bb.3:
  ; GFX10-NEXT: successors: %bb.4(0x80000000)
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: S_BRANCH %bb.4
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: bb.4:
  ; GFX10-NEXT: successors: %bb.5(0x80000000)
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_3]], implicit-def $scc
  ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GFX10-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16
  ; GFX10-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 4
  ; GFX10-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[S_MOV_B32_4]], killed [[S_MOV_B32_3]], implicit $exec
  ; GFX10-NEXT: S_BRANCH %bb.5
  ; GFX10-NEXT: {{ $}}
  ; GFX10-NEXT: bb.5:
  ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[DEF2]], implicit-def $scc
  ; GFX10-NEXT: S_ENDPGM 0
  ;
  ; Notes on the input below. Three values are defined in bb.0 but not used there:
  ;   %0  has no uses anywhere in this function,
  ;   %14 is only read by the S_OR_B32 in bb.8,
  ;   %17 is only read by the V_LSHL_ADD_U32_e64 in bb.7.
  ; The expected output above (its blocks are renumbered bb.0 through bb.5) has the
  ; V_LSHRREV_B32_e64 and the sreg_32 IMPLICIT_DEF placed *after* the leading
  ; "$exec_lo = S_OR_B32 ..." of the block they end up in, never before it.

  ; Entry block: defines the sink candidates and opens the first exec-masked region.
  bb.0:
    successors: %bb.4(0x40000000), %bb.5(0x40000000)
    %0:sgpr_32 = IMPLICIT_DEF
    %14:sreg_32 = IMPLICIT_DEF
    %15:vgpr_32 = IMPLICIT_DEF
    %16:sreg_32 = S_MOV_B32 8
    %17:vgpr_32 = V_LSHRREV_B32_e64 %16, %15, implicit $exec
    %18:vgpr_32 = V_BFE_U32_e64 %15, 8, 5, implicit $exec
    %19:sreg_32 = S_MOV_B32 5
    %20:sreg_32 = V_CMP_NE_U32_e64 %18, killed %19, implicit $exec
    %21:sreg_32 = S_XOR_B32 %20, -1, implicit-def $scc
    %22:sreg_32 = S_AND_B32 %21, $exec_lo, implicit-def $scc
    %23:sreg_32 = S_XOR_B32 $exec_lo, %22, implicit-def $scc
    $exec_lo = S_MOV_B32_term %22
    S_CBRANCH_EXECZ %bb.5, implicit $exec
    S_BRANCH %bb.4

  ; Empty body of the first masked region.
  bb.4:
    successors: %bb.5(0x80000000)
    S_BRANCH %bb.5

  ; Join block: the first instruction ORs %23 back into $exec_lo. The expected
  ; output keeps the sunk V_LSHRREV_B32_e64 below that instruction. The rest of the
  ; block opens the second exec-masked region.
  bb.5:
    successors: %bb.6(0x40000000), %bb.7(0x40000000)
    $exec_lo = S_OR_B32 $exec_lo, %23, implicit-def $scc
    %24:sreg_32 = S_MOV_B32 31
    %25:sreg_32 = V_CMP_NE_U32_e64 %18, killed %24, implicit $exec
    %26:sreg_32 = S_XOR_B32 %25, -1, implicit-def $scc
    %27:sreg_32 = S_AND_B32 %26, $exec_lo, implicit-def $scc
    %28:sreg_32 = S_XOR_B32 $exec_lo, %27, implicit-def $scc
    $exec_lo = S_MOV_B32_term %27
    S_CBRANCH_EXECZ %bb.7, implicit $exec
    S_BRANCH %bb.6

  ; Empty body of the second masked region.
  bb.6:
    successors: %bb.7(0x80000000)
    S_BRANCH %bb.7

  ; Join block: ORs %28 back into $exec_lo first, then uses %17. The expected
  ; output places the sunk sreg_32 IMPLICIT_DEF directly after the S_OR_B32.
  bb.7:
    successors: %bb.8(0x80000000)
    $exec_lo = S_OR_B32 $exec_lo, %28, implicit-def $scc
    %29:sreg_32 = S_MOV_B32 16
    %30:sreg_32 = S_MOV_B32 4
    %31:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 %17, %30, killed %29, implicit $exec
    S_BRANCH %bb.8

  ; Exit block: the only reader of %14 is this block's leading exec update.
  bb.8:
    $exec_lo = S_OR_B32 $exec_lo, %14, implicit-def $scc
    S_ENDPGM 0
...