For AMDGPU, the insertion point for a block may not be the first non-PHI instruction. This happens when a block contains EXEC mask manipulation related to control flow (converging lanes). Use SkipPHIsAndLabels to determine the block insertion point so that the target can skip any block prologue instructions.
Reviewed By: rampitec, ruiling
Differential Revision: https://reviews.llvm.org/D119399
123 lines
5.2 KiB
YAML
123 lines
5.2 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX10 %s
|
|
|
|
# Test that the MachineSink pass respects block prologues when sinking instructions.
|
|
# Specifically, an instruction must not be sunk before the exec mask manipulation at the start of the destination block.
|
|
|
|
---
|
|
# The single machine function under test; the GFX10-LABEL check in the body
# matches on this name.
name: _amdgpu_hs_main
|
|
alignment: 1
|
|
tracksRegLiveness: true
|
|
machineFunctionInfo:
|
|
isEntryFunction: true
|
|
# The body holds the autogenerated GFX10 check lines first, followed by the
# input blocks bb.0 and bb.4 through bb.8.
body: |
|
|
; GFX10-LABEL: name: _amdgpu_hs_main
|
|
; GFX10: bb.0:
|
|
; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8
|
|
; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[DEF]], 8, 5, implicit $exec
|
|
; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
|
|
; GFX10-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_1]], implicit $exec
|
|
; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc
|
|
; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc
|
|
; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc
|
|
; GFX10-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
|
|
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
|
; GFX10-NEXT: S_BRANCH %bb.1
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: bb.1:
|
|
; GFX10-NEXT: successors: %bb.2(0x80000000)
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
|
|
; GFX10-NEXT: S_BRANCH %bb.2
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: bb.2:
|
|
; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_1]], implicit-def $scc
|
|
; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec
|
|
; GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 31
|
|
; GFX10-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_2]], implicit $exec
|
|
; GFX10-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc
|
|
; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc
|
|
; GFX10-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc
|
|
; GFX10-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
|
|
; GFX10-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
|
|
; GFX10-NEXT: S_BRANCH %bb.3
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: bb.3:
|
|
; GFX10-NEXT: successors: %bb.4(0x80000000)
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: S_BRANCH %bb.4
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: bb.4:
|
|
; GFX10-NEXT: successors: %bb.5(0x80000000)
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_3]], implicit-def $scc
|
|
; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GFX10-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
|
; GFX10-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 4
|
|
; GFX10-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[S_MOV_B32_4]], killed [[S_MOV_B32_3]], implicit $exec
|
|
; GFX10-NEXT: S_BRANCH %bb.5
|
|
; GFX10-NEXT: {{ $}}
|
|
; GFX10-NEXT: bb.5:
|
|
; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[DEF2]], implicit-def $scc
|
|
; GFX10-NEXT: S_ENDPGM 0
|
|
; Entry block: defines the values used by later blocks and branches into the
; first exec-masked region (bb.4), which rejoins at bb.5.
; NOTE(review): the GFX10 check lines number blocks consecutively, so their
; bb.1..bb.5 appear to correspond to bb.4..bb.8 of this input.
bb.0:
|
|
successors: %bb.4(0x40000000), %bb.5(0x40000000)
|
|
|
|
; %0 and %14 have no use in this block. The check lines expect a sgpr_32
; IMPLICIT_DEF in their bb.1 and a sreg_32 IMPLICIT_DEF in their bb.4, and
; neither in bb.0.
%0:sgpr_32 = IMPLICIT_DEF
|
|
%14:sreg_32 = IMPLICIT_DEF
|
|
%15:vgpr_32 = IMPLICIT_DEF
|
|
%16:sreg_32 = S_MOV_B32 8
|
|
; %17 is only used by the V_LSHL_ADD_U32_e64 in bb.7. The check lines expect
; this instruction to leave bb.0 and appear in the block matching bb.5,
; directly after that block's leading $exec_lo = S_OR_B32.
%17:vgpr_32 = V_LSHRREV_B32_e64 %16, %15, implicit $exec
|
|
; %18 is used by the compares in both bb.0 and bb.5.
%18:vgpr_32 = V_BFE_U32_e64 %15, 8, 5, implicit $exec
|
|
%19:sreg_32 = S_MOV_B32 5
|
|
%20:sreg_32 = V_CMP_NE_U32_e64 %18, killed %19, implicit $exec
|
|
; Exec-mask setup for the branch: %21 = %20 XOR -1, %22 = %21 AND $exec_lo,
; %23 = $exec_lo XOR %22. %22 becomes the new $exec_lo below; %23 is OR-ed
; back into $exec_lo at the top of bb.5.
%21:sreg_32 = S_XOR_B32 %20, -1, implicit-def $scc
|
|
%22:sreg_32 = S_AND_B32 %21, $exec_lo, implicit-def $scc
|
|
%23:sreg_32 = S_XOR_B32 $exec_lo, %22, implicit-def $scc
|
|
$exec_lo = S_MOV_B32_term %22
|
|
S_CBRANCH_EXECZ %bb.5, implicit $exec
|
|
S_BRANCH %bb.4
|
|
|
|
; Body of the first exec-masked region. In the input it holds only the branch
; to the join block bb.5; the check lines expect a sgpr_32 IMPLICIT_DEF to
; appear in the matching output block.
bb.4:
|
|
successors: %bb.5(0x80000000)
|
|
|
|
S_BRANCH %bb.5
|
|
|
|
; Join block of the first region and entry to the second one (bb.6, rejoining
; at bb.7).
bb.5:
|
|
successors: %bb.6(0x40000000), %bb.7(0x40000000)
|
|
|
|
; Block prologue: ORs %23 (saved in bb.0) back into $exec_lo. The check lines
; expect the V_LSHRREV_B32_e64 sunk from bb.0 to follow this instruction, not
; to precede it.
$exec_lo = S_OR_B32 $exec_lo, %23, implicit-def $scc
|
|
%24:sreg_32 = S_MOV_B32 31
|
|
%25:sreg_32 = V_CMP_NE_U32_e64 %18, killed %24, implicit $exec
|
|
; Same exec-mask sequence as in bb.0: %27 becomes the new $exec_lo and %28 is
; OR-ed back into $exec_lo at the top of bb.7.
%26:sreg_32 = S_XOR_B32 %25, -1, implicit-def $scc
|
|
%27:sreg_32 = S_AND_B32 %26, $exec_lo, implicit-def $scc
|
|
%28:sreg_32 = S_XOR_B32 $exec_lo, %27, implicit-def $scc
|
|
$exec_lo = S_MOV_B32_term %27
|
|
S_CBRANCH_EXECZ %bb.7, implicit $exec
|
|
S_BRANCH %bb.6
|
|
|
|
; Body of the second exec-masked region; contains only the branch to the join
; block bb.7.
bb.6:
|
|
successors: %bb.7(0x80000000)
|
|
|
|
S_BRANCH %bb.7
|
|
|
|
; Join block of the second region; holds the only use of %17.
bb.7:
|
|
successors: %bb.8(0x80000000)
|
|
|
|
; Block prologue: ORs %28 (saved in bb.5) back into $exec_lo. The check lines
; expect the sunk sreg_32 IMPLICIT_DEF to appear after this instruction.
$exec_lo = S_OR_B32 $exec_lo, %28, implicit-def $scc
|
|
%29:sreg_32 = S_MOV_B32 16
|
|
%30:sreg_32 = S_MOV_B32 4
|
|
; Sole user of %17, which is defined in bb.0.
%31:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 %17, %30, killed %29, implicit $exec
|
|
S_BRANCH %bb.8
|
|
|
|
; Final block (no successors): ORs %14, defined in bb.0, into $exec_lo. This is
; the only use of %14 in the function.
bb.8:
|
|
$exec_lo = S_OR_B32 $exec_lo, %14, implicit-def $scc
|
|
S_ENDPGM 0
|
|
|
|
...
|