
When true16 is enabled, isel starts to emit sgpr_lo16 registers when an i16/i32 trunc/sext is generated, or when a salu32 is used by a vgpr16 or vice versa. This causes a problem because sgpr_lo16 is not fully supported in the pipeline. True16 mode works fine in -O3 mode since the folding pass removes sgpr_lo16 from the pipeline. However, it hits a problem in -O0 mode because the folding pass is skipped. This patch: 1. stops emitting sgpr_lo16 from isel; 2. updates the codegen patterns to split the uniform/divergent patterns for i16/i32 conversion; 3. updates the fix-sgpr-copies pass to address the legalization requirement in true16 mode, and updates the fix-sgpr-copies-f16-true16.mir test to include all possible combinations. This patch was tested with CTS and a downstream repo with -O0 testing.
273 lines
14 KiB
YAML
273 lines
14 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
|
|
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
|
|
|
|
# cmp_f16: a vgpr_16 V_CVT_F16_U16_t16_e64 result is copied into an sreg_32 and
# compared with S_CMP_LT_F16; $scc is then copied and used by V_CNDMASK_B32_e64.
# The checks expect a V_CMP_LT_F16_t16_e64 whose first source is the .lo16 of a
# vgpr_32 REG_SEQUENCE (converted value in lo16, IMPLICIT_DEF in hi16), with the
# V_CNDMASK_B32_e64 consuming the compare result directly.
---
|
|
name: cmp_f16
|
|
body: |
|
|
bb.0.entry:
|
|
; GCN-LABEL: name: cmp_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16, [[DEF2]], %subreg.hi16
|
|
; GCN-NEXT: [[V_CMP_LT_F16_t16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = nofpexcept V_CMP_LT_F16_t16_e64 0, killed [[REG_SEQUENCE]].lo16, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed [[V_CMP_LT_F16_t16_e64_]], implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:sreg_32 = IMPLICIT_DEF
|
|
%2:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%3:sreg_32 = COPY %2:vgpr_16
|
|
nofpexcept S_CMP_LT_F16 killed %3:sreg_32, %1:sreg_32, implicit-def $scc, implicit $mode
|
|
%4:sreg_32_xm0_xexec = COPY $scc
|
|
%5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %4, implicit $exec
|
|
...
|
|
|
|
# cvt_hi_f32_f16: a vgpr_16 value is copied into an sreg_32 that feeds
# S_CVT_HI_F32_F16. The checks expect the vgpr_16 to be widened with a
# REG_SEQUENCE (IMPLICIT_DEF in hi16), copied to another vgpr_32, and read
# through its .hi16 subregister by V_CVT_F32_F16_t16_e64.
---
|
|
name: cvt_hi_f32_f16
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: cvt_hi_f32_f16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
|
|
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]]
|
|
; GCN-NEXT: [[V_CVT_F32_F16_t16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_t16_e64 0, [[COPY]].hi16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_16
|
|
%3:sreg_32 = S_CVT_HI_F32_F16 %2:sreg_32, implicit $mode
|
|
...
|
|
|
|
# s_or_b32: a vgpr_16 value is copied into an sreg_32, combined with itself by
# S_OR_B32, and the 32-bit result is consumed by V_CVT_F16_U16_t16_e64.
# The checks expect a V_OR_B32_e64 operating on the REG_SEQUENCE-widened value
# and the final convert reading the .lo16 subregister of the V_OR result.
---
|
|
name: s_or_b32
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: s_or_b32
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_U16_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
|
|
; GCN-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[REG_SEQUENCE]], [[REG_SEQUENCE]], implicit $exec
|
|
; GCN-NEXT: [[V_CVT_F16_U16_t16_e64_1:%[0-9]+]]:vgpr_16 = V_CVT_F16_U16_t16_e64 0, [[V_OR_B32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %0:vgpr_16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%2:sreg_32 = COPY %1:vgpr_16
|
|
%3:sreg_32 = S_OR_B32 %2:sreg_32, %2:sreg_32, implicit-def $scc
|
|
%4:vgpr_16 = V_CVT_F16_U16_t16_e64 0, %3:sreg_32, 0, 0, 0, implicit $mode, implicit $exec
|
|
...
|
|
|
|
# salu16_usedby_salu32: a vgpr_32 is copied into an sreg_32; the S_TRUNC_F16
# result is then used by S_XOR_B32 together with the copied value.
# The checks expect V_TRUNC_F16_t16_e64 reading [[DEF]].lo16 and producing a
# vgpr_16, which is widened by a REG_SEQUENCE before feeding V_XOR_B32_e64.
---
|
|
name: salu16_usedby_salu32
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: salu16_usedby_salu32
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[DEF]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_TRUNC_F16_t16_e64_]], %subreg.lo16, [[DEF2]], %subreg.hi16
|
|
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE]], [[DEF]], implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_32
|
|
%2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode
|
|
%3:sreg_32 = S_XOR_B32 %2:sreg_32, %1:sreg_32, implicit-def $scc
|
|
...
|
|
|
|
# salu16_usedby_valu32: like salu16_usedby_salu32, but the user of the
# S_TRUNC_F16 result is already a VALU instruction (V_XOR_B32_e64, written here
# with an implicit-def $scc operand).
# The checks expect V_TRUNC_F16_t16_e64 on [[DEF]].lo16, a REG_SEQUENCE widening
# its vgpr_16 result, and the V_XOR_B32_e64 keeping its implicit-def $scc.
---
|
|
name: salu16_usedby_valu32
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: salu16_usedby_valu32
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[DEF]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_TRUNC_F16_t16_e64_]], %subreg.lo16, [[DEF2]], %subreg.hi16
|
|
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE]], [[DEF]], implicit-def $scc, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_32
|
|
%2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode
|
|
%3:vgpr_32 = V_XOR_B32_e64 %2:sreg_32, %1:sreg_32, implicit-def $scc, implicit $exec
|
|
...
|
|
|
|
# salu32_usedby_salu16: a vgpr_32 is copied into an sreg_32, XOR-ed with itself
# by S_XOR_B32, and the 32-bit result is consumed by S_TRUNC_F16.
# The checks expect V_XOR_B32_e64 on the original vgpr_32 followed by
# V_TRUNC_F16_t16_e64 reading the .lo16 subregister of the XOR result.
---
|
|
name: salu32_usedby_salu16
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: salu32_usedby_salu16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[DEF]], [[DEF]], implicit $exec
|
|
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[V_XOR_B32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_32
|
|
%2:sreg_32 = S_XOR_B32 %1:sreg_32, %1:sreg_32, implicit-def $scc
|
|
%3:sreg_32 = S_TRUNC_F16 %2:sreg_32, implicit $mode
|
|
...
|
|
|
|
# salu32_usedby_valu16: like salu32_usedby_salu16, but the user of the S_XOR_B32
# result is already a 16-bit VALU instruction (V_TRUNC_F16_t16_e64 taking the
# sreg_32 as its source).
# The checks expect V_XOR_B32_e64 followed by V_TRUNC_F16_t16_e64 reading the
# .lo16 subregister of the XOR result.
---
|
|
name: salu32_usedby_valu16
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: salu32_usedby_valu16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[DEF]], [[DEF]], implicit $exec
|
|
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[V_XOR_B32_e64_]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_32
|
|
%2:sreg_32 = S_XOR_B32 %1:sreg_32, %1:sreg_32, implicit-def $scc
|
|
%3:vgpr_16 = V_TRUNC_F16_t16_e64 0, %2:sreg_32, 0, 0, 0, implicit $mode, implicit $exec
|
|
...
|
|
|
|
# copy_vgpr16_sreg32_usedby_salu16: a vgpr_16 IMPLICIT_DEF is copied into an
# sreg_32 whose only user is S_TRUNC_F16.
# The checks expect the copy to become a vgpr_32 REG_SEQUENCE (IMPLICIT_DEF in
# hi16) and V_TRUNC_F16_t16_e64 to read its .lo16 subregister.
---
|
|
name: copy_vgpr16_sreg32_usedby_salu16
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: copy_vgpr16_sreg32_usedby_salu16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF1]], %subreg.hi16
|
|
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_16
|
|
%2:sreg_32 = S_TRUNC_F16 %1:sreg_32, implicit $mode
|
|
...
|
|
|
|
# copy_vgpr16_sreg32_usedby_salu32: a vgpr_16 IMPLICIT_DEF is copied into an
# sreg_32 whose only user is the 32-bit S_XOR_B32.
# The checks expect the copy to become a vgpr_32 REG_SEQUENCE (IMPLICIT_DEF in
# hi16) that is used as both sources of V_XOR_B32_e64.
---
|
|
name: copy_vgpr16_sreg32_usedby_salu32
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: copy_vgpr16_sreg32_usedby_salu32
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF1]], %subreg.hi16
|
|
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE]], [[REG_SEQUENCE]], implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_16
|
|
%2:sreg_32 = S_XOR_B32 %1:sreg_32, %1:sreg_32, implicit-def $scc
|
|
...
|
|
|
|
# copy_vgpr32_sreg32_usedby_valu16: a vgpr_32 IMPLICIT_DEF is copied into an
# sreg_32 that is used directly as the source of V_TRUNC_F16_t16_e64.
# The checks expect the V_TRUNC_F16_t16_e64 to read [[DEF]].lo16, i.e. the low
# half of the original vgpr_32, with no COPY left in the expected output.
---
|
|
name: copy_vgpr32_sreg32_usedby_valu16
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: copy_vgpr32_sreg32_usedby_valu16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[V_TRUNC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_TRUNC_F16_t16_e64 0, [[DEF]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_32 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_32
|
|
%2:vgpr_16 = V_TRUNC_F16_t16_e64 0, %1:sreg_32, 0, 0, 0, implicit $mode, implicit $exec
|
|
...
|
|
|
|
# S_FMAC_F16: one vgpr_16 IMPLICIT_DEF is copied both into an sgpr_lo16 (which
# is then copied into an sreg_32) and directly into another sreg_32; both
# sreg_32 values feed S_FMAC_F16.
# The checks expect V_FMAC_F16_t16_e64 reading the .lo16 subregisters of two
# vgpr_32 REG_SEQUENCEs built from [[DEF]], with an sgpr_lo16 IMPLICIT_DEF
# remaining in the expected output.
---
|
|
name: S_FMAC_F16
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: S_FMAC_F16
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_lo16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF2]], %subreg.hi16
|
|
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF3]], %subreg.hi16
|
|
; GCN-NEXT: [[V_FMAC_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_FMAC_F16_t16_e64 0, [[REG_SEQUENCE1]].lo16, 0, [[REG_SEQUENCE1]].lo16, 0, [[REG_SEQUENCE]].lo16, 0, 0, 0, implicit $mode, implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:sgpr_lo16 = COPY %0:vgpr_16
|
|
%2:sreg_32 = COPY %0:vgpr_16
|
|
%3:sreg_32 = COPY %1:sgpr_lo16
|
|
%4:sreg_32 = S_FMAC_F16 %3:sreg_32, %3:sreg_32, %2:sreg_32, implicit $mode
|
|
...
|
|
|
|
# legalize_with_multi_user: the S_ADD_F16 result (fed by a vgpr_16 copied into
# an sreg_32) has two users: V_PK_FMA_F16 and S_XOR_B32.
# The checks expect V_ADD_F16_t16_e64 producing a vgpr_16, and a separate
# vgpr_32 REG_SEQUENCE of that result for each user (REG_SEQUENCE1 for
# V_PK_FMA_F16, REG_SEQUENCE2 for V_XOR_B32_e64). The S_MOV_B32 constant stays
# an sreg_32 in the expected output.
---
|
|
name: legalize_with_multi_user
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: legalize_with_multi_user
|
|
; GCN: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[DEF]], %subreg.lo16, [[DEF1]], %subreg.hi16
|
|
; GCN-NEXT: [[V_ADD_F16_t16_e64_:%[0-9]+]]:vgpr_16 = V_ADD_F16_t16_e64 0, [[REG_SEQUENCE]].lo16, 0, 1, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
|
|
; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_ADD_F16_t16_e64_]], %subreg.lo16, [[DEF3]], %subreg.hi16
|
|
; GCN-NEXT: [[V_PK_FMA_F16_:%[0-9]+]]:vgpr_32 = V_PK_FMA_F16 11, [[S_MOV_B32_]], 0, [[REG_SEQUENCE1]], 8, [[DEF2]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_ADD_F16_t16_e64_]], %subreg.lo16, [[DEF4]], %subreg.hi16
|
|
; GCN-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[REG_SEQUENCE2]], [[S_MOV_B32_]], implicit $exec
|
|
%0:vgpr_16 = IMPLICIT_DEF
|
|
%1:sreg_32 = COPY %0:vgpr_16
|
|
%2:sreg_32 = S_ADD_F16 %1:sreg_32, 1, implicit $mode
|
|
%3:sreg_32 = S_MOV_B32 32768
|
|
%4:vgpr_32 = IMPLICIT_DEF
|
|
%5:vgpr_32 = V_PK_FMA_F16 11, %3:sreg_32, 0, %2:sreg_32, 8, %4:vgpr_32, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
|
|
%6:sreg_32 = S_XOR_B32 %2:sreg_32, %3:sreg_32, implicit-def dead $scc
|
|
...
|
|
|
|
# vgpr16_to_spgr32: a three-block function in which a vgpr_16
# V_CVT_F16_F32_t16_e64 result is copied into an sreg_32 and then flows through
# S_AND_B32, S_MUL_I32 and S_CMP_LG_U32 into a conditional branch.
# The checks expect the scalar chain and the branch to stay as SALU
# instructions: the vgpr_16 is widened with a REG_SEQUENCE and moved to an SGPR
# with V_READFIRSTLANE_B32, and the sgpr_32 COPY of the DS_READ2 .sub0 result
# becomes a vgpr_32 COPY.
# NOTE(review): "spgr32" in the name looks like a typo for "sgpr32" - confirm
# before renaming, since the name: field and the GCN-LABEL line must match.
---
|
|
name: vgpr16_to_spgr32
|
|
body: |
|
|
; GCN-LABEL: name: vgpr16_to_spgr32
|
|
; GCN: bb.0.entry:
|
|
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
|
|
; GCN-NEXT: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 killed [[COPY]], 0, 1, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) poison` + 8, align 4, addrspace 3)
|
|
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DS_READ2_B32_gfx9_]].sub0
|
|
; GCN-NEXT: [[V_CVT_F16_F32_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, killed [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
|
|
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
|
|
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
|
|
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CVT_F16_F32_t16_e64_]], %subreg.lo16, [[DEF1]], %subreg.hi16
|
|
; GCN-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]], implicit $exec
|
|
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_MOV_B32_]], killed [[V_READFIRSTLANE_B32_]], implicit-def dead $scc
|
|
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5
|
|
; GCN-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]]
|
|
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 2
|
|
; GCN-NEXT: S_CMP_LG_U32 killed [[S_MUL_I32_]], killed [[S_MOV_B32_2]], implicit-def $scc
|
|
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
|
; GCN-NEXT: S_BRANCH %bb.1
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.1:
|
|
; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1
|
|
; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_3]]
|
|
; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_4]]
|
|
; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0
|
|
; GCN-NEXT: {{ $}}
|
|
; GCN-NEXT: bb.2:
|
|
; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 2
|
|
; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 killed [[S_MOV_B32_5]]
|
|
; GCN-NEXT: $sgpr0 = COPY [[S_MOV_B32_6]]
|
|
; GCN-NEXT: SI_RETURN_TO_EPILOG $sgpr0
|
|
bb.0.entry:
|
|
successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%)
|
|
|
|
%5:sreg_32 = IMPLICIT_DEF
|
|
%6:vgpr_32 = COPY %5:sreg_32
|
|
%4:vreg_64 = DS_READ2_B32_gfx9 killed %6:vgpr_32, 0, 1, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) poison` + 8, align 4, addrspace 3)
|
|
%7:sgpr_32 = COPY %4.sub0:vreg_64
|
|
%8:vgpr_16 = nofpexcept V_CVT_F16_F32_t16_e64 0, killed %7:sgpr_32, 0, 0, 0, implicit $mode, implicit $exec
|
|
%9:sreg_32 = S_MOV_B32 65535
|
|
%11:sreg_32 = COPY %8:vgpr_16
|
|
%10:sreg_32 = S_AND_B32 killed %9:sreg_32, killed %11:sreg_32, implicit-def dead $scc
|
|
%12:sreg_32 = S_MOV_B32 5
|
|
%13:sreg_32 = S_MUL_I32 killed %10:sreg_32, killed %12:sreg_32
|
|
%14:sreg_32 = S_MOV_B32 2
|
|
S_CMP_LG_U32 killed %13:sreg_32, killed %14:sreg_32, implicit-def $scc
|
|
S_CBRANCH_SCC1 %bb.2, implicit $scc
|
|
S_BRANCH %bb.1
|
|
bb.1:
|
|
%17:sreg_32 = S_MOV_B32 1
|
|
%18:sreg_32 = S_MOV_B32 killed %17:sreg_32
|
|
$sgpr0 = COPY %18:sreg_32
|
|
SI_RETURN_TO_EPILOG $sgpr0
|
|
bb.2:
|
|
%15:sreg_32 = S_MOV_B32 2
|
|
%16:sreg_32 = S_MOV_B32 killed %15:sreg_32
|
|
$sgpr0 = COPY %16:sreg_32
|
|
SI_RETURN_TO_EPILOG $sgpr0
|
|
...
|