llvm-project/llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll
Matt Arsenault 056e5a32c8
AMDGPU: Change ABI of 16-bit scalar values for gfx6/gfx7 (#175795)
Keep bf16/f16 values encoded as the low half of a 32-bit register,
instead of promoting to float. This avoids unwanted FP effects
from the fpext/fptrunc which should not be implied by just
passing an argument. This also fixes ABI divergence between
SelectionDAG and GlobalISel.

I've wanted to make this change for ages, and failed the last
few times. The main complication was the hack to return
shader integer types in SGPRs, which now needs to inspect
the underlying IR type.
2026-01-22 18:34:06 +00:00

14073 lines
731 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
; Scalar case: llvm.minimumnum on two bfloat arguments, with no fast-math
; flags on the call.
;
; What the autogenerated checks below pin down:
;  * gfx700 run: both inputs are shifted left by 16, multiplied by 1.0,
;    combined with v_min_f32, and the result is shifted right by 16.
;  * remaining runs: no min instruction is expected; the result is built from
;    v_cmp_u_f32 self-compares, a v_cmp_lt_f32 and v_cndmask selects, followed
;    by a final select keyed on an integer compare against 0x8000 and a float
;    compare of the candidate result against 0.
;    NOTE(review): 0x8000 is presumably the bf16 bit pattern of -0.0 (signed
;    zero ordering) -- confirm against the lowering.
;
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py (see the NOTE at the top of this file).
define bfloat @v_minimumnum_bf16(bfloat %x, bfloat %y) {
; GFX7-LABEL: v_minimumnum_bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: s_movk_i32 s4, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s4, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX900-NEXT: s_movk_i32 s4, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s4, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: s_movk_i32 s0, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s0, v0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v2
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v2
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v1.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v1.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call bfloat @llvm.minimumnum.bf16(bfloat %x, bfloat %y)
ret bfloat %result
}
; Same scalar bfloat operation as v_minimumnum_bf16, but the call carries the
; nnan fast-math flag.
;
; Compared with the expected output for v_minimumnum_bf16, the checks here
; contain no v_mul_f32 by 1.0 in the gfx700 run and no v_cmp_u_f32
; self-compares in the remaining runs; the v_cmp_lt_f32 select and the final
; 0x8000 / zero compare sequence are still expected.
;
; Do not hand-edit the check lines; regenerate them with
; utils/update_llc_test_checks.py (see the NOTE at the top of this file).
define bfloat @v_minimumnum_bf16_nnan(bfloat %x, bfloat %y) {
; GFX7-LABEL: v_minimumnum_bf16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: s_movk_i32 s4, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s4, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_bf16_nnan:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_movk_i32 s4, 0x8000
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0
; GFX900-NEXT: v_cmp_lt_f32_e64 s[4:5], v3, v2
; GFX900-NEXT: v_cndmask_b32_e64 v1, v1, v0, s[4:5]
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX900-NEXT: v_cmp_eq_f32_e64 s[4:5], 0, v2
; GFX900-NEXT: s_and_b64 vcc, s[4:5], vcc
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_bf16_nnan:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: s_movk_i32 s0, 0x8000
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s0, v0
; GFX950-NEXT: v_cmp_lt_f32_e64 s[0:1], v3, v2
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, v0, s[0:1]
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_cmp_eq_f32_e64 s[0:1], 0, v2
; GFX950-NEXT: s_and_b64 vcc, s[0:1], vcc
; GFX950-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_bf16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v2
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_bf16_nnan:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v0.l, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_bf16_nnan:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_bf16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.h, v0.l
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v0.l, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_bf16_nnan:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call nnan bfloat @llvm.minimumnum.bf16(bfloat %x, bfloat %y)
ret bfloat %result
}
define <2 x bfloat> @v_minimumnum_v2bf16(<2 x bfloat> %x, <2 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v2bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v2, v3, v2
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_alignbit_b32 v0, v2, v0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX8-NEXT: v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v2bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX900-NEXT: v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v2bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX950-NEXT: v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v2, v0, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX10-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cndmask_b32_sdwa v6, v0, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v6
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v4
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v2bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v3, v3
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v1.h, v2.h, s0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v3
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.l, v1.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v4.h, s2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v2
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v4.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v4.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v2bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v4, v3 :: v_dual_lshlrev_b32 v5, 16, v0
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v1, v0 :: v_dual_lshlrev_b32 v4, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v2bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v3, v3
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v1.h, v2.h, s0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.l, v1.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v4.h, s2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v4.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v4.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v2bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v2, v4, v3 :: v_dual_lshlrev_b32 v5, 16, v0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v1, v1, v0 :: v_dual_lshlrev_b32 v4, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %x, <2 x bfloat> %y)
ret <2 x bfloat> %result
}
; Test: llvm.minimumnum on <2 x bfloat> operands where the call carries the
; nnan fast-math flag.
;
; What the expected output below shows:
;  * For the gfx700 run, each bf16 half is widened to f32 (shift left by 16 for
;    the low half, mask with 0xffff0000 for the high half), the halves are
;    combined with v_min_f32, and the result is repacked with v_alignbit_b32.
;  * For every other run, each half goes through a v_cmp_lt_f32 + select, then
;    a v_cmp_eq_f32 against 0 and-ed with a v_cmp_eq_u16 against 0x8000 (the
;    16-bit pattern with only the sign bit set) feeding a second select.
;    NOTE(review): this looks like signed-zero ordering (prefer -0.0 when the
;    result compares equal to zero) - confirm against the lowering code.
;  * No v_cmp_u_f32 (unordered) compares are expected in this function.
;
; The check lines are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define <2 x bfloat> @v_minimumnum_v2bf16_nnan(<2 x bfloat> %x, <2 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v2bf16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v2, v3, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_alignbit_b32 v0, v0, v2, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_cndmask_b32_sdwa v1, v1, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_sdwa v0, v1, v0, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v2bf16_nnan:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
; GFX900-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX900-NEXT: v_cndmask_b32_sdwa v1, v1, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v2bf16_nnan:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX950-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_sdwa v1, v1, v0, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v0, v2, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2bf16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
; GFX10-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v5, v4
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX10-NEXT: v_cndmask_b32_e64 v3, v1, v0, s4
; GFX10-NEXT: v_cndmask_b32_sdwa v1, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v4
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v5
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v2bf16_nnan:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v4, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v0.l, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.h, v0.h, s1
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v2bf16_nnan:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v7, v6 :: v_dual_lshlrev_b32 v3, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v6, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v2bf16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v4, v3
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v0.l, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.h, v0.h, s1
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v2bf16_nnan:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v6
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v2, v7, v6 :: v_dual_lshlrev_b32 v3, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v2, v6, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call nnan <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %x, <2 x bfloat> %y)
ret <2 x bfloat> %result
}
define <3 x bfloat> @v_minimumnum_v3bf16(<3 x bfloat> %x, <3 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v3bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v3, v4, v3
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_alignbit_b32 v0, v3, v0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX8-NEXT: v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v3bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX900-NEXT: v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v4, v0, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v3bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX950-NEXT: v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v4, v0, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX10-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v6, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v6, v0, v2, s4
; GFX10-NEXT: v_cndmask_b32_sdwa v0, v0, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s7, 0x8000, v1
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v9, v5
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v8
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v1, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s7
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v3bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v4.h, s0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v4.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v3.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v5.h, v4.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.h, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, v4.h, v0.h, s0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v3bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v6, v5 :: v_dual_and_b32 v7, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_lshlrev_b32 v6, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v8
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v5, v5, v4 :: v_dual_lshlrev_b32 v8, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v7, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v7
; GFX11-FAKE16-NEXT: s_and_b32 s0, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v3bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v4.h, s0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v4.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v3.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v5.h, v4.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.l, v4.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v3bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v6, v5 :: v_dual_and_b32 v7, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_lshlrev_b32 v6, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v5, v5, v4 :: v_dual_lshlrev_b32 v8, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v7, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v7
; GFX12-FAKE16-NEXT: s_and_b32 s0, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x bfloat> @llvm.minimumnum.v3bf16(<3 x bfloat> %x, <3 x bfloat> %y)
ret <3 x bfloat> %result
}
; nnan variant of the <3 x bfloat> minimumnum test: the intrinsic call at the
; end of this function carries the nnan fast-math flag.
;
; In the expectations that follow, the gfx700 sequence extends each bf16 lane
; to f32 and uses v_min_f32 directly. The other targets use a v_cmp_lt_f32 +
; v_cndmask select followed by a zero check: v_cmp_eq_f32 against 0 combined
; with v_cmp_eq_u16 against 0x8000 (presumably the bf16 -0.0 bit pattern, so
; that -0.0 is preferred over +0.0 -- confirm against the lowering code).
; None of the sequences in this function contains a v_cmp_u_f32 self-compare.
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
define <3 x bfloat> @v_minimumnum_v3bf16_nnan(<3 x bfloat> %x, <3 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v3bf16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_min_f32_e32 v3, v4, v3
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v3bf16_nnan:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX900-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v3bf16_nnan:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX950-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3bf16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX10-NEXT: v_and_b32_e32 v8, 0xffff0000, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v1
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v9
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX10-NEXT: v_cndmask_b32_e32 v5, v10, v9, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v7
; GFX10-NEXT: v_cndmask_b32_e32 v2, v5, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1
; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-NEXT: s_and_b32 vcc_lo, s7, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v3bf16_nnan:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.l, vcc_lo
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.l, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.l, s1
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v4.h, v0.l, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.h, v0.h, s1
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v4.h, v0.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v3bf16_nnan:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v9
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v5, v10, v9 :: v_dual_lshlrev_b32 v4, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v4
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v6
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v5, v9 :: v_dual_lshlrev_b32 v7, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v7
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s3, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v3bf16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.l, vcc_lo
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX12-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.l, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.l, s1
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v4.h, v0.l, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.h, v0.h, s1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v4.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v3bf16_nnan:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v9
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v5, v10, v9 :: v_dual_lshlrev_b32 v4, 16, v3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v4
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v6
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v2, v5, v9 :: v_dual_lshlrev_b32 v7, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v7
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s3, vcc_lo
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
; Same intrinsic call as in v_minimumnum_v3bf16, but with the nnan flag set.
%result = call nnan <3 x bfloat> @llvm.minimumnum.v3bf16(<3 x bfloat> %x, <3 x bfloat> %y)
ret <3 x bfloat> %result
}
define <4 x bfloat> @v_minimumnum_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v4bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v3
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v4, v5, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v3, v5, v3
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_alignbit_b32 v0, v3, v0, 16
; GFX7-NEXT: v_alignbit_b32 v1, v4, v1, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX8-NEXT: v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_and_b32_e32 v8, 0xffff0000, v2
; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v8
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v4bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX900-NEXT: v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_and_b32_e32 v8, 0xffff0000, v2
; GFX900-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v7, v8
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v5, v0, s4
; GFX900-NEXT: v_perm_b32 v1, v4, v1, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v4bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX950-NEXT: v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_and_b32_e32 v8, 0xffff0000, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v7, v8
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX950-NEXT: v_perm_b32 v1, v4, v1, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v3
; GFX10-NEXT: v_and_b32_e32 v7, 0xffff0000, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX10-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX10-NEXT: v_cndmask_b32_sdwa v11, v1, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v11
; GFX10-NEXT: v_cndmask_b32_e32 v6, v8, v4, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v6
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v10, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v6, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v2
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v7, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v8, v13
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v1, s5
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v0, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v6
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v8
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v9
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s7, s8
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v5, v1, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v4bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v1.h, v3.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v5, v5
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v0.h, v2.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 16, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v8, v8
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v1.l, v3.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v5.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v3.l, v7.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v7.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v6
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v7.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v6
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v6.h, v7.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v2.l, v0.h, s2
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v6
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v6
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v6.h, v0.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v4
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v4bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc_lo
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v9, v8 :: v_dual_lshlrev_b32 v13, 16, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v4
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v13
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_lshlrev_b32 v13, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v11, v11
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v13, v12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v8, v9
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v6, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v11, v10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v0, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v5, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v5
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v4bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v1.h, v3.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v5, v5
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.h, v6.h, s0
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v0.h, v2.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 16, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v8, v8
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v1.l, v3.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v5.h, v6.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v3.l, v7.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v7.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v6
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v7.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v6
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.l, v6.h, v7.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v2.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v6
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v6
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.l, v6.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v4
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v4bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc_lo
; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v6, v9, v8 :: v_dual_lshlrev_b32 v13, 16, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v4
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v13
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_lshlrev_b32 v13, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v11, v11
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v13, v12
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v8, v9
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s1
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v6, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v11, v10
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v0, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v5, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v5
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y)
ret <4 x bfloat> %result
}
define <4 x bfloat> @v_minimumnum_v4bf16_nnan(<4 x bfloat> %x, <4 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v4bf16_nnan:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT: v_min_f32_e32 v4, v5, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_min_f32_e32 v3, v5, v3
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX7-NEXT: v_alignbit_b32 v1, v1, v4, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4bf16_nnan:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v3
; GFX8-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT: v_cndmask_b32_sdwa v5, v3, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v4
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX8-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v6
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v4bf16_nnan:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX900-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX900-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v0
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4
; GFX900-NEXT: v_perm_b32 v1, v1, v4, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v4bf16_nnan:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX950-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX950-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX950-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_and_b32_e32 v6, 0xffff0000, v0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX950-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v0, v3, s0
; GFX950-NEXT: v_perm_b32 v1, v1, v4, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4bf16_nnan:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX10-NEXT: v_and_b32_e32 v11, 0xffff0000, v2
; GFX10-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v3
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v9, v8
; GFX10-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v13, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc_lo
; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v2
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v0, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v12, v11
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v7, v6
; GFX10-NEXT: v_cndmask_b32_e64 v8, v14, v13, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v1
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s5
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v8
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v13
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v5
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v4
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cndmask_b32_e32 v2, v8, v13, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s7, s8
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v4bf16_nnan:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.l, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1.l
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v6, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.l, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.h, v1.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v4.h, v1.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.l, s1
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v4.h, v0.l, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.h, v0.h, s1
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v4.h, v0.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v4bf16_nnan:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX11-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v9, v8
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v13
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v0, s0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v12, v11
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v8, v14, v13, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v4
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v4, v1 :: v_dual_and_b32 v6, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v7, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v8
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v5, s1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v8, v13, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v4bf16_nnan:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.l, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v6, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v1.h
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.l, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.h, v1.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX12-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v4.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.l, s1
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e64 s1, v5, v3
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v4.h, v0.l, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.h, v0.h, s1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v4.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v4bf16_nnan:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX12-FAKE16-NEXT: v_and_b32_e32 v11, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v9, v8
; GFX12-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v13
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v0, s0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v12, v11
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v8, v14, v13, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v5
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v1, v4, v1 :: v_dual_and_b32 v6, 0xffff0000, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v7, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v5, s1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v8, v13, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call nnan <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y)
ret <4 x bfloat> %result
}
define <6 x bfloat> @v_minimumnum_v6bf16(<6 x bfloat> %x, <6 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v6bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v5
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_min_f32_e32 v6, v7, v6
; GFX7-NEXT: v_min_f32_e32 v2, v2, v5
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v5, v7, v5
; GFX7-NEXT: v_min_f32_e32 v1, v1, v4
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v3
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v4, v7, v4
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_min_f32_e32 v0, v0, v3
; GFX7-NEXT: v_alignbit_b32 v0, v4, v0, 16
; GFX7-NEXT: v_alignbit_b32 v1, v5, v1, 16
; GFX7-NEXT: v_alignbit_b32 v2, v6, v2, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v6bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_and_b32_e32 v9, 0xffff0000, v5
; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v8, v9
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v6
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_and_b32_e32 v10, 0xffff0000, v4
; GFX8-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v9, v10
; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v7
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
; GFX8-NEXT: v_and_b32_e32 v8, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX8-NEXT: v_and_b32_e32 v11, 0xffff0000, v3
; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v10, v11
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v8
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v10, v9
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v2
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v9, v5
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v8
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v7
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v6
; GFX8-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v6bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_and_b32_e32 v9, 0xffff0000, v5
; GFX900-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX900-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v8, v9
; GFX900-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v6
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX900-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_and_b32_e32 v10, 0xffff0000, v4
; GFX900-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX900-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v9, v10
; GFX900-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v7
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
; GFX900-NEXT: v_and_b32_e32 v8, 0xffff0000, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v3
; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v3
; GFX900-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v10, v11
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v8
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX900-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v10, v9
; GFX900-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v2
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v9, v5
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v8, v0, s4
; GFX900-NEXT: v_perm_b32 v1, v7, v1, s4
; GFX900-NEXT: v_perm_b32 v2, v6, v2, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v6bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX950-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_and_b32_e32 v9, 0xffff0000, v5
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v6
; GFX950-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v8, v9
; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX950-NEXT: v_and_b32_e32 v10, 0xffff0000, v4
; GFX950-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v8
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX950-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc
; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v8, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v7
; GFX950-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v9, v10
; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v8, v8, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v3
; GFX950-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
; GFX950-NEXT: v_and_b32_e32 v8, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v8
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v10, v11
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v2
; GFX950-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v10, v9
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v5, v5, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v9, v5
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v4
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v8, v0, s0
; GFX950-NEXT: v_perm_b32 v1, v7, v1, s0
; GFX950-NEXT: v_perm_b32 v2, v6, v2, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v6bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX10-NEXT: v_and_b32_e32 v9, 0xffff0000, v1
; GFX10-NEXT: v_and_b32_e32 v8, 0xffff0000, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v1
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v4
; GFX10-NEXT: v_and_b32_e32 v11, 0xffff0000, v4
; GFX10-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v13, 16, v3
; GFX10-NEXT: v_cndmask_b32_sdwa v14, v2, v7, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v0
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v14
; GFX10-NEXT: v_cndmask_b32_e32 v9, v10, v6, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v14
; GFX10-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v9
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v14, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v7
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v16, 16, v6
; GFX10-NEXT: v_cndmask_b32_e32 v12, v15, v13, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v12
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v14, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v7
; GFX10-NEXT: v_cndmask_b32_e32 v8, v13, v12, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v15, v16
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v8
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v11, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s5
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v15
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v14, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v11, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v10
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v5, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v0
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v9, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v2
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v4, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v10, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v3, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v9, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v1, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v11, v11
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v0, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v10, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v2, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v10, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v1, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v13, v11
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v15, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v0, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v12
; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v2, s5
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v3
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v10
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v11
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s7, s8
; GFX10-NEXT: v_perm_b32 v1, v6, v1, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v8, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v2, v7, v2, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v6bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v5
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v4
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v3
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v7, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v2.h, v5.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v11, v11
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v5.h, v8.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v6
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v8.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v6
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v8.h, s0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v7, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v8.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v1.h, v4.h, s1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v4.h, v8.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v8.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v7
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v8.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v0.h, v3.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.h, v3.h, v8.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v8.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v9
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.h, v9.h, v8.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v8.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.h, v2.l, v5.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v9.h, v8.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v5.l, v10.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v10.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v8
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v9, v9
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v10.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v4.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v8.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.l, v8.h, v10.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v4.l, v1.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v3.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v8.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.l, v8.h, v1.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v3.l, v0.h, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v7
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, v8.h, v0.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, v6
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v6bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v0
; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v4
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v10, v9 :: v_dual_and_b32 v11, 0xffff0000, v4
; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v8
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v9, v9, v8 :: v_dual_lshlrev_b32 v12, 16, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v10, v15, v14 :: v_dual_lshlrev_b32 v13, 16, v7
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v13
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v10
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v8
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v12, v14, v10 :: v_dual_lshlrev_b32 v15, 16, v9
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v15
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v7
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v13, v14
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v11
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v11, v12, v10, s1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v15
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v12, v12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v9, v8 :: v_dual_lshlrev_b32 v7, 16, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v9, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, v1, v4, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v7, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v3, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v9, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v1, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v12, v12
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v0, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v7, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v2, s0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v9, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v1, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v13, v12
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v15, v14
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v0, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v2, s1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v3
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v10, v11, v10 :: v_dual_lshlrev_b32 v11, 16, v5
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v11
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v8, v1, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v10, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v2, v6, v2, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v6bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX12-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v5
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v4
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v3
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v7, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.l, v8.l
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v11, 16, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v2.h, v5.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v11, v11
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v5.h, v8.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v6
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v8.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v6
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v8.h, s0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v7, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v8.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v1.h, v4.h, s1
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v9.l, v8.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v4.h, v8.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v8.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v7
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v8.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v0.h, v3.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.h, v3.h, v8.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v8.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v9
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.h, v9.h, v8.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v10.l, v8.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.h, v2.l, v5.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v9.h, v8.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v5.l, v10.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v10.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v10, v8
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v9, v9
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v10.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v4.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v8.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.l, v8.h, v10.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v4.l, v1.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v3.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v8.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.l, v8.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v3.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, v7
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.l, v8.h, v0.h, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, v6
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v6bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v6, v8, v7, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v8, v10, v9 :: v_dual_and_b32 v11, 0xffff0000, v4
; GFX12-FAKE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v9, v9, v8 :: v_dual_lshlrev_b32 v12, 16, v6
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v10, v15, v14 :: v_dual_lshlrev_b32 v13, 16, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v10
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v7, v7, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v12, v14, v10 :: v_dual_lshlrev_b32 v15, 16, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v15
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v13, v14
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v11, v12, v10, s1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v15
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v12, v12
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v8, v9, v8 :: v_dual_lshlrev_b32 v7, 16, v11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v9, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v1, v1, v4, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v7, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v3, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v9, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v1, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v12, v12
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v0, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v7, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v2, s0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v9, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v1, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v13, v12
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v15, v14
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v0, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v10
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v2, s1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v3
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v10, v11, v10 :: v_dual_lshlrev_b32 v11, 16, v5
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v11
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v8, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v10, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_perm_b32 v2, v6, v2, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <6 x bfloat> @llvm.minimumnum.v6bf16(<6 x bfloat> %x, <6 x bfloat> %y)
ret <6 x bfloat> %result
}
define <8 x bfloat> @v_minimumnum_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v8bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v7
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_min_f32_e32 v8, v9, v8
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
; GFX7-NEXT: v_and_b32_e32 v7, 0xffff0000, v6
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_min_f32_e32 v7, v9, v7
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
; GFX7-NEXT: v_and_b32_e32 v6, 0xffff0000, v5
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v6, v9, v6
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v4
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v5, v9, v5
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v8
; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
; GFX7-NEXT: v_alignbit_b32 v0, v5, v0, 16
; GFX7-NEXT: v_alignbit_b32 v1, v6, v1, 16
; GFX7-NEXT: v_alignbit_b32 v2, v7, v2, 16
; GFX7-NEXT: v_alignbit_b32 v3, v8, v3, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v8bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v3
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX8-NEXT: v_and_b32_e32 v11, 0xffff0000, v7
; GFX8-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v10, v11
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v8
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
; GFX8-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v6
; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX8-NEXT: v_and_b32_e32 v12, 0xffff0000, v6
; GFX8-NEXT: v_cndmask_b32_e32 v9, v11, v10, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v10
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v11, v12
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v11
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v9
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
; GFX8-NEXT: v_and_b32_e32 v10, 0xffff0000, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5
; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX8-NEXT: v_and_b32_e32 v13, 0xffff0000, v5
; GFX8-NEXT: v_cndmask_b32_e32 v10, v12, v11, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v10
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v12, v13
; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v10
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
; GFX8-NEXT: v_and_b32_e32 v11, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v12, 16, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v13, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX8-NEXT: v_and_b32_e32 v14, 0xffff0000, v4
; GFX8-NEXT: v_cndmask_b32_e32 v11, v13, v12, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v11, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v12
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v13, v14
; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v11, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v13
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v11
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v11, v12, v11, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v13, v12
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v12, v7
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v2
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v11
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v10
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v9
; GFX8-NEXT: v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v8
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v8bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX900-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v3
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v7
; GFX900-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v10, v11
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v8
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
; GFX900-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v10, 16, v6
; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX900-NEXT: v_and_b32_e32 v12, 0xffff0000, v6
; GFX900-NEXT: v_cndmask_b32_e32 v9, v11, v10, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX900-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v10
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v11, v12
; GFX900-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v11
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v9
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
; GFX900-NEXT: v_and_b32_e32 v10, 0xffff0000, v1
; GFX900-NEXT: v_lshrrev_b32_e32 v11, 16, v5
; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX900-NEXT: v_and_b32_e32 v13, 0xffff0000, v5
; GFX900-NEXT: v_cndmask_b32_e32 v10, v12, v11, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX900-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v10
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v12, v13
; GFX900-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v10
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v12, 16, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v13, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX900-NEXT: v_and_b32_e32 v14, 0xffff0000, v4
; GFX900-NEXT: v_cndmask_b32_e32 v11, v13, v12, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX900-NEXT: v_cndmask_b32_e32 v12, v12, v11, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v12
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v13, v14
; GFX900-NEXT: v_cndmask_b32_e32 v12, v12, v11, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v13
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v11
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v11, v12, v11, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX900-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v13, v12
; GFX900-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v12, v7
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v2
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX900-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v11, v0, s4
; GFX900-NEXT: v_perm_b32 v1, v10, v1, s4
; GFX900-NEXT: v_perm_b32 v2, v9, v2, s4
; GFX900-NEXT: v_perm_b32 v3, v8, v3, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v8bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX950-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v3
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v7
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v8
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v8
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v10, v11
; GFX950-NEXT: v_lshrrev_b32_e32 v11, 16, v2
; GFX950-NEXT: v_and_b32_e32 v12, 0xffff0000, v6
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v10, 16, v6
; GFX950-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
; GFX950-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX950-NEXT: v_and_b32_e32 v13, 0xffff0000, v5
; GFX950-NEXT: v_and_b32_e32 v14, 0xffff0000, v4
; GFX950-NEXT: v_cndmask_b32_e32 v9, v11, v10, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v9
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v9
; GFX950-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v10
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v11, v12
; GFX950-NEXT: v_lshrrev_b32_e32 v12, 16, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v10, v10, v9, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v11
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v11, 16, v5
; GFX950-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc
; GFX950-NEXT: v_and_b32_e32 v10, 0xffff0000, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v10, v12, v11, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v10
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v10
; GFX950-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v12, v13
; GFX950-NEXT: v_lshrrev_b32_e32 v13, 16, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v12, 16, v4
; GFX950-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v11, v13, v12, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v11
; GFX950-NEXT: v_cndmask_b32_e32 v12, v12, v11, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v12
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v13, v14
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v12, v12, v11, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v13
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v11, v12, v11, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v3
; GFX950-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v13, v12
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v7
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v2
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v12, v7
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v11, v0, s0
; GFX950-NEXT: v_perm_b32 v1, v10, v1, s0
; GFX950-NEXT: v_perm_b32 v2, v9, v2, s0
; GFX950-NEXT: v_perm_b32 v3, v8, v3, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v8bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v3
; GFX10-NEXT: v_and_b32_e32 v11, 0xffff0000, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX10-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX10-NEXT: v_and_b32_e32 v14, 0xffff0000, v7
; GFX10-NEXT: v_lshrrev_b32_e32 v16, 16, v0
; GFX10-NEXT: v_and_b32_e32 v17, 0xffff0000, v4
; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX10-NEXT: v_and_b32_e32 v11, 0xffff0000, v6
; GFX10-NEXT: v_cndmask_b32_e32 v10, v13, v12, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v10
; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX10-NEXT: v_cndmask_b32_e32 v11, v12, v10, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v11
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v12
; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v15
; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v15, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v9
; GFX10-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX10-NEXT: v_and_b32_e32 v12, 0xffff0000, v1
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v13
; GFX10-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc_lo
; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX10-NEXT: v_and_b32_e32 v13, 0xffff0000, v0
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX10-NEXT: v_and_b32_e32 v12, 0xffff0000, v5
; GFX10-NEXT: v_cndmask_b32_e32 v9, v9, v14, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v12, v12
; GFX10-NEXT: v_cndmask_b32_e32 v13, v16, v15, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v12, v14, v9, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v17, v17
; GFX10-NEXT: v_lshlrev_b32_e32 v16, 16, v9
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v18, 16, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v17, 16, v12
; GFX10-NEXT: v_cndmask_b32_e64 v14, v15, v13, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v3
; GFX10-NEXT: s_and_b32 vcc_lo, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v13
; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v19, 16, v14
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v15, v15
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v7
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v7, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v16, v17
; GFX10-NEXT: v_lshlrev_b32_e32 v17, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v9, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v18, v19
; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v13, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v15, v15
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v14
; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v3, s4
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v15
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v16, 16, v7
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v3
; GFX10-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v17, v16
; GFX10-NEXT: v_cndmask_b32_e32 v12, v14, v13, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX10-NEXT: v_cndmask_b32_e64 v7, v7, v3, s7
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX10-NEXT: v_cmp_eq_u16_e64 s10, 0x8000, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v16, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v2
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v17, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v16, v15
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v6
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v18, v17
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v11
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v4
; GFX10-NEXT: s_and_b32 s5, s5, s6
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX10-NEXT: v_cndmask_b32_e64 v2, v6, v2, s5
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v13
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s9, 0, v14
; GFX10-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
; GFX10-NEXT: s_and_b32 s5, s7, s8
; GFX10-NEXT: v_perm_b32 v2, v10, v2, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s5
; GFX10-NEXT: s_and_b32 s5, s9, s10
; GFX10-NEXT: v_perm_b32 v3, v8, v3, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s5
; GFX10-NEXT: v_perm_b32 v0, v12, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v9, v1, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v8bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v7
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v9, v9
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v11.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v14, v14
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v3.h, v7.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v9.l, v11.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v7.h, v11.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v11.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v11.h, s0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v6
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v2.h, v6.h, s1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.h, v6.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v10, v10
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, v11.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v9
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.h, v9.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v11.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v9
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.h, v9.h, v11.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v1.h, v5.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff0000, v4
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.h, v5.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v12, v12
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v12.l, v11.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.h, v10.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v11.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v10
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.h, v10.h, v11.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v0.h, v4.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v12.h, v4.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v11.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v12
; GFX11-TRUE16-NEXT: v_cndmask_b16 v12.h, v12.h, v11.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v13.l, v11.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v12
; GFX11-TRUE16-NEXT: v_cndmask_b16 v13.h, v3.l, v7.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v12.h, v11.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v7.l, v13.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v13.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v11
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v12, v12
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v13.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.l, v6.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v11.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.l, v11.h, v13.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v6.l, v2.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v11
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v5.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v11.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.l, v11.h, v2.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v5.l, v1.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v11
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, v9
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v4.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.l, v11.h, v1.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v4.l, v0.h, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v10
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v11
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v11
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, v11.h, v0.h, s0
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v3 :: v_dual_mov_b32 v3, v8
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v8bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v8, v10, v9 :: v_dual_and_b32 v11, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff0000, v7
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v10, v13, v12 :: v_dual_and_b32 v11, 0xffff0000, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v8
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v9, v9, v8 :: v_dual_lshlrev_b32 v14, 16, v10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v11, v12, v10 :: v_dual_lshlrev_b32 v12, 16, v9
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v11
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v9
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v15
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v5
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc_lo
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX11-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v13
; GFX11-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff0000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v9, v9, v14 :: v_dual_and_b32 v12, 0xffff0000, v5
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v12, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v16, v15, vcc_lo
; GFX11-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff0000, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v12, v14, v9, s0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v9
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v10
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v13
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v17, v17
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v12
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v13
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v14, v15, v13, s0
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v10, v11, v10 :: v_dual_lshlrev_b32 v15, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v14
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v16, v17
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v12, v12, v9, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v18, v19
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v14, v14, v13, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v14
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v3, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v15
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v9, v12, v9 :: v_dual_lshlrev_b32 v16, 16, v7
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v12, v14, v13, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v17, v16
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v3, s3
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v4
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v5, v5, v1 :: v_dual_lshlrev_b32 v16, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v11
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v6, v2 :: v_dual_lshlrev_b32 v17, 16, v5
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v16, v15
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v4, v0 :: v_dual_lshlrev_b32 v11, 16, v6
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v18, v17
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v4
; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v6, v2, s1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v13
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s5, 0, v14
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 s1, s3, s4
; GFX11-FAKE16-NEXT: v_perm_b32 v2, v10, v2, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v4, v0, s1
; GFX11-FAKE16-NEXT: s_and_b32 s1, s5, s6
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v8, v3, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, v5, v1, s1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v12, v0, 0x5040100
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v9, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v8bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v7
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v9, v9
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, v11.l
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v14, v14
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v3.h, v7.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v9.l, v11.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v7.h, v11.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v11.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v11.h, s0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v6
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v2.h, v6.h, s1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v1
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.h, v6.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v10, v10
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v10.l, v11.l
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v9
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.h, v9.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v11.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v9
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.h, v9.h, v11.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v1.h, v5.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v13, 0xffff0000, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.h, v5.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v12, v12
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v12.l, v11.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.h, v10.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v11.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v10
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.h, v10.h, v11.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v0.h, v4.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v12.h, v4.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v11.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v11, v12
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v12.h, v12.h, v11.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v13.l, v11.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v12
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v13.h, v3.l, v7.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v12.h, v11.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v7.l, v13.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v12, 16, v6
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v13.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v11
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v12, v12
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v13.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.l, v6.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v11.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.l, v11.h, v13.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v6.l, v2.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v11
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v5.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v11.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.l, v11.h, v2.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v5.l, v1.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v4
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v11
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, v9
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v4.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.l, v11.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v4.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, v10
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v11
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v11.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v11
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.l, v11.h, v0.h, s0
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v0, v3 :: v_dual_mov_b32 v3, v8
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v8bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v7
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v13, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v16, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v8, v10, v9 :: v_dual_and_b32 v11, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_and_b32_e32 v14, 0xffff0000, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v10, v13, v12 :: v_dual_and_b32 v11, 0xffff0000, v6
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v9, v9, v8 :: v_dual_lshlrev_b32 v14, 16, v10
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v11, v12, v10 :: v_dual_lshlrev_b32 v12, 16, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v13, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v9, v9, v8, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v11
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v9
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v15
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v14, 16, v5
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v15, 16, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v10, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v8
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc_lo
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX12-FAKE16-NEXT: v_and_b32_e32 v12, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v13
; GFX12-FAKE16-NEXT: v_and_b32_e32 v13, 0xffff0000, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v9, v9, v14 :: v_dual_and_b32 v12, 0xffff0000, v5
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v12, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v13, v16, v15, vcc_lo
; GFX12-FAKE16-NEXT: v_and_b32_e32 v17, 0xffff0000, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v12, v14, v9, s0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v16, 16, v9
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v10
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v13
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v17, v17
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v12
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v13
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, vcc_lo
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v14, v15, v13, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v10, v11, v10 :: v_dual_lshlrev_b32 v15, 16, v3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v14
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v7, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v16, v17
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v17, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v12, v12, v9, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v18, v19
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v14, v14, v13, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v12
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v14
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v3, s0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v15
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v9, v12, v9 :: v_dual_lshlrev_b32 v16, 16, v7
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v12, v14, v13, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v17, v16
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v3, s3
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v11, v11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v6
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v5, v5, v1 :: v_dual_lshlrev_b32 v16, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v14, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v6, v6, v2 :: v_dual_lshlrev_b32 v17, 16, v5
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v16, v15
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v4, v0 :: v_dual_lshlrev_b32 v11, 16, v6
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v18, v17
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v4
; GFX12-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v6, v2, s1
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v13
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s5, 0, v14
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 s1, s3, s4
; GFX12-FAKE16-NEXT: v_perm_b32 v2, v10, v2, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v4, v0, s1
; GFX12-FAKE16-NEXT: s_and_b32 s1, s5, s6
; GFX12-FAKE16-NEXT: v_perm_b32 v3, v8, v3, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v1, v5, v1, s1
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v12, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v9, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> %x, <8 x bfloat> %y)
ret <8 x bfloat> %result
}
define <16 x bfloat> @v_minimumnum_v16bf16(<16 x bfloat> %x, <16 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v16bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v16, 0xffff0000, v15
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_min_f32_e32 v16, v17, v16
; GFX7-NEXT: v_min_f32_e32 v7, v7, v15
; GFX7-NEXT: v_and_b32_e32 v15, 0xffff0000, v14
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_min_f32_e32 v15, v17, v15
; GFX7-NEXT: v_min_f32_e32 v6, v6, v14
; GFX7-NEXT: v_and_b32_e32 v14, 0xffff0000, v13
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_min_f32_e32 v14, v17, v14
; GFX7-NEXT: v_min_f32_e32 v5, v5, v13
; GFX7-NEXT: v_and_b32_e32 v13, 0xffff0000, v12
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_min_f32_e32 v13, v17, v13
; GFX7-NEXT: v_min_f32_e32 v4, v4, v12
; GFX7-NEXT: v_and_b32_e32 v12, 0xffff0000, v11
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_min_f32_e32 v12, v17, v12
; GFX7-NEXT: v_min_f32_e32 v3, v3, v11
; GFX7-NEXT: v_and_b32_e32 v11, 0xffff0000, v10
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_min_f32_e32 v11, v17, v11
; GFX7-NEXT: v_min_f32_e32 v2, v2, v10
; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v9
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v10, v17, v10
; GFX7-NEXT: v_min_f32_e32 v1, v1, v9
; GFX7-NEXT: v_and_b32_e32 v9, 0xffff0000, v8
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v9, v17, v9
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v16
; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v15
; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v14
; GFX7-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v12
; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v11
; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v10
; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; GFX7-NEXT: v_min_f32_e32 v0, v0, v8
; GFX7-NEXT: v_alignbit_b32 v0, v9, v0, 16
; GFX7-NEXT: v_alignbit_b32 v1, v10, v1, 16
; GFX7-NEXT: v_alignbit_b32 v2, v11, v2, 16
; GFX7-NEXT: v_alignbit_b32 v3, v12, v3, 16
; GFX7-NEXT: v_alignbit_b32 v4, v13, v4, 16
; GFX7-NEXT: v_alignbit_b32 v5, v14, v5, 16
; GFX7-NEXT: v_alignbit_b32 v6, v15, v6, 16
; GFX7-NEXT: v_alignbit_b32 v7, v16, v7, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v16bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v16, 0xffff0000, v7
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v16, v16
; GFX8-NEXT: v_lshrrev_b32_e32 v16, 16, v15
; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v7
; GFX8-NEXT: v_and_b32_e32 v19, 0xffff0000, v15
; GFX8-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX8-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v16
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v18, v19
; GFX8-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v16
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v18
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v17
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
; GFX8-NEXT: v_and_b32_e32 v17, 0xffff0000, v6
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v14
; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v6
; GFX8-NEXT: v_and_b32_e32 v20, 0xffff0000, v14
; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v17, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX8-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v17
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v19, v20
; GFX8-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v17
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v19
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v18
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc
; GFX8-NEXT: v_and_b32_e32 v18, 0xffff0000, v5
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX8-NEXT: v_lshrrev_b32_e32 v18, 16, v13
; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v5
; GFX8-NEXT: v_and_b32_e32 v21, 0xffff0000, v13
; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v20, v21
; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v18
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v20
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v19
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
; GFX8-NEXT: v_and_b32_e32 v19, 0xffff0000, v4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v12
; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v4
; GFX8-NEXT: v_and_b32_e32 v22, 0xffff0000, v12
; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v19, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v19
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v21, v22
; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v21
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v20
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc
; GFX8-NEXT: v_and_b32_e32 v20, 0xffff0000, v3
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX8-NEXT: v_lshrrev_b32_e32 v20, 16, v11
; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v3
; GFX8-NEXT: v_and_b32_e32 v23, 0xffff0000, v11
; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v20, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v20
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v22, v23
; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v22
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v21
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc
; GFX8-NEXT: v_and_b32_e32 v21, 0xffff0000, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v10
; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v2
; GFX8-NEXT: v_and_b32_e32 v24, 0xffff0000, v10
; GFX8-NEXT: v_cndmask_b32_e32 v22, v22, v21, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v21
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v23, v24
; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v23
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v22
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc
; GFX8-NEXT: v_and_b32_e32 v22, 0xffff0000, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX8-NEXT: v_lshrrev_b32_e32 v22, 16, v9
; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v1
; GFX8-NEXT: v_and_b32_e32 v25, 0xffff0000, v9
; GFX8-NEXT: v_cndmask_b32_e32 v23, v23, v22, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX8-NEXT: v_cndmask_b32_e32 v22, v22, v23, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v22
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v24, v25
; GFX8-NEXT: v_cndmask_b32_e32 v22, v22, v23, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v23
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v22, v22, v23, vcc
; GFX8-NEXT: v_and_b32_e32 v23, 0xffff0000, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v8
; GFX8-NEXT: v_lshrrev_b32_e32 v24, 16, v0
; GFX8-NEXT: v_and_b32_e32 v26, 0xffff0000, v8
; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v23, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX8-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX8-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v25, v26
; GFX8-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v25
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v24
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v25, v24
; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v7
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v24, v15
; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v15
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v6
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX8-NEXT: v_lshlrev_b32_e32 v15, 16, v5
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v15, v14
; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v14
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX8-NEXT: v_lshlrev_b32_e32 v14, 16, v4
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v14, v13
; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v13
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v4
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX8-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v13, v12
; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX8-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v12, v11
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v11
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v2
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX8-NEXT: v_lshlrev_b32_e32 v11, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v11, v10
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v10, v9
; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v23
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v22
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v21
; GFX8-NEXT: v_or_b32_sdwa v2, v2, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v20
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v19
; GFX8-NEXT: v_or_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v18
; GFX8-NEXT: v_or_b32_sdwa v5, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v17
; GFX8-NEXT: v_or_b32_sdwa v6, v6, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v16
; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v16bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v16, 0xffff0000, v7
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v16, v16
; GFX900-NEXT: v_lshrrev_b32_e32 v16, 16, v15
; GFX900-NEXT: v_lshrrev_b32_e32 v17, 16, v7
; GFX900-NEXT: v_and_b32_e32 v19, 0xffff0000, v15
; GFX900-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX900-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v16
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v18, v19
; GFX900-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v16
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v18
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v17
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
; GFX900-NEXT: v_and_b32_e32 v17, 0xffff0000, v6
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX900-NEXT: v_lshrrev_b32_e32 v17, 16, v14
; GFX900-NEXT: v_lshrrev_b32_e32 v18, 16, v6
; GFX900-NEXT: v_and_b32_e32 v20, 0xffff0000, v14
; GFX900-NEXT: v_cndmask_b32_e32 v18, v18, v17, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX900-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v17
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v19, v20
; GFX900-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v17
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v19
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v18
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v17, v17, v18, vcc
; GFX900-NEXT: v_and_b32_e32 v18, 0xffff0000, v5
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX900-NEXT: v_lshrrev_b32_e32 v18, 16, v13
; GFX900-NEXT: v_lshrrev_b32_e32 v19, 16, v5
; GFX900-NEXT: v_and_b32_e32 v21, 0xffff0000, v13
; GFX900-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX900-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v20, v21
; GFX900-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v18
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v20
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v19
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
; GFX900-NEXT: v_and_b32_e32 v19, 0xffff0000, v4
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX900-NEXT: v_lshrrev_b32_e32 v19, 16, v12
; GFX900-NEXT: v_lshrrev_b32_e32 v20, 16, v4
; GFX900-NEXT: v_and_b32_e32 v22, 0xffff0000, v12
; GFX900-NEXT: v_cndmask_b32_e32 v20, v20, v19, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX900-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v19
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v21, v22
; GFX900-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v21
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v20
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v19, v19, v20, vcc
; GFX900-NEXT: v_and_b32_e32 v20, 0xffff0000, v3
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX900-NEXT: v_lshrrev_b32_e32 v20, 16, v11
; GFX900-NEXT: v_lshrrev_b32_e32 v21, 16, v3
; GFX900-NEXT: v_and_b32_e32 v23, 0xffff0000, v11
; GFX900-NEXT: v_cndmask_b32_e32 v21, v21, v20, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX900-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v20
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v22, v23
; GFX900-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v22
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v21
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v20, v20, v21, vcc
; GFX900-NEXT: v_and_b32_e32 v21, 0xffff0000, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX900-NEXT: v_lshrrev_b32_e32 v21, 16, v10
; GFX900-NEXT: v_lshrrev_b32_e32 v22, 16, v2
; GFX900-NEXT: v_and_b32_e32 v24, 0xffff0000, v10
; GFX900-NEXT: v_cndmask_b32_e32 v22, v22, v21, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX900-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v21
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v23, v24
; GFX900-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v23
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v22
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v21, v21, v22, vcc
; GFX900-NEXT: v_and_b32_e32 v22, 0xffff0000, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX900-NEXT: v_lshrrev_b32_e32 v22, 16, v9
; GFX900-NEXT: v_lshrrev_b32_e32 v23, 16, v1
; GFX900-NEXT: v_and_b32_e32 v25, 0xffff0000, v9
; GFX900-NEXT: v_cndmask_b32_e32 v23, v23, v22, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX900-NEXT: v_cndmask_b32_e32 v22, v22, v23, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v22
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v24, v25
; GFX900-NEXT: v_cndmask_b32_e32 v22, v22, v23, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v23
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v22, v22, v23, vcc
; GFX900-NEXT: v_and_b32_e32 v23, 0xffff0000, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX900-NEXT: v_lshrrev_b32_e32 v23, 16, v8
; GFX900-NEXT: v_lshrrev_b32_e32 v24, 16, v0
; GFX900-NEXT: v_and_b32_e32 v26, 0xffff0000, v8
; GFX900-NEXT: v_cndmask_b32_e32 v24, v24, v23, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX900-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v25, v26
; GFX900-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v25
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v24
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX900-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX900-NEXT: v_cndmask_b32_e32 v15, v15, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v25, v24
; GFX900-NEXT: v_cndmask_b32_e32 v15, v15, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v7
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
; GFX900-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
; GFX900-NEXT: v_cndmask_b32_e32 v14, v14, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v24, v15
; GFX900-NEXT: v_cndmask_b32_e32 v14, v14, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v15
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v6
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX900-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX900-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX900-NEXT: v_lshlrev_b32_e32 v15, 16, v5
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v15, v14
; GFX900-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v14
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v4
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX900-NEXT: v_cndmask_b32_e32 v12, v12, v4, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v4
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v14, v13
; GFX900-NEXT: v_cndmask_b32_e32 v12, v12, v4, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v13
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v4
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX900-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX900-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v13, v12
; GFX900-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX900-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v12, v11
; GFX900-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v11
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v2
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX900-NEXT: v_lshlrev_b32_e32 v11, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v11, v10
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX900-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v10, v9
; GFX900-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v23, v0, s4
; GFX900-NEXT: v_perm_b32 v1, v22, v1, s4
; GFX900-NEXT: v_perm_b32 v2, v21, v2, s4
; GFX900-NEXT: v_perm_b32 v3, v20, v3, s4
; GFX900-NEXT: v_perm_b32 v4, v19, v4, s4
; GFX900-NEXT: v_perm_b32 v5, v18, v5, s4
; GFX900-NEXT: v_perm_b32 v6, v17, v6, s4
; GFX900-NEXT: v_perm_b32 v7, v16, v7, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v16bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v16, 0xffff0000, v7
; GFX950-NEXT: v_lshrrev_b32_e32 v17, 16, v15
; GFX950-NEXT: v_lshrrev_b32_e32 v18, 16, v7
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v16, v16
; GFX950-NEXT: v_and_b32_e32 v19, 0xffff0000, v15
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v16, v18, v17, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v16
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v16
; GFX950-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v17
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v18, v19
; GFX950-NEXT: v_lshrrev_b32_e32 v19, 16, v6
; GFX950-NEXT: v_and_b32_e32 v20, 0xffff0000, v14
; GFX950-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v18
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v18, 16, v14
; GFX950-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
; GFX950-NEXT: v_and_b32_e32 v17, 0xffff0000, v6
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX950-NEXT: v_and_b32_e32 v21, 0xffff0000, v13
; GFX950-NEXT: v_and_b32_e32 v22, 0xffff0000, v12
; GFX950-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v17
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v17
; GFX950-NEXT: v_cndmask_b32_e32 v18, v18, v17, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v18
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v19, v20
; GFX950-NEXT: v_lshrrev_b32_e32 v20, 16, v5
; GFX950-NEXT: v_and_b32_e32 v23, 0xffff0000, v11
; GFX950-NEXT: v_cndmask_b32_e32 v18, v18, v17, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v19
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v19, 16, v13
; GFX950-NEXT: v_cndmask_b32_e32 v17, v18, v17, vcc
; GFX950-NEXT: v_and_b32_e32 v18, 0xffff0000, v5
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX950-NEXT: v_and_b32_e32 v24, 0xffff0000, v10
; GFX950-NEXT: v_and_b32_e32 v25, 0xffff0000, v9
; GFX950-NEXT: v_cndmask_b32_e32 v18, v20, v19, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v18
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v18
; GFX950-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v20, v21
; GFX950-NEXT: v_lshrrev_b32_e32 v21, 16, v4
; GFX950-NEXT: v_and_b32_e32 v26, 0xffff0000, v8
; GFX950-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v20
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v20, 16, v12
; GFX950-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
; GFX950-NEXT: v_and_b32_e32 v19, 0xffff0000, v4
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v19, v21, v20, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v19
; GFX950-NEXT: v_cndmask_b32_e32 v20, v20, v19, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v21, v22
; GFX950-NEXT: v_lshrrev_b32_e32 v22, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v20, v20, v19, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v21
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v21, 16, v11
; GFX950-NEXT: v_cndmask_b32_e32 v19, v20, v19, vcc
; GFX950-NEXT: v_and_b32_e32 v20, 0xffff0000, v3
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v20, v22, v21, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v20
; GFX950-NEXT: v_cndmask_b32_e32 v21, v21, v20, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v22, v23
; GFX950-NEXT: v_lshrrev_b32_e32 v23, 16, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v21, v21, v20, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v22
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v22, 16, v10
; GFX950-NEXT: v_cndmask_b32_e32 v20, v21, v20, vcc
; GFX950-NEXT: v_and_b32_e32 v21, 0xffff0000, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v21, v23, v22, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v21
; GFX950-NEXT: v_cndmask_b32_e32 v22, v22, v21, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v23, v24
; GFX950-NEXT: v_lshrrev_b32_e32 v24, 16, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v22, v22, v21, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v23
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v23, 16, v9
; GFX950-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc
; GFX950-NEXT: v_and_b32_e32 v22, 0xffff0000, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v22, v24, v23, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v22
; GFX950-NEXT: v_cndmask_b32_e32 v23, v23, v22, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v24, v25
; GFX950-NEXT: v_lshrrev_b32_e32 v25, 16, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v23, v23, v22, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v24, 16, v8
; GFX950-NEXT: v_cndmask_b32_e32 v22, v23, v22, vcc
; GFX950-NEXT: v_and_b32_e32 v23, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v23, v25, v24, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v23
; GFX950-NEXT: v_cndmask_b32_e32 v24, v24, v23, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v24
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v25, v26
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v24, v24, v23, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v25
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v23, v24, v23, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v7
; GFX950-NEXT: v_cndmask_b32_e32 v15, v15, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v25, v24
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v15, v15, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v15
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v15, 16, v6
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
; GFX950-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v6
; GFX950-NEXT: v_cndmask_b32_e32 v14, v14, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v24, v15
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v14, v14, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v15, 16, v14
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v15
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v5, v13, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
; GFX950-NEXT: v_lshlrev_b32_e32 v15, 16, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v15, v14
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v13
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v14
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v4
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v4
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v4
; GFX950-NEXT: v_cndmask_b32_e32 v12, v12, v4, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v14, v13
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v12, v12, v4, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v13
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v11, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
; GFX950-NEXT: v_lshlrev_b32_e32 v13, 16, v3
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v3
; GFX950-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v13, v12
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v12
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v2
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v2
; GFX950-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v12, v11
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v11
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v11, v10
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v9
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v10
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v10, v9
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v8, v8, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v8
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v9
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v23, v0, s0
; GFX950-NEXT: v_perm_b32 v1, v22, v1, s0
; GFX950-NEXT: v_perm_b32 v2, v21, v2, s0
; GFX950-NEXT: v_perm_b32 v3, v20, v3, s0
; GFX950-NEXT: v_perm_b32 v4, v19, v4, s0
; GFX950-NEXT: v_perm_b32 v5, v18, v5, s0
; GFX950-NEXT: v_perm_b32 v6, v17, v6, s0
; GFX950-NEXT: v_perm_b32 v7, v16, v7, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v16bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v16, 0xffff0000, v7
; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v15
; GFX10-NEXT: v_lshrrev_b32_e32 v18, 16, v7
; GFX10-NEXT: v_and_b32_e32 v19, 0xffff0000, v15
; GFX10-NEXT: v_lshrrev_b32_e32 v20, 16, v6
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16
; GFX10-NEXT: v_and_b32_e32 v23, 0xffff0000, v14
; GFX10-NEXT: v_and_b32_e32 v24, 0xffff0000, v13
; GFX10-NEXT: v_and_b32_e32 v26, 0xffff0000, v12
; GFX10-NEXT: v_and_b32_e32 v28, 0xffff0000, v11
; GFX10-NEXT: v_cndmask_b32_e32 v16, v18, v17, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19
; GFX10-NEXT: v_and_b32_e32 v18, 0xffff0000, v6
; GFX10-NEXT: v_lshrrev_b32_e32 v19, 16, v14
; GFX10-NEXT: v_lshrrev_b32_e32 v29, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v16
; GFX10-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v17
; GFX10-NEXT: v_cndmask_b32_e32 v18, v20, v19, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v21, v22
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX10-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
; GFX10-NEXT: v_lshlrev_b32_e32 v20, 16, v17
; GFX10-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v20
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v19
; GFX10-NEXT: v_and_b32_e32 v20, 0xffff0000, v5
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v21, v22
; GFX10-NEXT: v_lshrrev_b32_e32 v21, 16, v13
; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v20, v20
; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v18, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v16
; GFX10-NEXT: v_cndmask_b32_e64 v20, v22, v21, s5
; GFX10-NEXT: v_lshrrev_b32_e32 v22, 16, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v19
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v24, v24
; GFX10-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v20
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v23
; GFX10-NEXT: v_lshrrev_b32_e32 v23, 16, v4
; GFX10-NEXT: v_cndmask_b32_e64 v17, v21, v20, s4
; GFX10-NEXT: v_and_b32_e32 v21, 0xffff0000, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v17
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v21, v21
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v24, v25
; GFX10-NEXT: v_cndmask_b32_e64 v21, v23, v22, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v18
; GFX10-NEXT: v_lshrrev_b32_e32 v24, 16, v11
; GFX10-NEXT: v_lshrrev_b32_e32 v25, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v23, v17, v20, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v26, v26
; GFX10-NEXT: v_and_b32_e32 v17, 0xffff0000, v3
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v21
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v20
; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v21, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v17, v17
; GFX10-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28
; GFX10-NEXT: v_lshlrev_b32_e32 v18, 16, v23
; GFX10-NEXT: v_lshlrev_b32_e32 v27, 16, v22
; GFX10-NEXT: v_cndmask_b32_e64 v25, v25, v24, s5
; GFX10-NEXT: v_lshrrev_b32_e32 v28, 16, v10
; GFX10-NEXT: v_cndmask_b32_e32 v19, v24, v25, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v26, v27
; GFX10-NEXT: v_and_b32_e32 v27, 0xffff0000, v2
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v19
; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v21, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v18
; GFX10-NEXT: v_lshlrev_b32_e32 v18, 16, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v22
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v18, v24
; GFX10-NEXT: v_cndmask_b32_e32 v18, v23, v20, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v21
; GFX10-NEXT: v_cndmask_b32_e64 v24, v19, v25, s5
; GFX10-NEXT: v_and_b32_e32 v19, 0xffff0000, v10
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v27, v27
; GFX10-NEXT: v_lshlrev_b32_e32 v20, 16, v24
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19
; GFX10-NEXT: v_cndmask_b32_e64 v27, v29, v28, s5
; GFX10-NEXT: v_lshrrev_b32_e32 v29, 16, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v20
; GFX10-NEXT: v_cndmask_b32_e32 v19, v28, v27, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v20, 16, v27
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v26
; GFX10-NEXT: v_and_b32_e32 v26, 0xffff0000, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v28, 16, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v19
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v27
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v20, v23
; GFX10-NEXT: v_and_b32_e32 v23, 0xffff0000, v9
; GFX10-NEXT: v_cndmask_b32_e64 v20, v19, v27, s7
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v26, v26
; GFX10-NEXT: v_cndmask_b32_e32 v19, v22, v21, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX10-NEXT: v_cndmask_b32_e64 v26, v29, v28, s7
; GFX10-NEXT: v_lshrrev_b32_e32 v29, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v22, v28, v26, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_lshrrev_b32_e32 v28, 16, v8
; GFX10-NEXT: v_cndmask_b32_e32 v23, v24, v25, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v21
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX10-NEXT: v_and_b32_e32 v25, 0xffff0000, v0
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v21, v24
; GFX10-NEXT: v_and_b32_e32 v24, 0xffff0000, v8
; GFX10-NEXT: v_cndmask_b32_e32 v20, v20, v27, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v21, v22, v26, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v25, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v27, 16, v21
; GFX10-NEXT: v_cndmask_b32_e64 v22, v29, v28, s5
; GFX10-NEXT: v_lshlrev_b32_e32 v29, 16, v14
; GFX10-NEXT: v_cndmask_b32_e32 v24, v28, v22, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v28, 16, v15
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v22
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v28, v28
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v27
; GFX10-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v28, 16, v6
; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v7, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v26
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v25, v27
; GFX10-NEXT: v_lshlrev_b32_e32 v27, 16, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v15
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e64 v24, v24, v22, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v28, v28
; GFX10-NEXT: v_cndmask_b32_e32 v21, v21, v26, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v22
; GFX10-NEXT: v_lshlrev_b32_e32 v28, 16, v24
; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v14, s5
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v27, v25
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX10-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX10-NEXT: v_cndmask_b32_e64 v15, v15, v7, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v29, v29
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v15
; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v22, v24, v22, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v5
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v27, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v13
; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v6, s7
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v25, v25
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v14
; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v13, s7
; GFX10-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v7, v15, v7, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v4
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v13
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v5
; GFX10-NEXT: v_perm_b32 v7, v16, v7, 0x5040100
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v15, v15
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v12
; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v12, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v25, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v11
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v11, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v26, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v27, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v5, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v15, v15
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v13
; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v4, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v25, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v4
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v15
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v12
; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v3, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v25, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v11
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v4
; GFX10-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v12, v12, v4, s7
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v27, v26
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v15, v15
; GFX10-NEXT: v_cndmask_b32_e32 v5, v13, v5, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v12
; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v3, s7
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v8
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v10, s6
; GFX10-NEXT: v_perm_b32 v5, v18, v5, 0x5040100
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v2
; GFX10-NEXT: v_cmp_eq_u16_e64 s10, 0x8000, v2
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v14, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v9
; GFX10-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc_lo
; GFX10-NEXT: v_perm_b32 v6, v17, v6, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v9, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v13, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v10
; GFX10-NEXT: v_perm_b32 v4, v19, v4, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v8, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v14, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v0
; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v1, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v15, v15
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v0, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v13, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v15, 16, v8
; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v2, s6
; GFX10-NEXT: v_cmp_lt_f32_e64 s6, v14, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v25, 16, v10
; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v1, s6
; GFX10-NEXT: v_cmp_lt_f32_e64 s6, v24, v15
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v26, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v9
; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v0, s6
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v3
; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v2, s7
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v8
; GFX10-NEXT: s_and_b32 s5, s5, s6
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v1
; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s5
; GFX10-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v13
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v14
; GFX10-NEXT: v_perm_b32 v3, v23, v3, 0x5040100
; GFX10-NEXT: v_cmp_eq_f32_e64 s9, 0, v11
; GFX10-NEXT: s_and_b32 s5, s5, s6
; GFX10-NEXT: v_cndmask_b32_e64 v1, v9, v1, s5
; GFX10-NEXT: s_and_b32 s5, s7, s8
; GFX10-NEXT: v_cndmask_b32_e64 v0, v8, v0, s5
; GFX10-NEXT: s_and_b32 s5, s9, s10
; GFX10-NEXT: v_perm_b32 v1, v21, v1, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s5
; GFX10-NEXT: v_perm_b32 v0, v22, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v2, v20, v2, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v16bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v16, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v23.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff0000, v6
; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff0000, v14
; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v13
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v16
; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v11
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v17, v17
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v17.l, v23.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff0000, v10
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v15
; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v25, 0xffff0000, v8
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v15
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v16.h, v15.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v26, v26
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v15.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v7
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v7
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v6.h, v14.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff0000, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v17.h, v14.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v18, v18
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v18.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v17
; GFX11-TRUE16-NEXT: v_cndmask_b16 v17.h, v17.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v17
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19
; GFX11-TRUE16-NEXT: v_cndmask_b16 v17.h, v17.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v5.h, v13.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v4
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v18.h, v13.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v19, v19
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v19.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v18
; GFX11-TRUE16-NEXT: v_cndmask_b16 v18.h, v18.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v18
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v18.h, v18.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v12
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v4.h, v12.h, s1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20
; GFX11-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v19.h, v12.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v20, v20
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v20.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v19
; GFX11-TRUE16-NEXT: v_cndmask_b16 v19.h, v19.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v19
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21
; GFX11-TRUE16-NEXT: v_cndmask_b16 v19.h, v19.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v3.h, v11.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v20.h, v11.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v21, v21
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v21.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v20
; GFX11-TRUE16-NEXT: v_cndmask_b16 v20.h, v20.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v20
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22
; GFX11-TRUE16-NEXT: v_cndmask_b16 v20.h, v20.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v2.h, v10.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff0000, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v21.h, v10.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v22, v22
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v22.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v21
; GFX11-TRUE16-NEXT: v_cndmask_b16 v21.h, v21.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v21
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX11-TRUE16-NEXT: v_cndmask_b16 v21.h, v21.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v1.h, v9.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v22.h, v9.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v24, v24
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v24.l, v23.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v22
; GFX11-TRUE16-NEXT: v_cndmask_b16 v22.h, v22.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v22
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX11-TRUE16-NEXT: v_cndmask_b16 v22.h, v22.h, v23.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v0.h, v8.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v16
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v24.h, v8.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v23.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v24
; GFX11-TRUE16-NEXT: v_cndmask_b16 v24.h, v24.h, v23.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v14
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v24
; GFX11-TRUE16-NEXT: v_cndmask_b16 v16.h, v16.l, v15.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v16.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v15.h, v24.h, v23.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v15.l, v16.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v16.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v25, v25
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v16, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v16.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.l, v14.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.l, v23.h, v16.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v14.l, v6.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v13
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v23
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v16, v16
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.l, v13.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v17.l, v23.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v13.l, v5.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v5.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v23
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v13, v13
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v5.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v11
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.l, v12.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v18.l, v23.h, v5.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v12.l, v4.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v17
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v4.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v10
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.l, v11.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v19.l, v23.h, v4.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v11.l, v3.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v3.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v18
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v3.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v9
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.l, v10.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v20.l, v23.h, v3.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v10.l, v2.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v4, v4
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v19
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v8
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v9.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v21.l, v23.h, v2.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v9.l, v1.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v3, v3
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, v20
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, v21
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v8.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v23.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v22.l, v23.h, v1.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v8.l, v0.h, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v22
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v23
; GFX11-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v23
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v15.l, v23.h, v0.h, s0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v15
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v16bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v7
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v17, 16, v15
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v7
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v6
; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v13
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16
; GFX11-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v15
; GFX11-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff0000, v12
; GFX11-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff0000, v11
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v18, v17, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v14
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v17, v17, v16 :: v_dual_and_b32 v18, 0xffff0000, v6
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v16
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v17
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v18, v20, v19, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v21, v22
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc_lo
; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff0000, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v17
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v20
; GFX11-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v19
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v20, v20
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v21, v22
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v13
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v19, v19, v18, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v16
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v20, v22, v21, s1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v19
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v24, v24
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v23
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v17, v21, v20, s0
; GFX11-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v17
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v21, v21
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v21, v23, v22, s0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v20
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v18
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v24, v25
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v11
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v3
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v20
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v17, v17, v20, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v26, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v21
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v17
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v22, v22, v21, s1
; GFX11-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v22
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v23, v23
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v23, v25, v24, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v23, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v26, v27
; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v23
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v22, v22, v21 :: v_dual_lshlrev_b32 v25, 16, v24
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v19
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v23
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v22
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v19, v25
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v17, v17, v20, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v21
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v19, v24, v23, s1
; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v10
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v25, v29, v28, s1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v20
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v28, v25, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v25
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v9
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v24
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v25
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v20, v26
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v20, v24, v25, s3
; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v9
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v27, v27
; GFX11-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v26, v29, v28, s3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v28, v26, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v8
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v19, v19, v23, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v22
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v24
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v22, v23
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v20, v20, v25 :: v_dual_lshlrev_b32 v25, 16, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v22, v24, v26, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX11-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v22
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v23, v29, v28, s1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v24, v28, v23 :: v_dual_lshlrev_b32 v29, 16, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v15
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v28, v28
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v6
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v15, v15, v7, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v26
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v25, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v7
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v15
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v24, v24, v23, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v28, v28
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v22, v22, v26, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v23
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v24
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v6, v6, v14, s1
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v27, v25
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v15, v15, v7, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v29, v29
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v15
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v14, v14, v6, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v23, v24, v23, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v13
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v14, v14, v6, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v25, v25
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v14
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v13, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v7, v15, v7 :: v_dual_lshlrev_b32 v26, 16, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v4
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v24
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v13
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v5
; GFX11-FAKE16-NEXT: v_perm_b32 v7, v16, v7, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v12
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v12, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v11
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v26, v24
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v13, v13, v5, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v13
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v12, v12, v4, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v12
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v11, v11, v3, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v25, v24
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v11
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v12, v12, v4, s3
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v15, v15
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v5, v13, v5 :: v_dual_lshlrev_b32 v14, 16, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v11, v11, v3, s3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v10, s2
; GFX11-FAKE16-NEXT: v_perm_b32 v5, v17, v5, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v9
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_perm_b32 v6, v18, v6, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, v1, v9, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v13, v13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v10
; GFX11-FAKE16-NEXT: v_perm_b32 v4, v21, v4, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v8, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v9, v9, v1, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v15, v15
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v8, v8, v0, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v13, v13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v10, v10, v2, s2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v14, v13
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v10
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v9, v9, v1, s2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v24, v15
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v26, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v9
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v8, v8, v0, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v10, v10, v2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v8
; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v11, v3, s1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v13
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v14
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v19, v3, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s5, 0, v11
; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, v9, v1, s1
; GFX11-FAKE16-NEXT: s_and_b32 s1, s3, s4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v8, v0, s1
; GFX11-FAKE16-NEXT: s_and_b32 s1, s5, s6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v22, v1, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v10, v2, s1
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v23, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v2, v20, v2, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v16bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v16, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v23.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v17, 0xffff0000, v6
; GFX12-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff0000, v14
; GFX12-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v13
; GFX12-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v16
; GFX12-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v11
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v17, v17
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v17.l, v23.l
; GFX12-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff0000, v10
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v15
; GFX12-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v25, 0xffff0000, v8
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v15
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v16.h, v15.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v23.l
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v26, v26
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v15.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v7
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v6.h, v14.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v18, 0xffff0000, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v17.h, v14.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v18, v18
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v18.l, v23.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v17
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v17.h, v17.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v17
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v17.h, v17.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v5.h, v13.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v18.h, v13.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v19, v19
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v19.l, v23.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v18
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v18.h, v18.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v18
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v18.h, v18.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v12
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v4.h, v12.h, s1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v20, v20
; GFX12-TRUE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v19.h, v12.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v20, v20
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v20.l, v23.l
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v19
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v19.h, v19.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v19
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v21, v21
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v19.h, v19.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v3.h, v11.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v20.h, v11.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v21, v21
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v21.l, v23.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v20
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v20.h, v20.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v20
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v22, v22
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v20.h, v20.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v2.h, v10.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v22, 0xffff0000, v1
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v21.h, v10.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v22, v22
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v22.l, v23.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v21
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v21.h, v21.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v21
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v21.h, v21.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v1.h, v9.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v22.h, v9.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v24, v24
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v24.l, v23.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v22
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v22.h, v22.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v22
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v22.h, v22.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v0.h, v8.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v16
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v24.h, v8.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v23.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v23, v24
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v24.h, v24.h, v23.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v14
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v24
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v16.h, v16.l, v15.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v16.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v15.h, v24.h, v23.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v15.l, v16.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v16.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v25, v25
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v16, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v16.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.l, v14.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.l, v23.h, v16.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v14.l, v6.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v14, 16, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v16, 16, v13
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v23
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v16, v16
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.l, v13.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v17.l, v23.h, v6.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v13.l, v5.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v13, 16, v12
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v5.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v23
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v13, v13
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v5.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v11
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.l, v12.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v18.l, v23.h, v5.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v12.l, v4.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v17
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v4.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v10
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.l, v11.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v19.l, v23.h, v4.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v11.l, v3.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v3.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v18
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v3.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v9
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.l, v10.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v20.l, v23.h, v3.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v10.l, v2.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v4, v4
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v19
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v8
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v9.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v21.l, v23.h, v2.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v9.l, v1.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v3, v3
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v3, v20
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, v21
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v8.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v23.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v22.l, v23.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v8.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, v22
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v23
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v23.h, v23.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v23
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v15.l, v23.h, v0.h, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v15
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v16bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v16, 0xffff0000, v7
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v17, 16, v15
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v18, 16, v7
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v20, 16, v6
; GFX12-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v13
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v16, v16
; GFX12-FAKE16-NEXT: v_and_b32_e32 v19, 0xffff0000, v15
; GFX12-FAKE16-NEXT: v_and_b32_e32 v26, 0xffff0000, v12
; GFX12-FAKE16-NEXT: v_and_b32_e32 v28, 0xffff0000, v11
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v16, v18, v17, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v19, v19
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v19, 16, v14
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v17, v17, v16 :: v_dual_and_b32 v18, 0xffff0000, v6
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v18, v18
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v16
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v17
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v18, v20, v19, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v21, v22
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v17, v17, v16, vcc_lo
; GFX12-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff0000, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v17
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v19, v19, v18, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v20
; GFX12-FAKE16-NEXT: v_and_b32_e32 v20, 0xffff0000, v5
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v19
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v20, v20
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v21, v22
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v21, 16, v13
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v19, v19, v18, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v16
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v20, v22, v21, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v22, 16, v12
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v19
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v24, v24
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v23
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v23, 16, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v17, v21, v20, s0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v21, 0xffff0000, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v17
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v21, v21
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v21, v23, v22, s0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v20
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v18
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v24, v25
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v24, 16, v11
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v25, 16, v3
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v20
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v17, v17, v20, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v26, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v21
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v17
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v22, v22, v21, s1
; GFX12-FAKE16-NEXT: v_and_b32_e32 v23, 0xffff0000, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v10
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v22
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v23, v23
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v23, v25, v24, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v23, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v26, v27
; GFX12-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v23
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v22, v22, v21 :: v_dual_lshlrev_b32 v25, 16, v24
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v19
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v23
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v22
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v19, v25
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v17, v17, v20, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v21
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v19, v24, v23, s1
; GFX12-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v10
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX12-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v25, v29, v28, s1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v20
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v24, v28, v25, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v20, 16, v25
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v24
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v25
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v20, v26
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v20, v24, v25, s3
; GFX12-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v9
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v27, v27
; GFX12-FAKE16-NEXT: v_and_b32_e32 v27, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v26, v29, v28, s3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v29, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v24, v28, v26, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v19, v19, v23, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v22
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v24
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v22, v23
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v20, v20, v25 :: v_dual_lshlrev_b32 v25, 16, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v22, v24, v26, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX12-FAKE16-NEXT: v_and_b32_e32 v24, 0xffff0000, v8
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v22
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v23, v29, v28, s1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v24, v28, v23 :: v_dual_lshlrev_b32 v29, 16, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v15
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v28, v28
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v7, v7, v15, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v27
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v15, v15, v7, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v26
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v25, v27
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v7
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v15
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v24, v24, v23, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v28, v28
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v22, v22, v26, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v23
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v24
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v6, v6, v14, s1
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v27, v25
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v15, v15, v7, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v29, v29
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v15
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v14, v14, v6, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v23, v24, v23, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v5
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v13
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v14, v14, v6, s3
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v25, v25
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v14
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v13, s3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v13, v13, v5, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v7, v15, v7 :: v_dual_lshlrev_b32 v26, 16, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v24
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v13
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v5
; GFX12-FAKE16-NEXT: v_perm_b32 v7, v16, v7, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v12, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v11, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v26, v24
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v13, v13, v5, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v15, v15
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v13
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v12, v12, v4, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v15
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v11, v11, v3, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v25, v24
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v11
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v12, v12, v4, s3
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v15, v15
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v5, v13, v5 :: v_dual_lshlrev_b32 v14, 16, v12
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v11, v11, v3, s3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v10, s2
; GFX12-FAKE16-NEXT: v_perm_b32 v5, v17, v5, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v9
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_perm_b32 v6, v18, v6, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v1, v1, v9, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v13, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v10
; GFX12-FAKE16-NEXT: v_perm_b32 v4, v21, v4, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v8, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v9, v9, v1, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v15, v15
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v8, v8, v0, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v13, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v10, v10, v2, s2
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v14, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v10
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v9, v9, v1, s2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v24, v15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v26, v25
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v13, 16, v9
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v8, v8, v0, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v3
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v10, v10, v2, s3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v8
; GFX12-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v11, v3, s1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v10
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v13
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v14
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_perm_b32 v3, v19, v3, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s5, 0, v11
; GFX12-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v1, v9, v1, s1
; GFX12-FAKE16-NEXT: s_and_b32 s1, s3, s4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v8, v0, s1
; GFX12-FAKE16-NEXT: s_and_b32 s1, s5, s6
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v22, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v10, v2, s1
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v23, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_perm_b32 v2, v20, v2, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> %x, <16 x bfloat> %y)
ret <16 x bfloat> %result
}
define <32 x bfloat> @v_minimumnum_v32bf16(<32 x bfloat> %x, <32 x bfloat> %y) {
; GFX7-LABEL: v_minimumnum_v32bf16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v31, 0xffff0000, v30
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v14
; GFX7-NEXT: v_lshlrev_b32_e32 v30, 16, v30
; GFX7-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; GFX7-NEXT: v_mul_f32_e32 v31, 1.0, v31
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_mul_f32_e32 v14, 1.0, v14
; GFX7-NEXT: v_min_f32_e32 v31, v32, v31
; GFX7-NEXT: v_min_f32_e32 v14, v14, v30
; GFX7-NEXT: v_and_b32_e32 v30, 0xffff0000, v29
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v13
; GFX7-NEXT: v_lshlrev_b32_e32 v29, 16, v29
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; GFX7-NEXT: v_mul_f32_e32 v30, 1.0, v30
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GFX7-NEXT: v_mul_f32_e32 v13, 1.0, v13
; GFX7-NEXT: v_min_f32_e32 v30, v32, v30
; GFX7-NEXT: v_min_f32_e32 v13, v13, v29
; GFX7-NEXT: v_and_b32_e32 v29, 0xffff0000, v28
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v12
; GFX7-NEXT: v_lshlrev_b32_e32 v28, 16, v28
; GFX7-NEXT: v_lshlrev_b32_e32 v12, 16, v12
; GFX7-NEXT: v_mul_f32_e32 v29, 1.0, v29
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GFX7-NEXT: v_mul_f32_e32 v12, 1.0, v12
; GFX7-NEXT: v_min_f32_e32 v29, v32, v29
; GFX7-NEXT: v_min_f32_e32 v12, v12, v28
; GFX7-NEXT: v_and_b32_e32 v28, 0xffff0000, v27
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v11
; GFX7-NEXT: v_mul_f32_e32 v28, 1.0, v28
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v28, v32, v28
; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32
; GFX7-NEXT: v_lshlrev_b32_e32 v27, 16, v27
; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11
; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_mul_f32_e32 v11, 1.0, v11
; GFX7-NEXT: v_min_f32_e32 v11, v11, v27
; GFX7-NEXT: v_and_b32_e32 v27, 0xffff0000, v15
; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15
; GFX7-NEXT: v_mul_f32_e32 v27, 1.0, v27
; GFX7-NEXT: v_mul_f32_e32 v15, 1.0, v15
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v32
; GFX7-NEXT: v_lshlrev_b32_e32 v32, 16, v32
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_min_f32_e32 v27, v27, v33
; GFX7-NEXT: v_min_f32_e32 v15, v15, v32
; GFX7-NEXT: v_and_b32_e32 v32, 0xffff0000, v26
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v10
; GFX7-NEXT: v_lshlrev_b32_e32 v26, 16, v26
; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; GFX7-NEXT: v_mul_f32_e32 v32, 1.0, v32
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GFX7-NEXT: v_mul_f32_e32 v10, 1.0, v10
; GFX7-NEXT: v_min_f32_e32 v32, v33, v32
; GFX7-NEXT: v_min_f32_e32 v10, v10, v26
; GFX7-NEXT: v_and_b32_e32 v26, 0xffff0000, v25
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v9
; GFX7-NEXT: v_lshlrev_b32_e32 v25, 16, v25
; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX7-NEXT: v_mul_f32_e32 v26, 1.0, v26
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_mul_f32_e32 v9, 1.0, v9
; GFX7-NEXT: v_min_f32_e32 v26, v33, v26
; GFX7-NEXT: v_min_f32_e32 v9, v9, v25
; GFX7-NEXT: v_and_b32_e32 v25, 0xffff0000, v24
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v8
; GFX7-NEXT: v_lshlrev_b32_e32 v24, 16, v24
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX7-NEXT: v_mul_f32_e32 v25, 1.0, v25
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_mul_f32_e32 v8, 1.0, v8
; GFX7-NEXT: v_min_f32_e32 v25, v33, v25
; GFX7-NEXT: v_min_f32_e32 v8, v8, v24
; GFX7-NEXT: v_and_b32_e32 v24, 0xffff0000, v23
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23
; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX7-NEXT: v_mul_f32_e32 v24, 1.0, v24
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GFX7-NEXT: v_mul_f32_e32 v7, 1.0, v7
; GFX7-NEXT: v_min_f32_e32 v24, v33, v24
; GFX7-NEXT: v_min_f32_e32 v7, v7, v23
; GFX7-NEXT: v_and_b32_e32 v23, 0xffff0000, v22
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v22, 16, v22
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX7-NEXT: v_mul_f32_e32 v23, 1.0, v23
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GFX7-NEXT: v_mul_f32_e32 v6, 1.0, v6
; GFX7-NEXT: v_min_f32_e32 v23, v33, v23
; GFX7-NEXT: v_min_f32_e32 v6, v6, v22
; GFX7-NEXT: v_and_b32_e32 v22, 0xffff0000, v21
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v5
; GFX7-NEXT: v_lshlrev_b32_e32 v21, 16, v21
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX7-NEXT: v_mul_f32_e32 v22, 1.0, v22
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_mul_f32_e32 v5, 1.0, v5
; GFX7-NEXT: v_min_f32_e32 v22, v33, v22
; GFX7-NEXT: v_min_f32_e32 v5, v5, v21
; GFX7-NEXT: v_and_b32_e32 v21, 0xffff0000, v20
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v4
; GFX7-NEXT: v_lshlrev_b32_e32 v20, 16, v20
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_mul_f32_e32 v21, 1.0, v21
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4
; GFX7-NEXT: v_min_f32_e32 v21, v33, v21
; GFX7-NEXT: v_min_f32_e32 v4, v4, v20
; GFX7-NEXT: v_and_b32_e32 v20, 0xffff0000, v19
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v19, 16, v19
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_mul_f32_e32 v20, 1.0, v20
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_min_f32_e32 v20, v33, v20
; GFX7-NEXT: v_min_f32_e32 v3, v3, v19
; GFX7-NEXT: v_and_b32_e32 v19, 0xffff0000, v18
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v18, 16, v18
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_mul_f32_e32 v19, 1.0, v19
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_min_f32_e32 v19, v33, v19
; GFX7-NEXT: v_min_f32_e32 v2, v2, v18
; GFX7-NEXT: v_and_b32_e32 v18, 0xffff0000, v17
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v17, 16, v17
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v18, 1.0, v18
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_min_f32_e32 v18, v33, v18
; GFX7-NEXT: v_min_f32_e32 v1, v1, v17
; GFX7-NEXT: v_and_b32_e32 v17, 0xffff0000, v16
; GFX7-NEXT: v_and_b32_e32 v33, 0xffff0000, v0
; GFX7-NEXT: v_mul_f32_e32 v17, 1.0, v17
; GFX7-NEXT: v_mul_f32_e32 v33, 1.0, v33
; GFX7-NEXT: v_lshlrev_b32_e32 v16, 16, v16
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v17, v33, v17
; GFX7-NEXT: v_mul_f32_e32 v16, 1.0, v16
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v17
; GFX7-NEXT: v_alignbit_b32 v0, v16, v0, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v18
; GFX7-NEXT: v_alignbit_b32 v1, v16, v1, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v19
; GFX7-NEXT: v_alignbit_b32 v2, v16, v2, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v20
; GFX7-NEXT: v_alignbit_b32 v3, v16, v3, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v21
; GFX7-NEXT: v_alignbit_b32 v4, v16, v4, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v22
; GFX7-NEXT: v_alignbit_b32 v5, v16, v5, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v23
; GFX7-NEXT: v_alignbit_b32 v6, v16, v6, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v24
; GFX7-NEXT: v_alignbit_b32 v7, v16, v7, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v25
; GFX7-NEXT: v_alignbit_b32 v8, v16, v8, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v26
; GFX7-NEXT: v_alignbit_b32 v9, v16, v9, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v32
; GFX7-NEXT: v_alignbit_b32 v10, v16, v10, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v28
; GFX7-NEXT: v_alignbit_b32 v11, v16, v11, 16
; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v27
; GFX7-NEXT: v_lshrrev_b32_e32 v17, 16, v31
; GFX7-NEXT: v_lshrrev_b32_e32 v18, 16, v30
; GFX7-NEXT: v_lshrrev_b32_e32 v19, 16, v29
; GFX7-NEXT: v_alignbit_b32 v12, v19, v12, 16
; GFX7-NEXT: v_alignbit_b32 v13, v18, v13, 16
; GFX7-NEXT: v_alignbit_b32 v14, v17, v14, 16
; GFX7-NEXT: v_alignbit_b32 v15, v16, v15, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v32bf16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: buffer_load_dword v55, off, s[0:3], s32
; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v14
; GFX8-NEXT: v_lshrrev_b32_e32 v32, 16, v30
; GFX8-NEXT: v_lshrrev_b32_e32 v35, 16, v14
; GFX8-NEXT: v_and_b32_e32 v37, 0xffff0000, v13
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v31, v31
; GFX8-NEXT: v_and_b32_e32 v36, 0xffff0000, v30
; GFX8-NEXT: v_lshrrev_b32_e32 v38, 16, v29
; GFX8-NEXT: v_lshrrev_b32_e32 v39, 16, v13
; GFX8-NEXT: v_cndmask_b32_e32 v31, v35, v32, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v37, v37
; GFX8-NEXT: v_and_b32_e32 v48, 0xffff0000, v29
; GFX8-NEXT: v_cndmask_b32_e32 v35, v39, v38, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX8-NEXT: v_cndmask_b32_e32 v32, v32, v31, vcc
; GFX8-NEXT: v_cmp_u_f32_e64 s[4:5], v48, v48
; GFX8-NEXT: v_lshlrev_b32_e32 v37, 16, v31
; GFX8-NEXT: v_cndmask_b32_e64 v38, v38, v35, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v39, 16, v32
; GFX8-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX8-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX8-NEXT: v_cmp_lt_f32_e64 s[6:7], v37, v39
; GFX8-NEXT: v_cndmask_b32_e64 v32, v32, v31, s[6:7]
; GFX8-NEXT: v_cmp_lt_f32_e64 s[6:7], v36, v48
; GFX8-NEXT: s_movk_i32 s10, 0x8000
; GFX8-NEXT: v_cndmask_b32_e64 v36, v38, v35, s[6:7]
; GFX8-NEXT: v_lshlrev_b32_e32 v37, 16, v32
; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, s10, v31
; GFX8-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX8-NEXT: v_cmp_eq_f32_e64 s[6:7], 0, v37
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v35
; GFX8-NEXT: v_cmp_eq_f32_e64 s[8:9], 0, v38
; GFX8-NEXT: s_and_b64 vcc, s[6:7], vcc
; GFX8-NEXT: v_and_b32_e32 v33, 0xffff0000, v15
; GFX8-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc
; GFX8-NEXT: s_and_b64 vcc, s[8:9], s[4:5]
; GFX8-NEXT: v_lshrrev_b32_e32 v34, 16, v15
; GFX8-NEXT: v_cndmask_b32_e32 v32, v36, v35, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v33, v33
; GFX8-NEXT: v_and_b32_e32 v49, 0xffff0000, v12
; GFX8-NEXT: v_lshrrev_b32_e32 v50, 16, v28
; GFX8-NEXT: v_lshrrev_b32_e32 v51, 16, v12
; GFX8-NEXT: v_and_b32_e32 v38, 0xffff0000, v27
; GFX8-NEXT: v_and_b32_e32 v39, 0xffff0000, v26
; GFX8-NEXT: v_and_b32_e32 v48, 0xffff0000, v25
; GFX8-NEXT: v_and_b32_e32 v52, 0xffff0000, v21
; GFX8-NEXT: v_and_b32_e32 v53, 0xffff0000, v20
; GFX8-NEXT: v_and_b32_e32 v54, 0xffff0000, v19
; GFX8-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX8-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX8-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX8-NEXT: v_and_b32_e32 v40, 0xffff0000, v18
; GFX8-NEXT: v_and_b32_e32 v41, 0xffff0000, v17
; GFX8-NEXT: v_and_b32_e32 v42, 0xffff0000, v16
; GFX8-NEXT: s_waitcnt vmcnt(3)
; GFX8-NEXT: v_lshrrev_b32_e32 v35, 16, v55
; GFX8-NEXT: v_and_b32_e32 v36, 0xffff0000, v55
; GFX8-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX8-NEXT: v_cndmask_b32_e32 v35, v35, v33, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v34, 16, v33
; GFX8-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX8-NEXT: v_cmp_lt_f32_e64 s[4:5], v34, v36
; GFX8-NEXT: v_cndmask_b32_e64 v34, v35, v33, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, s10, v33
; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], 0, v35
; GFX8-NEXT: s_and_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v33, v34, v33, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX8-NEXT: v_and_b32_e32 v36, 0xffff0000, v28
; GFX8-NEXT: v_cndmask_b32_e32 v34, v51, v50, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX8-NEXT: v_cndmask_b32_e32 v36, v50, v34, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX8-NEXT: v_lshlrev_b32_e32 v37, 16, v36
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v35, v37
; GFX8-NEXT: v_cndmask_b32_e32 v35, v36, v34, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v36
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v34
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v34, v35, v34, vcc
; GFX8-NEXT: v_and_b32_e32 v35, 0xffff0000, v11
; GFX8-NEXT: v_lshrrev_b32_e32 v36, 16, v27
; GFX8-NEXT: v_lshrrev_b32_e32 v37, 16, v11
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v35, v35
; GFX8-NEXT: v_cndmask_b32_e32 v35, v37, v36, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v38, v38
; GFX8-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v37, 16, v35
; GFX8-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v37, v38
; GFX8-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v37, 16, v36
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v37
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v35
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc
; GFX8-NEXT: v_and_b32_e32 v36, 0xffff0000, v10
; GFX8-NEXT: v_lshrrev_b32_e32 v37, 16, v26
; GFX8-NEXT: v_lshrrev_b32_e32 v38, 16, v10
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX8-NEXT: v_cndmask_b32_e32 v36, v38, v37, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v39, v39
; GFX8-NEXT: v_cndmask_b32_e32 v37, v37, v36, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX8-NEXT: v_lshlrev_b32_e32 v39, 16, v37
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v38, v39
; GFX8-NEXT: v_cndmask_b32_e32 v37, v37, v36, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v38, 16, v37
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v38
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v36
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v36, v37, v36, vcc
; GFX8-NEXT: v_and_b32_e32 v37, 0xffff0000, v9
; GFX8-NEXT: v_lshrrev_b32_e32 v38, 16, v25
; GFX8-NEXT: v_lshrrev_b32_e32 v39, 16, v9
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v37, v37
; GFX8-NEXT: v_cndmask_b32_e32 v37, v39, v38, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v48, v48
; GFX8-NEXT: v_cndmask_b32_e32 v38, v38, v37, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v39, 16, v37
; GFX8-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v39, v48
; GFX8-NEXT: v_cndmask_b32_e32 v38, v38, v37, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v39, 16, v38
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v37
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v37, v38, v37, vcc
; GFX8-NEXT: v_and_b32_e32 v38, 0xffff0000, v8
; GFX8-NEXT: v_lshrrev_b32_e32 v39, 16, v24
; GFX8-NEXT: v_lshrrev_b32_e32 v48, 16, v8
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v38, v38
; GFX8-NEXT: v_and_b32_e32 v49, 0xffff0000, v24
; GFX8-NEXT: v_cndmask_b32_e32 v38, v48, v39, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX8-NEXT: v_cndmask_b32_e32 v39, v39, v38, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX8-NEXT: v_lshlrev_b32_e32 v49, 16, v39
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v48, v49
; GFX8-NEXT: v_cndmask_b32_e32 v39, v39, v38, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v48, 16, v39
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v48
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v38
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v38, v39, v38, vcc
; GFX8-NEXT: v_and_b32_e32 v39, 0xffff0000, v7
; GFX8-NEXT: v_lshrrev_b32_e32 v48, 16, v23
; GFX8-NEXT: v_lshrrev_b32_e32 v49, 16, v7
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v39, v39
; GFX8-NEXT: v_and_b32_e32 v50, 0xffff0000, v23
; GFX8-NEXT: v_cndmask_b32_e32 v39, v49, v48, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v50, v50
; GFX8-NEXT: v_cndmask_b32_e32 v48, v48, v39, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v49, 16, v39
; GFX8-NEXT: v_lshlrev_b32_e32 v50, 16, v48
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v49, v50
; GFX8-NEXT: v_cndmask_b32_e32 v48, v48, v39, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v49, 16, v48
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v49
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v39
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v39, v48, v39, vcc
; GFX8-NEXT: v_and_b32_e32 v48, 0xffff0000, v6
; GFX8-NEXT: v_lshrrev_b32_e32 v49, 16, v22
; GFX8-NEXT: v_lshrrev_b32_e32 v50, 16, v6
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v48, v48
; GFX8-NEXT: v_and_b32_e32 v51, 0xffff0000, v22
; GFX8-NEXT: v_cndmask_b32_e32 v48, v50, v49, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v51, v51
; GFX8-NEXT: v_cndmask_b32_e32 v49, v49, v48, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v50, 16, v48
; GFX8-NEXT: v_lshlrev_b32_e32 v51, 16, v49
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v50, v51
; GFX8-NEXT: v_cndmask_b32_e32 v49, v49, v48, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v50, 16, v49
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v50
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v48
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v48, v49, v48, vcc
; GFX8-NEXT: v_and_b32_e32 v49, 0xffff0000, v5
; GFX8-NEXT: v_lshrrev_b32_e32 v50, 16, v21
; GFX8-NEXT: v_lshrrev_b32_e32 v51, 16, v5
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX8-NEXT: v_cndmask_b32_e32 v49, v51, v50, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v52, v52
; GFX8-NEXT: v_cndmask_b32_e32 v50, v50, v49, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v51, 16, v49
; GFX8-NEXT: v_lshlrev_b32_e32 v52, 16, v50
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v51, v52
; GFX8-NEXT: v_cndmask_b32_e32 v50, v50, v49, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v51, 16, v50
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v51
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v49
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v49, v50, v49, vcc
; GFX8-NEXT: v_and_b32_e32 v50, 0xffff0000, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v51, 16, v20
; GFX8-NEXT: v_lshrrev_b32_e32 v52, 16, v4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v50, v50
; GFX8-NEXT: v_cndmask_b32_e32 v50, v52, v51, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v53, v53
; GFX8-NEXT: v_cndmask_b32_e32 v51, v51, v50, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v52, 16, v50
; GFX8-NEXT: v_lshlrev_b32_e32 v53, 16, v51
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v52, v53
; GFX8-NEXT: v_cndmask_b32_e32 v51, v51, v50, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v52, 16, v51
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v52
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v50
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v50, v51, v50, vcc
; GFX8-NEXT: v_and_b32_e32 v51, 0xffff0000, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v52, 16, v19
; GFX8-NEXT: v_lshrrev_b32_e32 v53, 16, v3
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v51, v51
; GFX8-NEXT: v_cndmask_b32_e32 v51, v53, v52, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v54, v54
; GFX8-NEXT: v_cndmask_b32_e32 v52, v52, v51, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v53, 16, v51
; GFX8-NEXT: v_lshlrev_b32_e32 v54, 16, v52
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v53, v54
; GFX8-NEXT: v_cndmask_b32_e32 v52, v52, v51, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v53, 16, v52
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v53
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v51
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v51, v52, v51, vcc
; GFX8-NEXT: v_and_b32_e32 v52, 0xffff0000, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v53, 16, v18
; GFX8-NEXT: v_lshrrev_b32_e32 v54, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v52, v52
; GFX8-NEXT: v_cndmask_b32_e32 v52, v54, v53, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX8-NEXT: v_cndmask_b32_e32 v53, v53, v52, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v54, 16, v52
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v53
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v54, v40
; GFX8-NEXT: v_cndmask_b32_e32 v53, v53, v52, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v54, 16, v53
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v54
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v52
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v52, v53, v52, vcc
; GFX8-NEXT: v_and_b32_e32 v53, 0xffff0000, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v54, 16, v17
; GFX8-NEXT: v_lshrrev_b32_e32 v40, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v53, v53
; GFX8-NEXT: v_cndmask_b32_e32 v53, v40, v54, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v41, v41
; GFX8-NEXT: v_cndmask_b32_e32 v54, v54, v53, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v53
; GFX8-NEXT: v_lshlrev_b32_e32 v41, 16, v54
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v40, v41
; GFX8-NEXT: v_cndmask_b32_e32 v54, v54, v53, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v54
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v40
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v53
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v53, v54, v53, vcc
; GFX8-NEXT: v_and_b32_e32 v54, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v40, 16, v16
; GFX8-NEXT: v_lshrrev_b32_e32 v41, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v54, v54
; GFX8-NEXT: v_cndmask_b32_e32 v54, v41, v40, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v42, v42
; GFX8-NEXT: v_cndmask_b32_e32 v40, v40, v54, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v41, 16, v54
; GFX8-NEXT: v_lshlrev_b32_e32 v42, 16, v40
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v41, v42
; GFX8-NEXT: v_cndmask_b32_e32 v40, v40, v54, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v41, 16, v40
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v41
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v54
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v54, v40, v54, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v15
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX8-NEXT: v_cndmask_b32_e32 v15, v15, v55, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX8-NEXT: v_cndmask_b32_e32 v55, v55, v15, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX8-NEXT: v_lshlrev_b32_e32 v41, 16, v15
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v41, v40
; GFX8-NEXT: v_cndmask_b32_e32 v55, v55, v15, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v40
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v15
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v15, v55, v15, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v55, 16, v14
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v55, v55
; GFX8-NEXT: v_lshlrev_b32_e32 v55, 16, v30
; GFX8-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v55, v55
; GFX8-NEXT: v_cndmask_b32_e32 v30, v30, v14, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v55, 16, v30
; GFX8-NEXT: v_lshlrev_b32_e32 v40, 16, v14
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v40, v55
; GFX8-NEXT: v_cndmask_b32_e32 v30, v30, v14, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v55, 16, v30
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v55
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v14
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v14, v30, v14, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v30, 16, v13
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
; GFX8-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX8-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
; GFX8-NEXT: v_cndmask_b32_e32 v29, v29, v13, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX8-NEXT: v_lshlrev_b32_e32 v55, 16, v13
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v55, v30
; GFX8-NEXT: v_cndmask_b32_e32 v29, v29, v13, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v30
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v13
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v13, v29, v13, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v29, 16, v12
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
; GFX8-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX8-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
; GFX8-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX8-NEXT: v_lshlrev_b32_e32 v30, 16, v12
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v30, v29
; GFX8-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v29
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v12
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v12, v28, v12, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v28, 16, v11
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
; GFX8-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX8-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
; GFX8-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX8-NEXT: v_lshlrev_b32_e32 v29, 16, v11
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v29, v28
; GFX8-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v28
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v11
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v11, v27, v11, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v27, 16, v10
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
; GFX8-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX8-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
; GFX8-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX8-NEXT: v_lshlrev_b32_e32 v28, 16, v10
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v28, v27
; GFX8-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v27
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v10
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v10, v26, v10, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v26, 16, v9
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX8-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX8-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX8-NEXT: v_cndmask_b32_e32 v25, v25, v9, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX8-NEXT: v_lshlrev_b32_e32 v27, 16, v9
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v27, v26
; GFX8-NEXT: v_cndmask_b32_e32 v25, v25, v9, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v26
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v9
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v9, v25, v9, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v8
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX8-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX8-NEXT: v_lshlrev_b32_e32 v26, 16, v8
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v26, v25
; GFX8-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v25
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v8
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v8, v24, v8, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX8-NEXT: v_cndmask_b32_e32 v23, v23, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX8-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v25, v24
; GFX8-NEXT: v_cndmask_b32_e32 v23, v23, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v7
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v7, v23, v7, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v6
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX8-NEXT: v_cndmask_b32_e32 v22, v22, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX8-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v24, v23
; GFX8-NEXT: v_cndmask_b32_e32 v22, v22, v6, vcc
; GFX8-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX8-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX8-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v23
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v6
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v6, v22, v6, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v5
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX8-NEXT: v_lshlrev_b32_e32 v23, 16, v5
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v23, v22
; GFX8-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v22
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v4
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX8-NEXT: v_lshlrev_b32_e32 v22, 16, v4
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v22, v21
; GFX8-NEXT: v_cndmask_b32_e32 v20, v20, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v21
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v4
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v3
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX8-NEXT: v_lshlrev_b32_e32 v21, 16, v3
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v21, v20
; GFX8-NEXT: v_cndmask_b32_e32 v19, v19, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v20
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v3
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v2
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX8-NEXT: v_lshlrev_b32_e32 v20, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v20, v19
; GFX8-NEXT: v_cndmask_b32_e32 v18, v18, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v19
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v2
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX8-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX8-NEXT: v_lshlrev_b32_e32 v19, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v19, v18
; GFX8-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v18
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v17, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX8-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX8-NEXT: v_cndmask_b32_e32 v16, v16, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX8-NEXT: v_lshlrev_b32_e32 v18, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v18, v17
; GFX8-NEXT: v_cndmask_b32_e32 v16, v16, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v17
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v54
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v53
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v52
; GFX8-NEXT: v_or_b32_sdwa v2, v2, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v51
; GFX8-NEXT: v_or_b32_sdwa v3, v3, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v50
; GFX8-NEXT: v_or_b32_sdwa v4, v4, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v49
; GFX8-NEXT: v_or_b32_sdwa v5, v5, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v48
; GFX8-NEXT: v_or_b32_sdwa v6, v6, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v39
; GFX8-NEXT: v_or_b32_sdwa v7, v7, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v38
; GFX8-NEXT: v_or_b32_sdwa v8, v8, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v37
; GFX8-NEXT: v_or_b32_sdwa v9, v9, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v36
; GFX8-NEXT: v_or_b32_sdwa v10, v10, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v35
; GFX8-NEXT: v_or_b32_sdwa v11, v11, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v34
; GFX8-NEXT: v_or_b32_sdwa v12, v12, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v32
; GFX8-NEXT: v_or_b32_sdwa v13, v13, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v31
; GFX8-NEXT: v_or_b32_sdwa v14, v14, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v16, 16, v33
; GFX8-NEXT: v_or_b32_sdwa v15, v15, v16 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v32bf16:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: buffer_load_dword v55, off, s[0:3], s32
; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v14
; GFX900-NEXT: v_lshrrev_b32_e32 v32, 16, v30
; GFX900-NEXT: v_lshrrev_b32_e32 v35, 16, v14
; GFX900-NEXT: v_and_b32_e32 v37, 0xffff0000, v13
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v31, v31
; GFX900-NEXT: v_and_b32_e32 v36, 0xffff0000, v30
; GFX900-NEXT: v_lshrrev_b32_e32 v38, 16, v29
; GFX900-NEXT: v_lshrrev_b32_e32 v39, 16, v13
; GFX900-NEXT: v_cndmask_b32_e32 v31, v35, v32, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v37, v37
; GFX900-NEXT: v_and_b32_e32 v48, 0xffff0000, v29
; GFX900-NEXT: v_cndmask_b32_e32 v35, v39, v38, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX900-NEXT: v_cndmask_b32_e32 v32, v32, v31, vcc
; GFX900-NEXT: v_cmp_u_f32_e64 s[4:5], v48, v48
; GFX900-NEXT: v_lshlrev_b32_e32 v37, 16, v31
; GFX900-NEXT: v_cndmask_b32_e64 v38, v38, v35, s[4:5]
; GFX900-NEXT: v_lshlrev_b32_e32 v39, 16, v32
; GFX900-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX900-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX900-NEXT: v_cmp_lt_f32_e64 s[6:7], v37, v39
; GFX900-NEXT: v_cndmask_b32_e64 v32, v32, v31, s[6:7]
; GFX900-NEXT: v_cmp_lt_f32_e64 s[6:7], v36, v48
; GFX900-NEXT: s_movk_i32 s10, 0x8000
; GFX900-NEXT: v_cndmask_b32_e64 v36, v38, v35, s[6:7]
; GFX900-NEXT: v_lshlrev_b32_e32 v37, 16, v32
; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s10, v31
; GFX900-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX900-NEXT: v_cmp_eq_f32_e64 s[6:7], 0, v37
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v35
; GFX900-NEXT: v_cmp_eq_f32_e64 s[8:9], 0, v38
; GFX900-NEXT: s_and_b64 vcc, s[6:7], vcc
; GFX900-NEXT: v_and_b32_e32 v33, 0xffff0000, v15
; GFX900-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc
; GFX900-NEXT: s_and_b64 vcc, s[8:9], s[4:5]
; GFX900-NEXT: v_lshrrev_b32_e32 v34, 16, v15
; GFX900-NEXT: v_cndmask_b32_e32 v32, v36, v35, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v33, v33
; GFX900-NEXT: v_and_b32_e32 v49, 0xffff0000, v12
; GFX900-NEXT: v_lshrrev_b32_e32 v50, 16, v28
; GFX900-NEXT: v_lshrrev_b32_e32 v51, 16, v12
; GFX900-NEXT: v_and_b32_e32 v38, 0xffff0000, v27
; GFX900-NEXT: v_and_b32_e32 v39, 0xffff0000, v26
; GFX900-NEXT: v_and_b32_e32 v48, 0xffff0000, v25
; GFX900-NEXT: v_and_b32_e32 v52, 0xffff0000, v21
; GFX900-NEXT: v_and_b32_e32 v53, 0xffff0000, v20
; GFX900-NEXT: v_and_b32_e32 v54, 0xffff0000, v19
; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX900-NEXT: v_and_b32_e32 v40, 0xffff0000, v18
; GFX900-NEXT: v_and_b32_e32 v41, 0xffff0000, v17
; GFX900-NEXT: v_and_b32_e32 v42, 0xffff0000, v16
; GFX900-NEXT: s_waitcnt vmcnt(3)
; GFX900-NEXT: v_lshrrev_b32_e32 v35, 16, v55
; GFX900-NEXT: v_and_b32_e32 v36, 0xffff0000, v55
; GFX900-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX900-NEXT: v_cndmask_b32_e32 v35, v35, v33, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v34, 16, v33
; GFX900-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX900-NEXT: v_cmp_lt_f32_e64 s[4:5], v34, v36
; GFX900-NEXT: v_cndmask_b32_e64 v34, v35, v33, s[4:5]
; GFX900-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX900-NEXT: v_cmp_eq_u16_e32 vcc, s10, v33
; GFX900-NEXT: v_cmp_eq_f32_e64 s[4:5], 0, v35
; GFX900-NEXT: s_and_b64 vcc, s[4:5], vcc
; GFX900-NEXT: v_cndmask_b32_e32 v33, v34, v33, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX900-NEXT: v_and_b32_e32 v36, 0xffff0000, v28
; GFX900-NEXT: v_cndmask_b32_e32 v34, v51, v50, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX900-NEXT: v_cndmask_b32_e32 v36, v50, v34, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX900-NEXT: v_lshlrev_b32_e32 v37, 16, v36
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v35, v37
; GFX900-NEXT: v_cndmask_b32_e32 v35, v36, v34, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v36
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v34
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v34, v35, v34, vcc
; GFX900-NEXT: v_and_b32_e32 v35, 0xffff0000, v11
; GFX900-NEXT: v_lshrrev_b32_e32 v36, 16, v27
; GFX900-NEXT: v_lshrrev_b32_e32 v37, 16, v11
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v35, v35
; GFX900-NEXT: v_cndmask_b32_e32 v35, v37, v36, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v38, v38
; GFX900-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v37, 16, v35
; GFX900-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v37, v38
; GFX900-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v37, 16, v36
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v37
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v35
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc
; GFX900-NEXT: v_and_b32_e32 v36, 0xffff0000, v10
; GFX900-NEXT: v_lshrrev_b32_e32 v37, 16, v26
; GFX900-NEXT: v_lshrrev_b32_e32 v38, 16, v10
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX900-NEXT: v_cndmask_b32_e32 v36, v38, v37, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v39, v39
; GFX900-NEXT: v_cndmask_b32_e32 v37, v37, v36, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX900-NEXT: v_lshlrev_b32_e32 v39, 16, v37
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v38, v39
; GFX900-NEXT: v_cndmask_b32_e32 v37, v37, v36, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v38, 16, v37
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v38
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v36
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v36, v37, v36, vcc
; GFX900-NEXT: v_and_b32_e32 v37, 0xffff0000, v9
; GFX900-NEXT: v_lshrrev_b32_e32 v38, 16, v25
; GFX900-NEXT: v_lshrrev_b32_e32 v39, 16, v9
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v37, v37
; GFX900-NEXT: v_cndmask_b32_e32 v37, v39, v38, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v48, v48
; GFX900-NEXT: v_cndmask_b32_e32 v38, v38, v37, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v39, 16, v37
; GFX900-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v39, v48
; GFX900-NEXT: v_cndmask_b32_e32 v38, v38, v37, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v39, 16, v38
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v37
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v37, v38, v37, vcc
; GFX900-NEXT: v_and_b32_e32 v38, 0xffff0000, v8
; GFX900-NEXT: v_lshrrev_b32_e32 v39, 16, v24
; GFX900-NEXT: v_lshrrev_b32_e32 v48, 16, v8
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v38, v38
; GFX900-NEXT: v_and_b32_e32 v49, 0xffff0000, v24
; GFX900-NEXT: v_cndmask_b32_e32 v38, v48, v39, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX900-NEXT: v_cndmask_b32_e32 v39, v39, v38, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX900-NEXT: v_lshlrev_b32_e32 v49, 16, v39
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v48, v49
; GFX900-NEXT: v_cndmask_b32_e32 v39, v39, v38, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v48, 16, v39
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v48
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v38
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v38, v39, v38, vcc
; GFX900-NEXT: v_and_b32_e32 v39, 0xffff0000, v7
; GFX900-NEXT: v_lshrrev_b32_e32 v48, 16, v23
; GFX900-NEXT: v_lshrrev_b32_e32 v49, 16, v7
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v39, v39
; GFX900-NEXT: v_and_b32_e32 v50, 0xffff0000, v23
; GFX900-NEXT: v_cndmask_b32_e32 v39, v49, v48, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v50, v50
; GFX900-NEXT: v_cndmask_b32_e32 v48, v48, v39, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v49, 16, v39
; GFX900-NEXT: v_lshlrev_b32_e32 v50, 16, v48
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v49, v50
; GFX900-NEXT: v_cndmask_b32_e32 v48, v48, v39, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v49, 16, v48
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v49
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v39
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v39, v48, v39, vcc
; GFX900-NEXT: v_and_b32_e32 v48, 0xffff0000, v6
; GFX900-NEXT: v_lshrrev_b32_e32 v49, 16, v22
; GFX900-NEXT: v_lshrrev_b32_e32 v50, 16, v6
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v48, v48
; GFX900-NEXT: v_and_b32_e32 v51, 0xffff0000, v22
; GFX900-NEXT: v_cndmask_b32_e32 v48, v50, v49, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v51, v51
; GFX900-NEXT: v_cndmask_b32_e32 v49, v49, v48, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v50, 16, v48
; GFX900-NEXT: v_lshlrev_b32_e32 v51, 16, v49
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v50, v51
; GFX900-NEXT: v_cndmask_b32_e32 v49, v49, v48, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v50, 16, v49
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v50
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v48
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v48, v49, v48, vcc
; GFX900-NEXT: v_and_b32_e32 v49, 0xffff0000, v5
; GFX900-NEXT: v_lshrrev_b32_e32 v50, 16, v21
; GFX900-NEXT: v_lshrrev_b32_e32 v51, 16, v5
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX900-NEXT: v_cndmask_b32_e32 v49, v51, v50, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v52, v52
; GFX900-NEXT: v_cndmask_b32_e32 v50, v50, v49, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v51, 16, v49
; GFX900-NEXT: v_lshlrev_b32_e32 v52, 16, v50
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v51, v52
; GFX900-NEXT: v_cndmask_b32_e32 v50, v50, v49, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v51, 16, v50
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v51
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v49
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v49, v50, v49, vcc
; GFX900-NEXT: v_and_b32_e32 v50, 0xffff0000, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v51, 16, v20
; GFX900-NEXT: v_lshrrev_b32_e32 v52, 16, v4
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v50, v50
; GFX900-NEXT: v_cndmask_b32_e32 v50, v52, v51, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v53, v53
; GFX900-NEXT: v_cndmask_b32_e32 v51, v51, v50, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v52, 16, v50
; GFX900-NEXT: v_lshlrev_b32_e32 v53, 16, v51
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v52, v53
; GFX900-NEXT: v_cndmask_b32_e32 v51, v51, v50, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v52, 16, v51
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v52
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v50
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v50, v51, v50, vcc
; GFX900-NEXT: v_and_b32_e32 v51, 0xffff0000, v3
; GFX900-NEXT: v_lshrrev_b32_e32 v52, 16, v19
; GFX900-NEXT: v_lshrrev_b32_e32 v53, 16, v3
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v51, v51
; GFX900-NEXT: v_cndmask_b32_e32 v51, v53, v52, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v54, v54
; GFX900-NEXT: v_cndmask_b32_e32 v52, v52, v51, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v53, 16, v51
; GFX900-NEXT: v_lshlrev_b32_e32 v54, 16, v52
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v53, v54
; GFX900-NEXT: v_cndmask_b32_e32 v52, v52, v51, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v53, 16, v52
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v53
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v51
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v51, v52, v51, vcc
; GFX900-NEXT: v_and_b32_e32 v52, 0xffff0000, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v53, 16, v18
; GFX900-NEXT: v_lshrrev_b32_e32 v54, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v52, v52
; GFX900-NEXT: v_cndmask_b32_e32 v52, v54, v53, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX900-NEXT: v_cndmask_b32_e32 v53, v53, v52, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v54, 16, v52
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v53
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v54, v40
; GFX900-NEXT: v_cndmask_b32_e32 v53, v53, v52, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v54, 16, v53
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v54
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v52
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v52, v53, v52, vcc
; GFX900-NEXT: v_and_b32_e32 v53, 0xffff0000, v1
; GFX900-NEXT: v_lshrrev_b32_e32 v54, 16, v17
; GFX900-NEXT: v_lshrrev_b32_e32 v40, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v53, v53
; GFX900-NEXT: v_cndmask_b32_e32 v53, v40, v54, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v41, v41
; GFX900-NEXT: v_cndmask_b32_e32 v54, v54, v53, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v53
; GFX900-NEXT: v_lshlrev_b32_e32 v41, 16, v54
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v40, v41
; GFX900-NEXT: v_cndmask_b32_e32 v54, v54, v53, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v54
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v40
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v53
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v53, v54, v53, vcc
; GFX900-NEXT: v_and_b32_e32 v54, 0xffff0000, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v40, 16, v16
; GFX900-NEXT: v_lshrrev_b32_e32 v41, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v54, v54
; GFX900-NEXT: v_cndmask_b32_e32 v54, v41, v40, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v42, v42
; GFX900-NEXT: v_cndmask_b32_e32 v40, v40, v54, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v41, 16, v54
; GFX900-NEXT: v_lshlrev_b32_e32 v42, 16, v40
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v41, v42
; GFX900-NEXT: v_cndmask_b32_e32 v40, v40, v54, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v41, 16, v40
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v41
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v54
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v54, v40, v54, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v15
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX900-NEXT: v_cndmask_b32_e32 v15, v15, v55, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX900-NEXT: v_cndmask_b32_e32 v55, v55, v15, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX900-NEXT: v_lshlrev_b32_e32 v41, 16, v15
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v41, v40
; GFX900-NEXT: v_cndmask_b32_e32 v55, v55, v15, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v40
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v15
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v15, v55, v15, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v55, 16, v14
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v55, v55
; GFX900-NEXT: v_lshlrev_b32_e32 v55, 16, v30
; GFX900-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v55, v55
; GFX900-NEXT: v_cndmask_b32_e32 v30, v30, v14, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v55, 16, v30
; GFX900-NEXT: v_lshlrev_b32_e32 v40, 16, v14
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v40, v55
; GFX900-NEXT: v_cndmask_b32_e32 v30, v30, v14, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v55, 16, v30
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v55
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v14
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v14, v30, v14, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v13
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX900-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
; GFX900-NEXT: v_cndmask_b32_e32 v29, v29, v13, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX900-NEXT: v_lshlrev_b32_e32 v55, 16, v13
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v55, v30
; GFX900-NEXT: v_cndmask_b32_e32 v29, v29, v13, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v30
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v13
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v13, v29, v13, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v29, 16, v12
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
; GFX900-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX900-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
; GFX900-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v12
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v30, v29
; GFX900-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v29
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v12
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v12, v28, v12, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v11
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX900-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
; GFX900-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX900-NEXT: v_lshlrev_b32_e32 v29, 16, v11
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v29, v28
; GFX900-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v28
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v11
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v11, v27, v11, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v27, 16, v10
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
; GFX900-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX900-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
; GFX900-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v10
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v28, v27
; GFX900-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v27
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v10
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v10, v26, v10, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v9
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX900-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX900-NEXT: v_cndmask_b32_e32 v25, v25, v9, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX900-NEXT: v_lshlrev_b32_e32 v27, 16, v9
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v27, v26
; GFX900-NEXT: v_cndmask_b32_e32 v25, v25, v9, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v26
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v9
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v9, v25, v9, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v8
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX900-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX900-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v8
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v26, v25
; GFX900-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v25
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v8
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v8, v24, v8, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX900-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX900-NEXT: v_cndmask_b32_e32 v23, v23, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX900-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v25, v24
; GFX900-NEXT: v_cndmask_b32_e32 v23, v23, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v7
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v7, v23, v7, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v6
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX900-NEXT: v_cndmask_b32_e32 v22, v22, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v24, v23
; GFX900-NEXT: v_cndmask_b32_e32 v22, v22, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v23
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v6
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v6, v22, v6, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v5
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX900-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX900-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX900-NEXT: v_lshlrev_b32_e32 v23, 16, v5
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v23, v22
; GFX900-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v22
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v4
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX900-NEXT: v_cndmask_b32_e32 v20, v20, v4, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v4
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v22, v21
; GFX900-NEXT: v_cndmask_b32_e32 v20, v20, v4, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v21
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v4
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v3
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX900-NEXT: v_cndmask_b32_e32 v19, v19, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX900-NEXT: v_lshlrev_b32_e32 v21, 16, v3
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v21, v20
; GFX900-NEXT: v_cndmask_b32_e32 v19, v19, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v20
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v3
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v2
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX900-NEXT: v_cndmask_b32_e32 v18, v18, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v20, v19
; GFX900-NEXT: v_cndmask_b32_e32 v18, v18, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v19
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v2
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX900-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX900-NEXT: v_lshlrev_b32_e32 v19, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v19, v18
; GFX900-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v18
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v17, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX900-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX900-NEXT: v_cndmask_b32_e32 v16, v16, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v18, v17
; GFX900-NEXT: v_cndmask_b32_e32 v16, v16, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v17
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s10, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v54, v0, s4
; GFX900-NEXT: v_perm_b32 v1, v53, v1, s4
; GFX900-NEXT: v_perm_b32 v2, v52, v2, s4
; GFX900-NEXT: v_perm_b32 v3, v51, v3, s4
; GFX900-NEXT: v_perm_b32 v4, v50, v4, s4
; GFX900-NEXT: v_perm_b32 v5, v49, v5, s4
; GFX900-NEXT: v_perm_b32 v6, v48, v6, s4
; GFX900-NEXT: v_perm_b32 v7, v39, v7, s4
; GFX900-NEXT: v_perm_b32 v8, v38, v8, s4
; GFX900-NEXT: v_perm_b32 v9, v37, v9, s4
; GFX900-NEXT: v_perm_b32 v10, v36, v10, s4
; GFX900-NEXT: v_perm_b32 v11, v35, v11, s4
; GFX900-NEXT: v_perm_b32 v12, v34, v12, s4
; GFX900-NEXT: v_perm_b32 v13, v32, v13, s4
; GFX900-NEXT: v_perm_b32 v14, v31, v14, s4
; GFX900-NEXT: v_perm_b32 v15, v33, v15, s4
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v32bf16:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v50, off, s32
; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v14
; GFX950-NEXT: v_lshrrev_b32_e32 v32, 16, v30
; GFX950-NEXT: v_lshrrev_b32_e32 v35, 16, v14
; GFX950-NEXT: v_and_b32_e32 v37, 0xffff0000, v13
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v31, v31
; GFX950-NEXT: v_and_b32_e32 v36, 0xffff0000, v30
; GFX950-NEXT: v_lshrrev_b32_e32 v38, 16, v29
; GFX950-NEXT: v_lshrrev_b32_e32 v39, 16, v13
; GFX950-NEXT: v_cndmask_b32_e32 v31, v35, v32, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v37, v37
; GFX950-NEXT: v_and_b32_e32 v48, 0xffff0000, v29
; GFX950-NEXT: v_cmp_u_f32_e64 s[0:1], v48, v48
; GFX950-NEXT: v_cndmask_b32_e32 v35, v39, v38, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v31
; GFX950-NEXT: v_cndmask_b32_e64 v38, v38, v35, s[0:1]
; GFX950-NEXT: v_cndmask_b32_e32 v32, v32, v31, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v39, 16, v32
; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX950-NEXT: v_cmp_lt_f32_e64 s[2:3], v37, v39
; GFX950-NEXT: s_movk_i32 s6, 0x8000
; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s6, v31
; GFX950-NEXT: v_cndmask_b32_e64 v32, v32, v31, s[2:3]
; GFX950-NEXT: v_cmp_lt_f32_e64 s[2:3], v36, v48
; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v32
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v35
; GFX950-NEXT: v_cndmask_b32_e64 v36, v38, v35, s[2:3]
; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX950-NEXT: v_cmp_eq_f32_e64 s[2:3], 0, v37
; GFX950-NEXT: v_cmp_eq_f32_e64 s[4:5], 0, v38
; GFX950-NEXT: s_and_b64 vcc, s[2:3], vcc
; GFX950-NEXT: v_and_b32_e32 v33, 0xffff0000, v15
; GFX950-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc
; GFX950-NEXT: s_and_b64 vcc, s[4:5], s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v34, 16, v15
; GFX950-NEXT: v_cndmask_b32_e32 v32, v36, v35, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v33, v33
; GFX950-NEXT: v_and_b32_e32 v49, 0xffff0000, v12
; GFX950-NEXT: v_lshrrev_b32_e32 v51, 16, v28
; GFX950-NEXT: v_lshrrev_b32_e32 v52, 16, v12
; GFX950-NEXT: v_and_b32_e32 v38, 0xffff0000, v27
; GFX950-NEXT: v_and_b32_e32 v39, 0xffff0000, v26
; GFX950-NEXT: v_and_b32_e32 v48, 0xffff0000, v25
; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v21
; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v20
; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v19
; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse
; GFX950-NEXT: v_and_b32_e32 v40, 0xffff0000, v18
; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse
; GFX950-NEXT: v_and_b32_e32 v41, 0xffff0000, v17
; GFX950-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse
; GFX950-NEXT: v_and_b32_e32 v42, 0xffff0000, v16
; GFX950-NEXT: s_waitcnt vmcnt(0)
; GFX950-NEXT: v_lshrrev_b32_e32 v35, 16, v50
; GFX950-NEXT: v_and_b32_e32 v36, 0xffff0000, v50
; GFX950-NEXT: v_cndmask_b32_e32 v33, v34, v35, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX950-NEXT: v_lshlrev_b32_e32 v34, 16, v33
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v35, v35, v33, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX950-NEXT: v_cmp_lt_f32_e64 s[0:1], v34, v36
; GFX950-NEXT: v_cmp_eq_u16_e32 vcc, s6, v33
; GFX950-NEXT: v_and_b32_e32 v36, 0xffff0000, v28
; GFX950-NEXT: v_cndmask_b32_e64 v34, v35, v33, s[0:1]
; GFX950-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX950-NEXT: v_cmp_eq_f32_e64 s[0:1], 0, v35
; GFX950-NEXT: s_and_b64 vcc, s[0:1], vcc
; GFX950-NEXT: v_cndmask_b32_e32 v33, v34, v33, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX950-NEXT: v_and_b32_e32 v49, 0xffff0000, v24
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v34, v52, v51, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX950-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v34
; GFX950-NEXT: v_cndmask_b32_e32 v36, v51, v34, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v36
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v35, v37
; GFX950-NEXT: v_lshrrev_b32_e32 v37, 16, v11
; GFX950-NEXT: v_and_b32_e32 v51, 0xffff0000, v23
; GFX950-NEXT: v_cndmask_b32_e32 v35, v36, v34, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v35
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v36
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v36, 16, v27
; GFX950-NEXT: v_cndmask_b32_e32 v34, v35, v34, vcc
; GFX950-NEXT: v_and_b32_e32 v35, 0xffff0000, v11
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v35, v35
; GFX950-NEXT: v_and_b32_e32 v52, 0xffff0000, v22
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v35, v37, v36, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v38, v38
; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v35
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v35
; GFX950-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v37, v38
; GFX950-NEXT: v_lshrrev_b32_e32 v38, 16, v10
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v36
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v37
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v37, 16, v26
; GFX950-NEXT: v_cndmask_b32_e32 v35, v36, v35, vcc
; GFX950-NEXT: v_and_b32_e32 v36, 0xffff0000, v10
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v36, v36
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v36, v38, v37, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v39, v39
; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v36
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v36
; GFX950-NEXT: v_cndmask_b32_e32 v37, v37, v36, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v39, 16, v37
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v38, v39
; GFX950-NEXT: v_lshrrev_b32_e32 v39, 16, v9
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v37, v37, v36, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v37
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v38
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v38, 16, v25
; GFX950-NEXT: v_cndmask_b32_e32 v36, v37, v36, vcc
; GFX950-NEXT: v_and_b32_e32 v37, 0xffff0000, v9
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v37, v37
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v37, v39, v38, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v48, v48
; GFX950-NEXT: v_lshlrev_b32_e32 v39, 16, v37
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v37
; GFX950-NEXT: v_cndmask_b32_e32 v38, v38, v37, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v39, v48
; GFX950-NEXT: v_lshrrev_b32_e32 v48, 16, v8
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v38, v38, v37, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v39, 16, v38
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v39
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v39, 16, v24
; GFX950-NEXT: v_cndmask_b32_e32 v37, v38, v37, vcc
; GFX950-NEXT: v_and_b32_e32 v38, 0xffff0000, v8
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v38, v38
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v38, v48, v39, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v38
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v38
; GFX950-NEXT: v_cndmask_b32_e32 v39, v39, v38, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v39
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v48, v49
; GFX950-NEXT: v_lshrrev_b32_e32 v49, 16, v7
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v39, v39, v38, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v39
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v48
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v48, 16, v23
; GFX950-NEXT: v_cndmask_b32_e32 v38, v39, v38, vcc
; GFX950-NEXT: v_and_b32_e32 v39, 0xffff0000, v7
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v39, v39
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v39, v49, v48, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v51, v51
; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v39
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v39
; GFX950-NEXT: v_cndmask_b32_e32 v48, v48, v39, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v51, 16, v48
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v49, v51
; GFX950-NEXT: v_lshrrev_b32_e32 v51, 16, v6
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v48, v48, v39, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v48
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v49
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v49, 16, v22
; GFX950-NEXT: v_cndmask_b32_e32 v39, v48, v39, vcc
; GFX950-NEXT: v_and_b32_e32 v48, 0xffff0000, v6
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v48, v48
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v48, v51, v49, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v52, v52
; GFX950-NEXT: v_lshlrev_b32_e32 v51, 16, v48
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v48
; GFX950-NEXT: v_cndmask_b32_e32 v49, v49, v48, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v49
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v51, v52
; GFX950-NEXT: v_lshrrev_b32_e32 v52, 16, v5
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v49, v49, v48, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v51, 16, v49
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v51
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v51, 16, v21
; GFX950-NEXT: v_cndmask_b32_e32 v48, v49, v48, vcc
; GFX950-NEXT: v_and_b32_e32 v49, 0xffff0000, v5
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v49, v49
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v49, v52, v51, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v53, v53
; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v49
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v49
; GFX950-NEXT: v_cndmask_b32_e32 v51, v51, v49, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v51
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v52, v53
; GFX950-NEXT: v_lshrrev_b32_e32 v53, 16, v4
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v51, v51, v49, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v51
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v52
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v52, 16, v20
; GFX950-NEXT: v_cndmask_b32_e32 v49, v51, v49, vcc
; GFX950-NEXT: v_and_b32_e32 v51, 0xffff0000, v4
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v51, v51
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v51, v53, v52, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v54, v54
; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v51
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v51
; GFX950-NEXT: v_cndmask_b32_e32 v52, v52, v51, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v54, 16, v52
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v53, v54
; GFX950-NEXT: v_lshrrev_b32_e32 v54, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v52, v52, v51, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v52
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v53
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v53, 16, v19
; GFX950-NEXT: v_cndmask_b32_e32 v51, v52, v51, vcc
; GFX950-NEXT: v_and_b32_e32 v52, 0xffff0000, v3
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v52, v52
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v52, v54, v53, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v55, v55
; GFX950-NEXT: v_lshlrev_b32_e32 v54, 16, v52
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v52
; GFX950-NEXT: v_cndmask_b32_e32 v53, v53, v52, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v55, 16, v53
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v54, v55
; GFX950-NEXT: v_lshrrev_b32_e32 v55, 16, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v53, v53, v52, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v54, 16, v53
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v54
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v54, 16, v18
; GFX950-NEXT: v_cndmask_b32_e32 v52, v53, v52, vcc
; GFX950-NEXT: v_and_b32_e32 v53, 0xffff0000, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v53, v53
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v53, v55, v54, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX950-NEXT: v_lshlrev_b32_e32 v55, 16, v53
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v53
; GFX950-NEXT: v_cndmask_b32_e32 v54, v54, v53, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v54
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v55, v40
; GFX950-NEXT: v_lshrrev_b32_e32 v40, 16, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v54, v54, v53, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v55, 16, v54
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v55
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v55, 16, v17
; GFX950-NEXT: v_cndmask_b32_e32 v53, v54, v53, vcc
; GFX950-NEXT: v_and_b32_e32 v54, 0xffff0000, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v54, v54
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v54, v40, v55, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v41, v41
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v54
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v54
; GFX950-NEXT: v_cndmask_b32_e32 v55, v55, v54, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v55
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v40, v41
; GFX950-NEXT: v_lshrrev_b32_e32 v41, 16, v0
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v55, v55, v54, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v55
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v40
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v40, 16, v16
; GFX950-NEXT: v_cndmask_b32_e32 v54, v55, v54, vcc
; GFX950-NEXT: v_and_b32_e32 v55, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v55, v55
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v55, v41, v40, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v42, v42
; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v55
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v55
; GFX950-NEXT: v_cndmask_b32_e32 v40, v40, v55, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v42, 16, v40
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v41, v42
; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v40, v40, v55, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v40
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v41
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v55, v40, v55, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v15
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v50
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v15, v15, v50, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v40, v40
; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v15
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v15
; GFX950-NEXT: v_cndmask_b32_e32 v50, v50, v15, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v50
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v41, v40
; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v50, v50, v15, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v50
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v40
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v15, v50, v15, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v50, 16, v14
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v50, v50
; GFX950-NEXT: v_lshlrev_b32_e32 v50, 16, v30
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v50, v50
; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v14
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v14
; GFX950-NEXT: v_cndmask_b32_e32 v30, v30, v14, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v50, 16, v30
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v40, v50
; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v30, v30, v14, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v50, 16, v30
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v50
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v14, v30, v14, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v13
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v13, v13, v29, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
; GFX950-NEXT: v_lshlrev_b32_e32 v50, 16, v13
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v13
; GFX950-NEXT: v_cndmask_b32_e32 v29, v29, v13, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v50, v30
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v29, v29, v13, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v29
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v30
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v13, v29, v13, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v12
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v12
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v12
; GFX950-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v30, v29
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v29
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v12, v28, v12, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v11
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v11
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v11
; GFX950-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v29, v28
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v27
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v28
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v11, v27, v11, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v10
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v10
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v10
; GFX950-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v28, v27
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v26
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v27
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v10, v26, v10, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v9
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v9, v9, v25, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v9
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v9
; GFX950-NEXT: v_cndmask_b32_e32 v25, v25, v9, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v27, v26
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v25, v25, v9, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v25
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v26
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v9, v25, v9, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v8
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v8, v8, v24, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v8
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v8
; GFX950-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v26, v25
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v24
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v25
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v8, v24, v8, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
; GFX950-NEXT: v_lshlrev_b32_e32 v25, 16, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v7
; GFX950-NEXT: v_cndmask_b32_e32 v23, v23, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v25, v24
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v23, v23, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v23
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v24
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v7, v23, v7, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v6
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v22, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v6
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v6
; GFX950-NEXT: v_cndmask_b32_e32 v22, v22, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v24, v23
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v22, v22, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v22
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v23
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v6, v22, v6, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v5
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v5, v21, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
; GFX950-NEXT: v_lshlrev_b32_e32 v23, 16, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v5
; GFX950-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v23, v22
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v22
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v4
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v20, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v4
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v4
; GFX950-NEXT: v_cndmask_b32_e32 v20, v20, v4, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v22, v21
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v20, v20, v4, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v20
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v21
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v3
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v19, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
; GFX950-NEXT: v_lshlrev_b32_e32 v21, 16, v3
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v3
; GFX950-NEXT: v_cndmask_b32_e32 v19, v19, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v21, v20
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v19, v19, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v19
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v20
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v3, v19, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v2
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v18, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v2
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v2
; GFX950-NEXT: v_cndmask_b32_e32 v18, v18, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v20, v19
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v18, v18, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v19
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
; GFX950-NEXT: v_lshlrev_b32_e32 v19, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v1
; GFX950-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v19, v18
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v17, v17, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v17
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v18
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v17, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX950-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s6, v0
; GFX950-NEXT: v_cndmask_b32_e32 v16, v16, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v18, v17
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v16, v16, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v17, 16, v16
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v17
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v55, v0, s0
; GFX950-NEXT: v_perm_b32 v1, v54, v1, s0
; GFX950-NEXT: v_perm_b32 v2, v53, v2, s0
; GFX950-NEXT: v_perm_b32 v3, v52, v3, s0
; GFX950-NEXT: v_perm_b32 v4, v51, v4, s0
; GFX950-NEXT: v_perm_b32 v5, v49, v5, s0
; GFX950-NEXT: v_perm_b32 v6, v48, v6, s0
; GFX950-NEXT: v_perm_b32 v7, v39, v7, s0
; GFX950-NEXT: v_perm_b32 v8, v38, v8, s0
; GFX950-NEXT: v_perm_b32 v9, v37, v9, s0
; GFX950-NEXT: v_perm_b32 v10, v36, v10, s0
; GFX950-NEXT: v_perm_b32 v11, v35, v11, s0
; GFX950-NEXT: v_perm_b32 v12, v34, v12, s0
; GFX950-NEXT: v_perm_b32 v13, v32, v13, s0
; GFX950-NEXT: v_perm_b32 v14, v31, v14, s0
; GFX950-NEXT: v_perm_b32 v15, v33, v15, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v32bf16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v31, 0xffff0000, v13
; GFX10-NEXT: v_lshrrev_b32_e32 v33, 16, v29
; GFX10-NEXT: v_lshrrev_b32_e32 v32, 16, v13
; GFX10-NEXT: v_lshrrev_b32_e32 v37, 16, v28
; GFX10-NEXT: v_and_b32_e32 v38, 0xffff0000, v11
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31
; GFX10-NEXT: v_and_b32_e32 v31, 0xffff0000, v12
; GFX10-NEXT: v_and_b32_e32 v36, 0xffff0000, v29
; GFX10-NEXT: v_lshrrev_b32_e32 v49, 16, v27
; GFX10-NEXT: v_lshrrev_b32_e32 v39, 16, v11
; GFX10-NEXT: v_cndmask_b32_e32 v34, v32, v33, vcc_lo
; GFX10-NEXT: v_lshrrev_b32_e32 v32, 16, v12
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v31, v31
; GFX10-NEXT: v_and_b32_e32 v50, 0xffff0000, v28
; GFX10-NEXT: v_and_b32_e32 v51, 0xffff0000, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v35, 16, v34
; GFX10-NEXT: v_lshrrev_b32_e32 v52, 16, v26
; GFX10-NEXT: v_cndmask_b32_e32 v32, v32, v37, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX10-NEXT: v_and_b32_e32 v38, 0xffff0000, v27
; GFX10-NEXT: v_lshrrev_b32_e32 v53, 16, v10
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v51, v51
; GFX10-NEXT: v_lshrrev_b32_e32 v54, 16, v8
; GFX10-NEXT: v_cndmask_b32_e32 v31, v39, v49, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36
; GFX10-NEXT: v_lshlrev_b32_e32 v36, 16, v32
; GFX10-NEXT: v_lshrrev_b32_e32 v64, 16, v23
; GFX10-NEXT: v_lshrrev_b32_e32 v65, 16, v7
; GFX10-NEXT: v_lshrrev_b32_e32 v67, 16, v6
; GFX10-NEXT: v_cndmask_b32_e32 v48, v33, v34, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
; GFX10-NEXT: v_lshrrev_b32_e32 v70, 16, v4
; GFX10-NEXT: v_and_b32_e32 v71, 0xffff0000, v21
; GFX10-NEXT: v_and_b32_e32 v80, 0xffff0000, v20
; GFX10-NEXT: v_lshlrev_b32_e32 v33, 16, v48
; GFX10-NEXT: v_cndmask_b32_e32 v39, v37, v32, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX10-NEXT: v_cmp_eq_u16_e64 s21, 0x8000, v31
; GFX10-NEXT: v_lshlrev_b32_e32 v87, 16, v27
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v35, v33
; GFX10-NEXT: v_lshlrev_b32_e32 v37, 16, v39
; GFX10-NEXT: v_cndmask_b32_e32 v38, v49, v31, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v35, 0xffff0000, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v31
; GFX10-NEXT: v_cndmask_b32_e64 v33, v53, v52, s6
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v36, v37
; GFX10-NEXT: v_lshlrev_b32_e32 v50, 16, v38
; GFX10-NEXT: v_and_b32_e32 v36, 0xffff0000, v9
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v35, v35
; GFX10-NEXT: v_lshrrev_b32_e32 v37, 16, v25
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v33
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v49, v50
; GFX10-NEXT: v_lshrrev_b32_e32 v49, 16, v9
; GFX10-NEXT: v_cndmask_b32_e64 v50, v52, v33, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v36, v36
; GFX10-NEXT: v_and_b32_e32 v36, 0xffff0000, v25
; GFX10-NEXT: v_lshrrev_b32_e32 v52, 16, v24
; GFX10-NEXT: v_cndmask_b32_e32 v38, v38, v31, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v50
; GFX10-NEXT: v_cndmask_b32_e64 v35, v49, v37, s6
; GFX10-NEXT: v_and_b32_e32 v49, 0xffff0000, v8
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v36, v36
; GFX10-NEXT: v_cndmask_b32_e64 v53, v37, v35, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v49, v49
; GFX10-NEXT: v_and_b32_e32 v37, 0xffff0000, v24
; GFX10-NEXT: v_and_b32_e32 v49, 0xffff0000, v7
; GFX10-NEXT: v_cndmask_b32_e64 v36, v54, v52, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v37, v37
; GFX10-NEXT: v_cmp_lt_f32_e64 s6, v51, v55
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v35
; GFX10-NEXT: v_lshlrev_b32_e32 v54, 16, v53
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v36
; GFX10-NEXT: v_cndmask_b32_e64 v66, v52, v36, s7
; GFX10-NEXT: v_and_b32_e32 v52, 0xffff0000, v23
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v49, v49
; GFX10-NEXT: v_and_b32_e32 v49, 0xffff0000, v6
; GFX10-NEXT: v_cmp_lt_f32_e64 s9, v51, v54
; GFX10-NEXT: v_and_b32_e32 v51, 0xffff0000, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v54, 16, v21
; GFX10-NEXT: v_cndmask_b32_e64 v37, v65, v64, s7
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v52, v52
; GFX10-NEXT: v_lshrrev_b32_e32 v65, 16, v22
; GFX10-NEXT: v_and_b32_e32 v52, 0xffff0000, v22
; GFX10-NEXT: v_cmp_u_f32_e64 s10, v51, v51
; GFX10-NEXT: v_lshlrev_b32_e32 v68, 16, v37
; GFX10-NEXT: v_cndmask_b32_e64 v64, v64, v37, s7
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v49, v49
; GFX10-NEXT: v_lshlrev_b32_e32 v69, 16, v64
; GFX10-NEXT: v_cndmask_b32_e64 v49, v67, v65, s7
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v52, v52
; GFX10-NEXT: v_lshrrev_b32_e32 v52, 16, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v67, 16, v66
; GFX10-NEXT: v_cndmask_b32_e64 v65, v65, v49, s7
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v68, v69
; GFX10-NEXT: v_and_b32_e32 v68, 0xffff0000, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v69, 16, v20
; GFX10-NEXT: v_cndmask_b32_e64 v51, v52, v54, s10
; GFX10-NEXT: v_cmp_lt_f32_e64 s8, v55, v67
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v49
; GFX10-NEXT: v_cmp_u_f32_e64 s10, v68, v68
; GFX10-NEXT: v_lshlrev_b32_e32 v67, 16, v65
; GFX10-NEXT: v_lshlrev_b32_e32 v68, 16, v51
; GFX10-NEXT: v_cndmask_b32_e64 v52, v70, v69, s10
; GFX10-NEXT: v_cmp_u_f32_e64 s10, v71, v71
; GFX10-NEXT: v_lshrrev_b32_e32 v71, 16, v19
; GFX10-NEXT: v_lshlrev_b32_e32 v82, 16, v52
; GFX10-NEXT: v_cndmask_b32_e64 v70, v54, v51, s10
; GFX10-NEXT: v_cmp_u_f32_e64 s10, v80, v80
; GFX10-NEXT: v_and_b32_e32 v54, 0xffff0000, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v80, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v81, 16, v70
; GFX10-NEXT: v_cndmask_b32_e64 v69, v69, v52, s10
; GFX10-NEXT: v_cmp_u_f32_e64 s10, v54, v54
; GFX10-NEXT: v_cmp_lt_f32_e64 s11, v68, v81
; GFX10-NEXT: v_lshlrev_b32_e32 v83, 16, v69
; GFX10-NEXT: v_cndmask_b32_e64 v54, v80, v71, s10
; GFX10-NEXT: v_cmp_lt_f32_e64 s10, v55, v67
; GFX10-NEXT: v_and_b32_e32 v67, 0xffff0000, v19
; GFX10-NEXT: v_and_b32_e32 v68, 0xffff0000, v2
; GFX10-NEXT: v_cmp_lt_f32_e64 s12, v82, v83
; GFX10-NEXT: v_lshrrev_b32_e32 v80, 16, v18
; GFX10-NEXT: v_lshrrev_b32_e32 v81, 16, v2
; GFX10-NEXT: v_cmp_u_f32_e64 s13, v67, v67
; GFX10-NEXT: v_and_b32_e32 v82, 0xffff0000, v18
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v54
; GFX10-NEXT: v_cndmask_b32_e64 v67, v71, v54, s13
; GFX10-NEXT: v_cmp_u_f32_e64 s13, v68, v68
; GFX10-NEXT: v_and_b32_e32 v71, 0xffff0000, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v83, 16, v67
; GFX10-NEXT: v_cndmask_b32_e64 v68, v81, v80, s13
; GFX10-NEXT: v_cmp_u_f32_e64 s13, v82, v82
; GFX10-NEXT: v_lshrrev_b32_e32 v81, 16, v17
; GFX10-NEXT: v_lshrrev_b32_e32 v82, 16, v1
; GFX10-NEXT: v_cndmask_b32_e64 v80, v80, v68, s13
; GFX10-NEXT: v_cmp_u_f32_e64 s13, v71, v71
; GFX10-NEXT: v_and_b32_e32 v71, 0xffff0000, v17
; GFX10-NEXT: v_cndmask_b32_e64 v82, v82, v81, s13
; GFX10-NEXT: v_cmp_u_f32_e64 s14, v71, v71
; GFX10-NEXT: v_cmp_lt_f32_e64 s13, v55, v83
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v68
; GFX10-NEXT: v_lshlrev_b32_e32 v83, 16, v80
; GFX10-NEXT: v_cmp_eq_u16_e64 s22, 0x8000, v82
; GFX10-NEXT: v_cndmask_b32_e64 v71, v81, v82, s14
; GFX10-NEXT: v_cndmask_b32_e64 v67, v67, v54, s13
; GFX10-NEXT: v_cmp_lt_f32_e64 s14, v55, v83
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v82
; GFX10-NEXT: v_lshlrev_b32_e32 v81, 16, v71
; GFX10-NEXT: v_lshrrev_b32_e32 v83, 16, v0
; GFX10-NEXT: v_cmp_lt_f32_e64 s15, v55, v81
; GFX10-NEXT: v_and_b32_e32 v55, 0xffff0000, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v81, 16, v16
; GFX10-NEXT: v_cmp_u_f32_e64 s16, v55, v55
; GFX10-NEXT: v_and_b32_e32 v55, 0xffff0000, v16
; GFX10-NEXT: v_cndmask_b32_e64 v83, v83, v81, s16
; GFX10-NEXT: v_cmp_u_f32_e64 s16, v55, v55
; GFX10-NEXT: v_cmp_eq_u16_e64 s23, 0x8000, v83
; GFX10-NEXT: v_cndmask_b32_e64 v55, v81, v83, s16
; GFX10-NEXT: v_lshlrev_b32_e32 v81, 16, v83
; GFX10-NEXT: v_lshlrev_b32_e32 v84, 16, v55
; GFX10-NEXT: v_cmp_lt_f32_e64 s16, v81, v84
; GFX10-NEXT: v_lshlrev_b32_e32 v81, 16, v14
; GFX10-NEXT: v_cndmask_b32_e64 v55, v55, v83, s16
; GFX10-NEXT: v_cmp_u_f32_e64 s17, v81, v81
; GFX10-NEXT: v_lshlrev_b32_e32 v81, 16, v30
; GFX10-NEXT: v_cndmask_b32_e64 v84, v14, v30, s17
; GFX10-NEXT: v_cmp_u_f32_e64 s17, v81, v81
; GFX10-NEXT: v_lshlrev_b32_e32 v86, 16, v84
; GFX10-NEXT: v_cndmask_b32_e64 v81, v30, v84, s17
; GFX10-NEXT: v_cmp_eq_u16_e64 s24, 0x8000, v84
; GFX10-NEXT: v_lshlrev_b32_e32 v85, 16, v81
; GFX10-NEXT: v_cmp_lt_f32_e64 s17, v86, v85
; GFX10-NEXT: v_lshlrev_b32_e32 v85, 16, v13
; GFX10-NEXT: v_cmp_u_f32_e64 s18, v85, v85
; GFX10-NEXT: v_cndmask_b32_e64 v85, v13, v29, s18
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v29
; GFX10-NEXT: v_lshlrev_b32_e32 v86, 16, v85
; GFX10-NEXT: v_cmp_u_f32_e64 s18, v13, v13
; GFX10-NEXT: v_cndmask_b32_e64 v29, v29, v85, s18
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v29
; GFX10-NEXT: v_cmp_lt_f32_e64 s18, v86, v13
; GFX10-NEXT: v_and_b32_e32 v13, 0xffff0000, v14
; GFX10-NEXT: v_lshrrev_b32_e32 v14, 16, v14
; GFX10-NEXT: v_lshrrev_b32_e32 v86, 16, v30
; GFX10-NEXT: v_cmp_u_f32_e64 s19, v13, v13
; GFX10-NEXT: v_and_b32_e32 v13, 0xffff0000, v30
; GFX10-NEXT: v_cndmask_b32_e64 v14, v14, v86, s19
; GFX10-NEXT: v_cmp_u_f32_e64 s19, v13, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v30, 16, v14
; GFX10-NEXT: v_cndmask_b32_e64 v13, v86, v14, s19
; GFX10-NEXT: v_cmp_eq_u16_e64 s20, 0x8000, v14
; GFX10-NEXT: v_lshlrev_b32_e32 v86, 16, v13
; GFX10-NEXT: v_cmp_lt_f32_e64 s19, v30, v86
; GFX10-NEXT: v_and_b32_e32 v86, 0xffff0000, v15
; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v14, s19
; GFX10-NEXT: v_lshlrev_b32_e32 v30, 16, v13
; GFX10-NEXT: v_cmp_eq_f32_e64 s19, 0, v30
; GFX10-NEXT: s_and_b32 s19, s19, s20
; GFX10-NEXT: v_cmp_eq_u16_e64 s20, 0x8000, v32
; GFX10-NEXT: v_cndmask_b32_e64 v14, v13, v14, s19
; GFX10-NEXT: v_cndmask_b32_e64 v13, v48, v34, s5
; GFX10-NEXT: v_cmp_eq_u16_e64 s19, 0x8000, v34
; GFX10-NEXT: v_cndmask_b32_e64 v48, v53, v35, s9
; GFX10-NEXT: v_cndmask_b32_e64 v53, v64, v37, s7
; GFX10-NEXT: v_cndmask_b32_e64 v64, v65, v49, s10
; GFX10-NEXT: v_lshlrev_b32_e32 v30, 16, v13
; GFX10-NEXT: v_cndmask_b32_e64 v65, v70, v51, s11
; GFX10-NEXT: v_cndmask_b32_e64 v70, v71, v82, s15
; GFX10-NEXT: v_cndmask_b32_e64 v71, v81, v84, s17
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v30
; GFX10-NEXT: v_cndmask_b32_e64 v30, v39, v32, s4
; GFX10-NEXT: v_cndmask_b32_e64 v39, v50, v33, s6
; GFX10-NEXT: v_cndmask_b32_e64 v50, v66, v36, s8
; GFX10-NEXT: v_cndmask_b32_e64 v66, v69, v52, s12
; GFX10-NEXT: s_and_b32 s4, s5, s19
; GFX10-NEXT: v_cndmask_b32_e64 v69, v80, v68, s14
; GFX10-NEXT: v_cndmask_b32_e64 v13, v13, v34, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v30
; GFX10-NEXT: v_cndmask_b32_e64 v80, v29, v85, s18
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v38
; GFX10-NEXT: v_lshlrev_b32_e32 v29, 16, v80
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s20
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v39
; GFX10-NEXT: v_cmp_eq_f32_e64 s17, 0, v29
; GFX10-NEXT: v_lshlrev_b32_e32 v29, 16, v12
; GFX10-NEXT: v_cmp_eq_u16_e64 s20, 0x8000, v35
; GFX10-NEXT: s_and_b32 s4, s4, s21
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v48
; GFX10-NEXT: v_cmp_u_f32_e64 s18, v29, v29
; GFX10-NEXT: v_lshlrev_b32_e32 v29, 16, v28
; GFX10-NEXT: v_cmp_eq_u16_e64 s21, 0x8000, v68
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v50
; GFX10-NEXT: v_cndmask_b32_e64 v81, v12, v28, s18
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v11
; GFX10-NEXT: v_cmp_u_f32_e64 s18, v29, v29
; GFX10-NEXT: v_cndmask_b32_e64 v29, v38, v31, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v53
; GFX10-NEXT: v_cmp_u_f32_e64 s19, v12, v12
; GFX10-NEXT: v_cndmask_b32_e32 v12, v30, v32, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v33
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v37
; GFX10-NEXT: v_cmp_eq_f32_e64 s8, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v64
; GFX10-NEXT: v_cndmask_b32_e64 v96, v11, v27, s19
; GFX10-NEXT: s_and_b32 vcc_lo, s5, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v49
; GFX10-NEXT: v_cndmask_b32_e32 v30, v39, v33, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s9, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v65
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s20
; GFX10-NEXT: v_lshlrev_b32_e32 v33, 16, v15
; GFX10-NEXT: v_cndmask_b32_e32 v31, v48, v35, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v36
; GFX10-NEXT: v_cmp_eq_f32_e64 s10, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v66
; GFX10-NEXT: v_cndmask_b32_e64 v97, v28, v81, s18
; GFX10-NEXT: v_lshrrev_b32_e32 v38, 16, v15
; GFX10-NEXT: s_and_b32 vcc_lo, s7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v51
; GFX10-NEXT: v_cmp_eq_f32_e64 s11, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v67
; GFX10-NEXT: v_cndmask_b32_e32 v11, v50, v36, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s8, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s19, 0x8000, v52
; GFX10-NEXT: v_cndmask_b32_e32 v28, v53, v37, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s12, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v69
; GFX10-NEXT: s_and_b32 vcc_lo, s9, s5
; GFX10-NEXT: v_cmp_eq_u16_e64 s20, 0x8000, v54
; GFX10-NEXT: v_cndmask_b32_e32 v32, v64, v49, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33
; GFX10-NEXT: v_cmp_eq_f32_e64 s13, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v70
; GFX10-NEXT: v_lshlrev_b32_e32 v53, 16, v96
; GFX10-NEXT: v_cmp_eq_f32_e64 s14, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v55
; GFX10-NEXT: v_cmp_eq_f32_e64 s15, 0, v34
; GFX10-NEXT: v_lshlrev_b32_e32 v34, 16, v71
; GFX10-NEXT: v_cmp_eq_f32_e64 s16, 0, v34
; GFX10-NEXT: buffer_load_dword v34, off, s[0:3], s32
; GFX10-NEXT: s_and_b32 s7, s16, s24
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v35, 16, v34
; GFX10-NEXT: v_cndmask_b32_e32 v37, v15, v34, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v86, v86
; GFX10-NEXT: v_lshlrev_b32_e32 v33, 16, v34
; GFX10-NEXT: v_and_b32_e32 v36, 0xffff0000, v34
; GFX10-NEXT: v_cndmask_b32_e32 v38, v38, v35, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s10, s6
; GFX10-NEXT: v_cndmask_b32_e32 v15, v65, v51, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v33, v33
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v38
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v38
; GFX10-NEXT: v_cndmask_b32_e32 v39, v34, v37, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v36, v36
; GFX10-NEXT: v_lshlrev_b32_e32 v36, 16, v37
; GFX10-NEXT: v_lshlrev_b32_e32 v48, 16, v39
; GFX10-NEXT: v_cndmask_b32_e32 v35, v35, v38, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s11, s19
; GFX10-NEXT: v_cndmask_b32_e32 v33, v66, v52, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s12, s20
; GFX10-NEXT: v_lshlrev_b32_e32 v50, 16, v35
; GFX10-NEXT: v_cndmask_b32_e32 v34, v67, v54, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v36, v48
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v54, 16, v26
; GFX10-NEXT: v_cndmask_b32_e32 v51, v39, v37, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v49, v50
; GFX10-NEXT: v_lshlrev_b32_e32 v36, 16, v51
; GFX10-NEXT: v_cndmask_b32_e32 v49, v35, v38, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s13, s21
; GFX10-NEXT: v_cndmask_b32_e32 v35, v69, v68, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s14, s22
; GFX10-NEXT: v_lshlrev_b32_e32 v50, 16, v49
; GFX10-NEXT: v_cndmask_b32_e32 v39, v70, v82, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s15, s23
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v36
; GFX10-NEXT: v_cndmask_b32_e32 v48, v55, v83, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v37
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v50
; GFX10-NEXT: v_lshlrev_b32_e32 v50, 16, v81
; GFX10-NEXT: v_lshlrev_b32_e32 v55, 16, v25
; GFX10-NEXT: v_cndmask_b32_e64 v36, v71, v84, s7
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v87, v87
; GFX10-NEXT: v_cndmask_b32_e32 v37, v51, v37, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s5
; GFX10-NEXT: v_perm_b32 v14, v14, v36, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e32 v38, v49, v38, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v85
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v97
; GFX10-NEXT: v_cndmask_b32_e64 v51, v27, v96, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v81
; GFX10-NEXT: s_and_b32 vcc_lo, s17, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v27, v80, v85, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v50, v49
; GFX10-NEXT: v_lshlrev_b32_e32 v50, 16, v51
; GFX10-NEXT: v_perm_b32 v13, v13, v27, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e32 v49, v97, v81, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v52, v52
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v49
; GFX10-NEXT: v_cndmask_b32_e32 v10, v10, v26, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v53, v50
; GFX10-NEXT: v_cndmask_b32_e32 v50, v51, v96, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v54, v54
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v54, 16, v10
; GFX10-NEXT: v_cndmask_b32_e32 v26, v26, v10, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v51, v51
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v50
; GFX10-NEXT: v_lshlrev_b32_e32 v53, 16, v26
; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v25, s5
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v52
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v10
; GFX10-NEXT: v_cmp_lt_f32_e64 s6, v54, v53
; GFX10-NEXT: v_lshlrev_b32_e32 v53, 16, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v54, 16, v8
; GFX10-NEXT: v_cndmask_b32_e64 v26, v26, v10, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v55, v55
; GFX10-NEXT: v_cndmask_b32_e64 v51, v25, v9, s6
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v96
; GFX10-NEXT: v_cndmask_b32_e32 v25, v49, v81, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v51
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_perm_b32 v12, v12, v25, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e32 v50, v50, v96, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v49
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v53, v52
; GFX10-NEXT: v_lshlrev_b32_e32 v53, 16, v22
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e64 v49, v51, v9, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v54, v54
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v24
; GFX10-NEXT: v_cndmask_b32_e32 v10, v26, v10, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v49
; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v24, s5
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v51, v51
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v23
; GFX10-NEXT: v_perm_b32 v10, v30, v10, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v51, v51
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v24
; GFX10-NEXT: v_cndmask_b32_e32 v7, v7, v23, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v52
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v8
; GFX10-NEXT: v_cndmask_b32_e64 v23, v23, v7, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v9
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v52, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v52, 16, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e64 v24, v24, v8, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v51, v51
; GFX10-NEXT: v_cndmask_b32_e32 v9, v49, v9, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v24
; GFX10-NEXT: v_cndmask_b32_e64 v6, v6, v22, s5
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v52, v26
; GFX10-NEXT: v_perm_b32 v9, v31, v9, 0x5040100
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v51
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v6
; GFX10-NEXT: v_cndmask_b32_e64 v23, v23, v7, s5
; GFX10-NEXT: v_cmp_u_f32_e64 s5, v53, v53
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v8, v24, v8, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v22
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v5
; GFX10-NEXT: v_perm_b32 v8, v11, v8, 0x5040100
; GFX10-NEXT: v_perm_b32 v11, v29, v50, 0x5040100
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v51, v49
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v21
; GFX10-NEXT: v_cndmask_b32_e64 v22, v22, v6, s7
; GFX10-NEXT: v_cmp_u_f32_e64 s7, v26, v26
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX10-NEXT: v_cndmask_b32_e64 v5, v5, v21, s7
; GFX10-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v7, v23, v7, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v4
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v21
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v5
; GFX10-NEXT: v_perm_b32 v7, v28, v7, 0x5040100
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v23, v23
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v20
; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v20, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v26, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v19
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v19, s4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v49, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v51, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v21, v21, v5, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v23, v23
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX10-NEXT: v_cndmask_b32_e64 v20, v20, v4, s4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v26, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v4
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v23
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v20
; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v3, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v2
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v26, v24
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v19
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v4
; GFX10-NEXT: v_cndmask_b32_e32 v6, v22, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v20, v20, v4, s7
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v51, v49
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v23, v23
; GFX10-NEXT: v_cndmask_b32_e32 v5, v21, v5, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v20
; GFX10-NEXT: v_cndmask_b32_e64 v19, v19, v3, s7
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v16
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v18, s6
; GFX10-NEXT: v_perm_b32 v5, v15, v5, 0x5040100
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v22
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v49, 16, v2
; GFX10-NEXT: v_cmp_eq_u16_e64 s10, 0x8000, v2
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v21
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v0
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v22, v22
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v17
; GFX10-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
; GFX10-NEXT: v_perm_b32 v6, v32, v6, 0x5040100
; GFX10-NEXT: v_perm_b32 v15, v38, v37, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v21, v21
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX10-NEXT: v_perm_b32 v4, v33, v4, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v22, v22
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v24, 16, v0
; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v1, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v23, v23
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v0, s6
; GFX10-NEXT: v_cmp_u_f32_e64 s6, v21, v21
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v17
; GFX10-NEXT: v_lshlrev_b32_e32 v23, 16, v16
; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v2, s6
; GFX10-NEXT: v_cmp_lt_f32_e64 s6, v22, v21
; GFX10-NEXT: v_lshlrev_b32_e32 v26, 16, v18
; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v1, s6
; GFX10-NEXT: v_cmp_lt_f32_e64 s6, v24, v23
; GFX10-NEXT: v_cmp_lt_f32_e64 s7, v49, v26
; GFX10-NEXT: v_lshlrev_b32_e32 v21, 16, v17
; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v0, s6
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v3
; GFX10-NEXT: v_cndmask_b32_e64 v18, v18, v2, s7
; GFX10-NEXT: v_lshlrev_b32_e32 v22, 16, v16
; GFX10-NEXT: s_and_b32 s5, s5, s6
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v1
; GFX10-NEXT: v_cndmask_b32_e64 v3, v19, v3, s5
; GFX10-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v21
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v22
; GFX10-NEXT: v_perm_b32 v3, v34, v3, 0x5040100
; GFX10-NEXT: v_cmp_eq_f32_e64 s9, 0, v19
; GFX10-NEXT: s_and_b32 s5, s5, s6
; GFX10-NEXT: v_cndmask_b32_e64 v1, v17, v1, s5
; GFX10-NEXT: s_and_b32 s5, s7, s8
; GFX10-NEXT: v_cndmask_b32_e64 v0, v16, v0, s5
; GFX10-NEXT: s_and_b32 s5, s9, s10
; GFX10-NEXT: v_perm_b32 v1, v39, v1, 0x5040100
; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s5
; GFX10-NEXT: v_perm_b32 v0, v48, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v2, v35, v2, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v32bf16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: scratch_load_b32 v55, off, s32
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v53, v15 :: v_dual_mov_b32 v48, v13
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v31, v10
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v33, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v54.l, 0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v51, v14 :: v_dual_and_b32 v8, 0xffff0000, v53
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v34, v11 :: v_dual_mov_b32 v37, v12
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v36, v9
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v51
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v30
; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v24
; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v23
; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v22
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v48
; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff0000, v19
; GFX11-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff0000, v18
; GFX11-TRUE16-NEXT: v_and_b32_e32 v64, 0xffff0000, v17
; GFX11-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff0000, v16
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v66.l, v54.l
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v55
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v53.h, v55.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v54.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v55.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v15.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v51.h, v30.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v29
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v37
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v30.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v14.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v48.h, v29.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v28
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v34
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v29.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v13.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v37.h, v28.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v27
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v31
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v28.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v12.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v34.h, v27.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v26
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v36
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v27.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v31.h, v26.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v25
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v33
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v26.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v36.h, v25.h, s1
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v25.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v33.h, v24.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v7
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v24.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v32, v32
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v32.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v7.h, v23.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v6
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v23.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v35, v35
; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v32
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v32
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX11-TRUE16-NEXT: v_cndmask_b16 v50.h, v32.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v6.h, v22.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v21
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v35, v35
; GFX11-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v4
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v22.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v32
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v32
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX11-TRUE16-NEXT: v_cndmask_b16 v39.h, v32.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v5.h, v21.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v20
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v35, v35
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v35.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v21.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v32
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v32
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v4.h, v20.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v35.h, v20.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v38, v38
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v38.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v35
; GFX11-TRUE16-NEXT: v_cndmask_b16 v35.h, v35.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v35
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
; GFX11-TRUE16-NEXT: v_cndmask_b16 v35.h, v35.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v3.h, v19.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff0000, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v38.h, v19.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v49, v49
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v49.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v38
; GFX11-TRUE16-NEXT: v_cndmask_b16 v38.h, v38.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v38
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v52, v52
; GFX11-TRUE16-NEXT: v_cndmask_b16 v38.h, v38.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v2.h, v18.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff0000, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v49.h, v18.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v52, v52
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v52.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v49
; GFX11-TRUE16-NEXT: v_cndmask_b16 v49.h, v49.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v49
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v64, v64
; GFX11-TRUE16-NEXT: v_cndmask_b16 v49.h, v49.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v1.h, v17.h, s1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v64, 0xffff0000, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v52.h, v17.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v64, v64
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v64.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v52
; GFX11-TRUE16-NEXT: v_cndmask_b16 v52.h, v52.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v52
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v65, v65
; GFX11-TRUE16-NEXT: v_cndmask_b16 v52.h, v52.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v0.h, v16.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 16, v53
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v64.h, v16.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v54.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v64
; GFX11-TRUE16-NEXT: v_cndmask_b16 v64.h, v64.h, v54.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v65, v65
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 16, v55
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v64
; GFX11-TRUE16-NEXT: v_cndmask_b16 v66.h, v53.l, v55.l, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v65, v65
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v53.h, v64.h, v54.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v55.l, v66.h, vcc_lo
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 16, v51
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 16, v30
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v66.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v66, v54
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v64, v64
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v66.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v55, v55
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 16, v29
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v51.h, v51.l, v30.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v51.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v15.l, v54.h, v66.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v30.l, v51.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v48
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v51.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v55, v55
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v51, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v51.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v30.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v30.h, v48.l, v29.l, vcc_lo
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v48, 16, v28
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v14.l, v54.h, v51.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v29.l, v30.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v37
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v30.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v48, v48
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v30, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v30.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v29.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v29.h, v37.l, v28.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v13.l, v54.h, v30.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v28.l, v29.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v34
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v27
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v29.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v29, v54
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v30, v30
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v29.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v28.l, v54.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v28.h, v34.l, v27.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v12.l, v54.h, v29.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v27.l, v28.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v31
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v26
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v28.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v28, v54
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v29, v29
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v28.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v27.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v27.h, v31.l, v26.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v11.l, v54.h, v28.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v26.l, v27.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v36
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v25
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v27.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v27, v54
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v28, v28
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v27.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v26.l, v54.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v26.h, v36.l, v25.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v10.l, v54.h, v27.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v25.l, v26.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v33
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v26.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v26, v54
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v27, v27
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v26.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v25.l, v54.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v25.h, v33.l, v24.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v9.l, v54.h, v26.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v24.l, v25.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v25.h
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v25, v54
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v26, v26
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v25.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.l, v23.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v8.l, v54.h, v25.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v23.l, v7.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v6
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v7.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v24, v24
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v7.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.l, v22.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v50.l, v54.h, v7.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v22.l, v6.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v54
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v22, v22
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v20
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.l, v21.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v39.l, v54.h, v6.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v21.l, v5.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v5.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v50
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v5.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v19
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.l, v20.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v32.l, v54.h, v5.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v20.l, v4.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v39
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v4.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v18
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.l, v19.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v35.l, v54.h, v4.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v19.l, v3.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v3.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v32
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v3.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v17
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.l, v18.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v38.l, v54.h, v3.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v18.l, v2.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v4, v4
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v35
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v16
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v17.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v49.l, v54.h, v2.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v17.l, v1.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v3, v3
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, v38
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, v49
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v16.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v54.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v52.l, v54.h, v1.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v16.l, v0.h, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v52
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v54
; GFX11-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v54
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v53.l, v54.h, v0.h, s0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v53
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v32bf16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: scratch_load_b32 v31, off, s32
; GFX11-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v14
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v30
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v14
; GFX11-FAKE16-NEXT: v_and_b32_e32 v118, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v119, 16, v19
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34
; GFX11-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v30
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v128, 16, v3
; GFX11-FAKE16-NEXT: v_and_b32_e32 v130, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v131, 16, v18
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v118, v118
; GFX11-FAKE16-NEXT: v_and_b32_e32 v49, 0xffff0000, v29
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v132, 16, v2
; GFX11-FAKE16-NEXT: v_and_b32_e32 v134, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_and_b32_e32 v54, 0xffff0000, v11
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v118, v128, v119, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v130, v130
; GFX11-FAKE16-NEXT: v_and_b32_e32 v53, 0xffff0000, v28
; GFX11-FAKE16-NEXT: v_and_b32_e32 v82, 0xffff0000, v8
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v135, 16, v17
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v144, 16, v1
; GFX11-FAKE16-NEXT: v_and_b32_e32 v146, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v128, v132, v131, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v134, v134
; GFX11-FAKE16-NEXT: v_and_b32_e32 v69, 0xffff0000, v26
; GFX11-FAKE16-NEXT: v_and_b32_e32 v70, 0xffff0000, v9
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v83, 16, v24
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v84, 16, v8
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v147, 16, v16
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v54, v54
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 16, v14
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s5, v82, v82
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v130, v144, v135, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v146, v146
; GFX11-FAKE16-NEXT: v_and_b32_e32 v81, 0xffff0000, v25
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s4, v70, v70
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 16, v13
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v84, v84, v83, s5
; GFX11-FAKE16-NEXT: v_and_b32_e32 v86, 0xffff0000, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v147, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v54, v54
; GFX11-FAKE16-NEXT: v_and_b32_e32 v65, 0xffff0000, v27
; GFX11-FAKE16-NEXT: v_and_b32_e32 v102, 0xffff0000, v5
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s6, v86, v86
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v70, v70
; GFX11-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v15
; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v13
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v48, 16, v13
; GFX11-FAKE16-NEXT: v_and_b32_e32 v50, 0xffff0000, v12
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v87, 16, v23
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v96, 16, v7
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s8, v102, v102
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v13, v13, v29 :: v_dual_lshlrev_b32 v102, 16, v11
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v86, v86
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v51, 16, v28
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v52, 16, v12
; GFX11-FAKE16-NEXT: v_and_b32_e32 v97, 0xffff0000, v23
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v50, v50
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 16, v29
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v96, v96, v87, s6
; GFX11-FAKE16-NEXT: v_and_b32_e32 v98, 0xffff0000, v6
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v102, v102
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v64, 16, v11
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v99, 16, v22
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v100, 16, v6
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s7, v98, v98
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v28
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v52, v52, v51, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v53, v53
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s15, v82, v82
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v39, 16, v29
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v100, v100, v99, s7
; GFX11-FAKE16-NEXT: v_and_b32_e32 v133, 0xffff0000, v18
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v35, v35, v36, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v38, v38
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v51, v51, v52, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v29, v29, v13, s15
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s15, v98, v98
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s11, v133, v133
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v48, v48, v39, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v49, v49
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 16, v52
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v28, v28, v12, s15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 16, v51
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v55, 16, v27
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v67, 16, v26
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v68, 16, v10
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v71, 16, v25
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s17, v49, v133
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v80, 16, v9
; GFX11-FAKE16-NEXT: v_and_b32_e32 v145, 0xffff0000, v17
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v64, v64, v55, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v39, v39, v48, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v51, v51, v52, s17
; GFX11-FAKE16-NEXT: v_and_b32_e32 v66, 0xffff0000, v10
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v65, v65
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v15
; GFX11-FAKE16-NEXT: v_and_b32_e32 v85, 0xffff0000, v24
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v80, v80, v71, s4
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v66, v66
; GFX11-FAKE16-NEXT: v_and_b32_e32 v129, 0xffff0000, v19
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 16, v36
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v48
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v55, v55, v64, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v68, v68, v67, s3
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v69, v69
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s4, v81, v81
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s12, v145, v145
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v35
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 16, v39
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v67, v67, v68, s3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 16, v64
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 16, v68
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v70, v71, v80, s4
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s5, v85, v85
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s10, v129, v129
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v129, v135, v130, s12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 16, v55
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v135, 16, v67
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v54, v98
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s16, v37, v132
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 16, v80
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v81, v83, v84, s5
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s6, v97, v97
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v144, 16, v70
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s18, v53, v134
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v35, v35, v36, s15
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v39, v39, v48, s16
; GFX11-FAKE16-NEXT: v_and_b32_e32 v101, 0xffff0000, v22
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v65, v135
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v103, 16, v21
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v112, 16, v5
; GFX11-FAKE16-NEXT: v_and_b32_e32 v114, 0xffff0000, v4
; GFX11-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v16
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 16, v84
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v85, v87, v96, s6
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s7, v101, v101
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v145, 16, v81
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v55, v55, v64, s18
; GFX11-FAKE16-NEXT: v_and_b32_e32 v113, 0xffff0000, v21
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v65, v67, v68, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v69, v144
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v115, 16, v20
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v116, 16, v4
; GFX11-FAKE16-NEXT: v_and_b32_e32 v117, 0xffff0000, v20
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s9, v114, v114
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v112, v112, v103, s8
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 16, v96
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v87, v99, v100, s7
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s8, v113, v113
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s13, v38, v38
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v146, 16, v85
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v67, v70, v80, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v71, v145
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v116, v116, v115, s9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v99, v103, v112, s8
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s9, v117, v117
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v38, v147, v34, s13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v147, 16, v87
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v69, v81, v84, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v83, v146
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 16, v112
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v102, v115, v116, s9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 16, v99
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 16, v116
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v70, v85, v96, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v86, v147
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v102
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v113, v119, v118, s10
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v117, v131, v128, s11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v30
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v71, v87, v100, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v97, v54
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v113
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 16, v128
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 16, v117
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 16, v130
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v54, v99, v112, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v101, v98
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s14, v66, v66
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 16, v129
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v131, 16, v34
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 16, v38
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v81, v102, v116, s15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v103, 16, v118
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v30, v30, v14, s14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 16, v13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 16, v29
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v103, v37
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 16, v30
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v36
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 16, v55
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v48
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v37, v113, v118, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v115, v132
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v65
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v52
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 16, v67
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v64
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v83, v117, v128, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v119, v49
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s18, 0, v85
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 16, v69
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s3, 0x8000, v68
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s19, 0, v86
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v49, v129, v130, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v131, v133
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v70
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v80
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s20, 0, v87
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v99, 16, v71
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v38, v38, v34, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v66, v53
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 16, v35
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v39
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v84
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s21, 0, v97
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v30, v30, v14, s15
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v82, v134
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 16, v51
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s16, 0, v66
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v96
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s22, 0, v98
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v29, v29, v13, s15
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s15, 0, v53
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s17, 0, v82
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s7, 0x8000, v100
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s23, 0, v99
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v15
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s15, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 16, v54
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v35, v35, v36, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s16, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v112
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v36, v39, v48, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s17, s1
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s24, 0, v101
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v39, v51, v52, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s18, s2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v102, 16, v81
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v48, v55, v64, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s19, s3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v103, 16, v37
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v51, v65, v68, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s20, s4
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s9, 0x8000, v116
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v52, v67, v80, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s21, s5
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s25, 0, v102
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v53, v69, v84, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s22, s6
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s10, 0x8000, v118
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v55, v70, v96, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s23, s7
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s26, 0, v103
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v64, v71, v100, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 16, v83
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 16, v49
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v117, 16, v38
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s11, 0x8000, v128
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s12, 0x8000, v130
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s27, 0, v113
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s28, 0, v115
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s13, 0x8000, v34
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s29, 0, v117
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 16, v30
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s14, 0x8000, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v129, 16, v29
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v114, 16, v27
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s40, 0, v119
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s41, 0, v129
; GFX11-FAKE16-NEXT: s_and_b32 s3, s40, s14
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v14, v30, v14, s3
; GFX11-FAKE16-NEXT: v_perm_b32 v14, v35, v14, 0x5040100
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v65, 16, v31
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v31
; GFX11-FAKE16-NEXT: v_and_b32_e32 v66, 0xffff0000, v31
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v32, v33, v65, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s24, s8
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v33, v54, v112, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 16, v32
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v32
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v15, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v66, v66
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v31
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v50, v65, v32 :: v_dual_lshlrev_b32 v65, 16, v15
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s25, s9
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v54, v81, v116, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s26, s10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 16, v50
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v37, v37, v118, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v65, v66
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v15, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v67, v68
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v31
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v50, v50, v32, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s27, s11
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v65, v83, v128, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s28, s12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 16, v50
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v49, v49, v130, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s29, s13
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v66
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v34, v38, v34, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v15
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v67
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v26
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v114, v114
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v15, v31, v15, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v28
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v30, v50, v32, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v13
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v12
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v27, v27, v11, s0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v25
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v12
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s41, vcc_lo
; GFX11-FAKE16-NEXT: v_perm_b32 v15, v30, v15, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v13, v29, v13, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v10
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v32, v31
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v11
; GFX11-FAKE16-NEXT: v_perm_b32 v13, v36, v13, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v10, v10, v26 :: v_dual_lshlrev_b32 v29, 16, v28
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v32, v31
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v26, v26, v10 :: v_dual_lshlrev_b32 v31, 16, v27
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v29
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v26
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v29, v29
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v12, v28, v12, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v10
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v38, v32
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v9, v9, v25, s1
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v31
; GFX11-FAKE16-NEXT: v_perm_b32 v12, v39, v12, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v26, v26, v10, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v50, v50
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v26
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v25, v25, v9, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v11
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v25
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v11, v27, v11, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v31, v29
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v22
; GFX11-FAKE16-NEXT: v_perm_b32 v11, v48, v11, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v25, v25, v9, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v10, v26, v10, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v8, v8, v24, s1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v7
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v23
; GFX11-FAKE16-NEXT: v_perm_b32 v10, v51, v10, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v27, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v7, v7, v23 :: v_dual_lshlrev_b32 v26, 16, v24
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v23, v23, v7, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v9
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v28, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v7
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v24, v24, v8, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v9, v25, v9, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v6, v6, v22, s1
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v28, v26
; GFX11-FAKE16-NEXT: v_perm_b32 v9, v52, v9, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v27
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v23, v23, v7, s1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v29, v29
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v22, v22, v6, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v8, v24, v8, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v22
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_perm_b32 v8, v53, v8, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v21
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v22, v22, v6, s3
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v25, v25
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v21, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v7, v23, v7 :: v_dual_lshlrev_b32 v26, 16, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v4
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v24
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v21
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v5
; GFX11-FAKE16-NEXT: v_perm_b32 v7, v55, v7, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v23, v23
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v20
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v20, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v19
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v19, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v26, v24
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v21, v21, v5, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v23, v23
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v20, v20, v4, s0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v20
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v19, v19, v3, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v25, v24
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v19
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v6, v22, v6, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v20, v20, v4, s3
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v23, v23
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v5, v21, v5 :: v_dual_lshlrev_b32 v22, 16, v20
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v19, v19, v3, s3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v16
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v18, s2
; GFX11-FAKE16-NEXT: v_perm_b32 v5, v33, v5, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v22
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v21
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v22, v22
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v17
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_perm_b32 v6, v64, v6, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, v1, v17, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v21, v21
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX11-FAKE16-NEXT: v_perm_b32 v4, v54, v4, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v16, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v22, v22
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v17, v17, v1, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v23, v23
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v16, v16, v0, s2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v21, v21
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v17
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v16
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v18, v18, v2, s2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v22, v21
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v18
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v17, v17, v1, s2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v24, v23
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v26, v25
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v17
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v16, v16, v0, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v18, v18, v2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v16
; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v19, v3, s1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v21
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v22
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_perm_b32 v3, v37, v3, 0x5040100
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s5, 0, v19
; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, v17, v1, s1
; GFX11-FAKE16-NEXT: s_and_b32 s1, s3, s4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v16, v0, s1
; GFX11-FAKE16-NEXT: s_and_b32 s1, s5, s6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v49, v1, 0x5040100
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v18, v2, s1
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v34, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v2, v65, v2, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v32bf16:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: scratch_load_b32 v55, off, s32
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v53, v15 :: v_dual_mov_b32 v48, v13
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v31, v10
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v33, v8
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v54.l, 0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v51, v14 :: v_dual_and_b32 v8, 0xffff0000, v53
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v34, v11 :: v_dual_mov_b32 v37, v12
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v36, v9
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v51
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v30
; GFX12-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v24
; GFX12-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v23
; GFX12-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v22
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v48
; GFX12-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff0000, v19
; GFX12-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff0000, v18
; GFX12-TRUE16-NEXT: v_and_b32_e32 v64, 0xffff0000, v17
; GFX12-TRUE16-NEXT: v_and_b32_e32 v65, 0xffff0000, v16
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v66.l, v54.l
; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v55
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v53.h, v55.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v8.l, v54.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v55.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v15.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v51.h, v30.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v29
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v37
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v30.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v14.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v48.h, v29.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v28
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v34
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v29.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v13.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v37.h, v28.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v27
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v31
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v28.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v12.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v34.h, v27.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v26
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v36
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v27.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v31.h, v26.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, 0xffff0000, v25
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v33
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v26.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v36.h, v25.h, s1
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v9, v9
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v25.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v33.h, v24.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v24.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v32, v32
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v32.l, v54.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v8
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v8
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v35, v35
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.h, v8.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v7.h, v23.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v6
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v23.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v35, v35
; GFX12-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v5
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v32
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v32
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v50.h, v32.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v6.h, v22.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v21
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v35, v35
; GFX12-TRUE16-NEXT: v_and_b32_e32 v35, 0xffff0000, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v22.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v32
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v32
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v39.h, v32.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v5.h, v21.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v20
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v35, v35
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v35.l, v54.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v21.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v32
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v32
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.h, v32.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v4.h, v20.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v35.h, v20.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v38, v38
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v38.l, v54.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v35
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v35.h, v35.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v35
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v49, v49
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v35.h, v35.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v3.h, v19.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v49, 0xffff0000, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v38.h, v19.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v49, v49
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v49.l, v54.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v38
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v38.h, v38.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v38
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v52, v52
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v38.h, v38.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v2.h, v18.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v52, 0xffff0000, v1
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v49.h, v18.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v52, v52
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v52.l, v54.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v49
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v49.h, v49.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v49
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v64, v64
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v49.h, v49.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v1.h, v17.h, s1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v64, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v52.h, v17.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v64, v64
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v64.l, v54.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v52
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v52.h, v52.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v52
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v65, v65
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v52.h, v52.h, v54.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v0.h, v16.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 16, v53
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v64.h, v16.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v54.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v54, v64
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v64.h, v64.h, v54.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v65, v65
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v65, 16, v55
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v64
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v66.h, v53.l, v55.l, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v65, v65
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v53.h, v64.h, v54.h, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v55.l, v66.h, vcc_lo
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 16, v51
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v64, 16, v30
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v66.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v66, v54
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v64, v64
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v66.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v55, v55
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v55, 16, v29
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v51.h, v51.l, v30.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v51.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v15.l, v54.h, v66.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v30.l, v51.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v48
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v51.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v55, v55
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v51, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v51.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v30, v30
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v30.l, v54.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v30.h, v48.l, v29.l, vcc_lo
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v48, 16, v28
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v14.l, v54.h, v51.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v29.l, v30.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v37
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v30.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v48, v48
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v30, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v30.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v29.l, v54.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v29.h, v37.l, v28.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v13.l, v54.h, v30.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v28.l, v29.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v34
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v30, 16, v27
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v29.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v29, v54
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v30, v30
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v29.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v28, v28
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v28.l, v54.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v28.h, v34.l, v27.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v12.l, v54.h, v29.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v27.l, v28.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v31
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v29, 16, v26
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v28.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v28, v54
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v29, v29
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v28.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v27.l, v54.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v27.h, v31.l, v26.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v11.l, v54.h, v28.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v26.l, v27.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v36
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v28, 16, v25
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v27.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v27, v54
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v28, v28
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v27.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v26.l, v54.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v26.h, v36.l, v25.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v10.l, v54.h, v27.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v25.l, v26.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v25, 16, v33
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v26.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v26, v54
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v27, v27
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v26.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v25, v25
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v25.l, v54.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v25.h, v33.l, v24.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v9.l, v54.h, v26.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v24.l, v25.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v7
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v25.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v25, v54
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v26, v26
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v25.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v24, v24
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v7.l, v23.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v8.l, v54.h, v25.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v23.l, v7.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v23, 16, v6
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v7.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v24, v24
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v7.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v23, v23
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.l, v22.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v50.l, v54.h, v7.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v22.l, v6.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v22, 16, v21
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v54
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v22, v22
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v20
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.l, v21.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v39.l, v54.h, v6.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v21.l, v5.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v5.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v7, v50
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v5.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v19
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.l, v20.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v32.l, v54.h, v5.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v20.l, v4.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v39
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v4.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v18
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.l, v19.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v35.l, v54.h, v4.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v19.l, v3.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v3.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v32
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v3.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v17
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.l, v18.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v38.l, v54.h, v3.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v18.l, v2.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v4, v4
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v35
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v16
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v17.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v49.l, v54.h, v2.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v17.l, v1.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v3, v3
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v3, v38
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v2, v49
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v16.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v54.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v52.l, v54.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v16.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v1, v52
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v54
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v54.h, v54.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v54
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v53.l, v54.h, v0.h, s0
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v53
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v32bf16:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: scratch_load_b32 v31, off, s32
; GFX12-FAKE16-NEXT: v_and_b32_e32 v34, 0xffff0000, v14
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v35, 16, v30
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v36, 16, v14
; GFX12-FAKE16-NEXT: v_and_b32_e32 v118, 0xffff0000, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v119, 16, v19
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v34, v34
; GFX12-FAKE16-NEXT: v_and_b32_e32 v37, 0xffff0000, v30
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v128, 16, v3
; GFX12-FAKE16-NEXT: v_and_b32_e32 v130, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v131, 16, v18
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v36, v36, v35, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v118, v118
; GFX12-FAKE16-NEXT: v_and_b32_e32 v49, 0xffff0000, v29
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v132, 16, v2
; GFX12-FAKE16-NEXT: v_and_b32_e32 v134, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_and_b32_e32 v54, 0xffff0000, v11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v118, v128, v119, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v130, v130
; GFX12-FAKE16-NEXT: v_and_b32_e32 v53, 0xffff0000, v28
; GFX12-FAKE16-NEXT: v_and_b32_e32 v82, 0xffff0000, v8
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v135, 16, v17
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v144, 16, v1
; GFX12-FAKE16-NEXT: v_and_b32_e32 v146, 0xffff0000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v128, v132, v131, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v134, v134
; GFX12-FAKE16-NEXT: v_and_b32_e32 v69, 0xffff0000, v26
; GFX12-FAKE16-NEXT: v_and_b32_e32 v70, 0xffff0000, v9
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v83, 16, v24
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v84, 16, v8
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v147, 16, v16
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v34, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v54, v54
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 16, v14
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s5, v82, v82
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v130, v144, v135, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v146, v146
; GFX12-FAKE16-NEXT: v_and_b32_e32 v81, 0xffff0000, v25
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s4, v70, v70
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v70, 16, v13
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v84, v84, v83, s5
; GFX12-FAKE16-NEXT: v_and_b32_e32 v86, 0xffff0000, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v147, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v54, v54
; GFX12-FAKE16-NEXT: v_and_b32_e32 v65, 0xffff0000, v27
; GFX12-FAKE16-NEXT: v_and_b32_e32 v102, 0xffff0000, v5
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s6, v86, v86
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v12
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v14, v14, v30, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v70, v70
; GFX12-FAKE16-NEXT: v_and_b32_e32 v32, 0xffff0000, v15
; GFX12-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v13
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v48, 16, v13
; GFX12-FAKE16-NEXT: v_and_b32_e32 v50, 0xffff0000, v12
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v87, 16, v23
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v96, 16, v7
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s8, v102, v102
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v13, v13, v29 :: v_dual_lshlrev_b32 v102, 16, v11
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v86, v86
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v51, 16, v28
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v52, 16, v12
; GFX12-FAKE16-NEXT: v_and_b32_e32 v97, 0xffff0000, v23
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v50, v50
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 16, v29
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v96, v96, v87, s6
; GFX12-FAKE16-NEXT: v_and_b32_e32 v98, 0xffff0000, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v12, v12, v28, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v102, v102
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v64, 16, v11
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v99, 16, v22
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v100, 16, v6
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s7, v98, v98
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v28
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v52, v52, v51, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v11, v11, v27, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v37, v37
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v53, v53
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s15, v82, v82
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v39, 16, v29
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v100, v100, v99, s7
; GFX12-FAKE16-NEXT: v_and_b32_e32 v133, 0xffff0000, v18
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v35, v35, v36, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v38, v38
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v51, v51, v52, s1
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v29, v29, v13, s15
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s15, v98, v98
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s11, v133, v133
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v48, v48, v39, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v49, v49
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 16, v52
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v28, v28, v12, s15
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 16, v51
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v55, 16, v27
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v67, 16, v26
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v68, 16, v10
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v71, 16, v25
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s17, v49, v133
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v80, 16, v9
; GFX12-FAKE16-NEXT: v_and_b32_e32 v145, 0xffff0000, v17
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v64, v64, v55, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v39, v39, v48, s0
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v51, v51, v52, s17
; GFX12-FAKE16-NEXT: v_and_b32_e32 v66, 0xffff0000, v10
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v65, v65
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v33, 16, v15
; GFX12-FAKE16-NEXT: v_and_b32_e32 v85, 0xffff0000, v24
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v80, v80, v71, s4
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v66, v66
; GFX12-FAKE16-NEXT: v_and_b32_e32 v129, 0xffff0000, v19
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 16, v36
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v48
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v55, v55, v64, s2
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v68, v68, v67, s3
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v69, v69
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s4, v81, v81
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s12, v145, v145
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v35
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 16, v39
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v67, v67, v68, s3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 16, v64
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v65, 16, v68
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v70, v71, v80, s4
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s5, v85, v85
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s10, v129, v129
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v129, v135, v130, s12
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 16, v55
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v135, 16, v67
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v54, v98
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s16, v37, v132
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v69, 16, v80
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v81, v83, v84, s5
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s6, v97, v97
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v144, 16, v70
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s18, v53, v134
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v35, v35, v36, s15
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v39, v39, v48, s16
; GFX12-FAKE16-NEXT: v_and_b32_e32 v101, 0xffff0000, v22
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v65, v135
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v103, 16, v21
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v112, 16, v5
; GFX12-FAKE16-NEXT: v_and_b32_e32 v114, 0xffff0000, v4
; GFX12-FAKE16-NEXT: v_and_b32_e32 v38, 0xffff0000, v16
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v71, 16, v84
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v85, v87, v96, s6
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s7, v101, v101
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v145, 16, v81
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v55, v55, v64, s18
; GFX12-FAKE16-NEXT: v_and_b32_e32 v113, 0xffff0000, v21
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v65, v67, v68, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v69, v144
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v115, 16, v20
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v116, 16, v4
; GFX12-FAKE16-NEXT: v_and_b32_e32 v117, 0xffff0000, v20
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s9, v114, v114
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v112, v112, v103, s8
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v83, 16, v96
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v87, v99, v100, s7
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s8, v113, v113
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s13, v38, v38
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v146, 16, v85
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v67, v70, v80, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v71, v145
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v116, v116, v115, s9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v99, v103, v112, s8
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s9, v117, v117
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v38, v147, v34, s13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v147, 16, v87
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v69, v81, v84, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v83, v146
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 16, v112
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v102, v115, v116, s9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v54, 16, v99
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 16, v116
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v70, v85, v96, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v86, v147
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v102
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v113, v119, v118, s10
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v117, v131, v128, s11
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v30
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v71, v87, v100, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v97, v54
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v37, 16, v113
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 16, v128
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v132, 16, v117
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 16, v130
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v54, v99, v112, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v101, v98
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s14, v66, v66
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v49, 16, v129
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v131, 16, v34
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v133, 16, v38
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v81, v102, v116, s15
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v103, 16, v118
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v30, v30, v14, s14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 16, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v134, 16, v29
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v103, v37
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 16, v30
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v36
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v85, 16, v55
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v48
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v37, v113, v118, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v115, v132
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v86, 16, v65
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v52
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v87, 16, v67
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v64
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v83, v117, v128, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v119, v49
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s18, 0, v85
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v97, 16, v69
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s3, 0x8000, v68
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s19, 0, v86
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v49, v129, v130, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v131, v133
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v98, 16, v70
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v80
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s20, 0, v87
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v99, 16, v71
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v38, v38, v34, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v66, v53
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v53, 16, v35
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v39
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v84
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s21, 0, v97
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v30, v30, v14, s15
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s15, v82, v134
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v82, 16, v51
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s16, 0, v66
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v96
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s22, 0, v98
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v29, v29, v13, s15
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s15, 0, v53
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s17, 0, v82
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s7, 0x8000, v100
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s23, 0, v99
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v15
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s15, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v101, 16, v54
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v35, v35, v36, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s16, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v112
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v36, v39, v48, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s17, s1
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s24, 0, v101
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v39, v51, v52, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s18, s2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v102, 16, v81
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v48, v55, v64, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s19, s3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v103, 16, v37
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v51, v65, v68, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s20, s4
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s9, 0x8000, v116
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v52, v67, v80, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s21, s5
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s25, 0, v102
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v53, v69, v84, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s22, s6
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s10, 0x8000, v118
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v55, v70, v96, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s23, s7
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s26, 0, v103
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v64, v71, v100, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v113, 16, v83
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v115, 16, v49
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v117, 16, v38
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s11, 0x8000, v128
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s12, 0x8000, v130
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s27, 0, v113
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s28, 0, v115
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s13, 0x8000, v34
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s29, 0, v117
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v119, 16, v30
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s14, 0x8000, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v129, 16, v29
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v114, 16, v27
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s40, 0, v119
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s41, 0, v129
; GFX12-FAKE16-NEXT: s_and_b32 s3, s40, s14
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v14, v30, v14, s3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_perm_b32 v14, v35, v14, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v65, 16, v31
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v15, v15, v31, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v32, v32
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v31
; GFX12-FAKE16-NEXT: v_and_b32_e32 v66, 0xffff0000, v31
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v32, v33, v65, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s24, s8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v33, v54, v112, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v50, v50
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 16, v32
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v32
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v15, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v66, v66
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v31
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v50, v65, v32 :: v_dual_lshlrev_b32 v65, 16, v15
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s25, s9
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v54, v81, v116, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s26, s10
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v68, 16, v50
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v37, v37, v118, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v65, v66
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v31, v31, v15, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v67, v68
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v66, 16, v31
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v50, v50, v32, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s27, s11
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v65, v83, v128, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s28, s12
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v67, 16, v50
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v49, v49, v130, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s29, s13
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v66
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v34, v38, v34, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v15
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v67
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v26
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v114, v114
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v15, v31, v15, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v28
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v30, v50, v32, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v13
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v12
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v27, v27, v11, s0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v50, 16, v25
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v12
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s41, vcc_lo
; GFX12-FAKE16-NEXT: v_perm_b32 v15, v30, v15, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v13, v29, v13, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v10
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v32, v31
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v27
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v11
; GFX12-FAKE16-NEXT: v_perm_b32 v13, v36, v13, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v28, v28, v12, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v29, v29
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v10, v10, v26 :: v_dual_lshlrev_b32 v29, 16, v28
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v32, v31
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v27, v27, v11, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v38, v38
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v38, 16, v10
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v26, v26, v10 :: v_dual_lshlrev_b32 v31, 16, v27
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v29
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v32, 16, v26
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v29, v29
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v12, v28, v12, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v10
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v38, v32
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v9, v9, v25, s1
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v31
; GFX12-FAKE16-NEXT: v_perm_b32 v12, v39, v12, 0x5040100
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v26, v26, v10, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v50, v50
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v31, 16, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v26
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v25, v25, v9, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v11
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v25
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v11, v27, v11, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v8
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v31, v29
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v29, 16, v22
; GFX12-FAKE16-NEXT: v_perm_b32 v11, v48, v11, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v25, v25, v9, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v10, v26, v10, vcc_lo
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v8, v8, v24, s1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v7
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v27, v27
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v23
; GFX12-FAKE16-NEXT: v_perm_b32 v10, v51, v10, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v24, v24, v8, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v27, v27
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v7, v7, v23 :: v_dual_lshlrev_b32 v26, 16, v24
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v28
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v23, v23, v7, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v9
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v28, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v28, 16, v7
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v23
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v24, v24, v8, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v27, v27
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v9, v25, v9, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v8
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v24
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v6, v6, v22, s1
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v28, v26
; GFX12-FAKE16-NEXT: v_perm_b32 v9, v52, v9, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v27
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v23, v23, v7, s1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s1, v29, v29
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v23
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v22, v22, v6, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v8, v24, v8, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v22
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v5
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_perm_b32 v8, v53, v8, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v21
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v22, v22, v6, s3
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s3, v25, v25
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v26, v26
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v22
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v5, v5, v21, s3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v21, v21, v5, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v7, v23, v7 :: v_dual_lshlrev_b32 v26, 16, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v24
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v21
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v5
; GFX12-FAKE16-NEXT: v_perm_b32 v7, v55, v7, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v23, v23
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v20
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v4, v4, v20, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v19
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v19, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v26, v24
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v27, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v21, v21, v5, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v23, v23
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v21
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v20, v20, v4, s0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v25, v25
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v4
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v23
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v20
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v19, v19, v3, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v25, v24
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v19
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v6, v22, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v20, v20, v4, s3
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v27, v26
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v23, v23
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v5, v21, v5 :: v_dual_lshlrev_b32 v22, 16, v20
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v19, v19, v3, s3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v16
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v18, s2
; GFX12-FAKE16-NEXT: v_perm_b32 v5, v33, v5, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v22
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v19
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v26, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v21
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v22, v22
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v17
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v20, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_perm_b32 v6, v64, v6, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v1, v1, v17, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v21, v21
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v18
; GFX12-FAKE16-NEXT: v_perm_b32 v4, v54, v4, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v16, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v22, v22
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v24, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v17, v17, v1, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v23, v23
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v16, v16, v0, s2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s2, v21, v21
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v17
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v23, 16, v16
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v18, v18, v2, s2
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v22, v21
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v25, 16, v18
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v17, v17, v1, s2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s2, v24, v23
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s3, v26, v25
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v21, 16, v17
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v16, v16, v0, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v3
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v18, v18, v2, s3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v22, 16, v16
; GFX12-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v19, v3, s1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v19, 16, v18
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v21
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v22
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_perm_b32 v3, v37, v3, 0x5040100
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s5, 0, v19
; GFX12-FAKE16-NEXT: s_and_b32 s1, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v1, v17, v1, s1
; GFX12-FAKE16-NEXT: s_and_b32 s1, s3, s4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v16, v0, s1
; GFX12-FAKE16-NEXT: s_and_b32 s1, s5, s6
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v49, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v18, v2, s1
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v34, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_perm_b32 v2, v65, v2, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <32 x bfloat> @llvm.minimumnum.v32bf16(<32 x bfloat> %x, <32 x bfloat> %y)
ret <32 x bfloat> %result
}
; Scalar bfloat test of the llvm.minimumnum intrinsic. The function takes two
; bfloat arguments, passes them straight to @llvm.minimumnum.bf16 and returns
; the result. It carries attribute group #0, which is defined outside this
; part of the file; judging by the "_no_ieee" suffix it presumably turns the
; IEEE mode off (TODO confirm against the attribute definition).
;
; The assertions below are autogenerated, do not edit them by hand. As they
; stand, the gfx7 lines expect both inputs shifted left by 16, one v_min_f32,
; and a shift right by 16. Every other target expects a compare/select
; sequence (two unordered self-compares, a less-than compare, then an
; equal-to-zero compare combined with a 16-bit compare against 0x8000).
define bfloat @v_minimumnum_bf16_no_ieee(bfloat %x, bfloat %y) #0 {
; GFX7-LABEL: v_minimumnum_bf16_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_bf16_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v3, v2
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX8-NEXT: s_movk_i32 s4, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s4, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_bf16_no_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX900-NEXT: s_movk_i32 s4, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s4, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_bf16_no_ieee:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: s_movk_i32 s0, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s0, v0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_bf16_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v2
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_bf16_no_ieee:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v2
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v1.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_bf16_no_ieee:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_bf16_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v0.l, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v1.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_bf16_no_ieee:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v2, 16, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call bfloat @llvm.minimumnum.bf16(bfloat %x, bfloat %y)
ret bfloat %result
}
define <2 x bfloat> @v_minimumnum_v2bf16_no_ieee(<2 x bfloat> %x, <2 x bfloat> %y) #0 {
; GFX7-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v1
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v0
; GFX7-NEXT: v_min_f32_e32 v2, v3, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
; GFX7-NEXT: v_alignbit_b32 v0, v2, v0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX8-NEXT: v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX900-NEXT: v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v3
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
; GFX950-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX950-NEXT: v_cndmask_b32_sdwa v3, v0, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v3
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v4, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v4
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v4, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v2, v0, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX10-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT: v_cndmask_b32_sdwa v6, v0, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v6
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v3, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v5, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v4
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v3, v3
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v1.h, v2.h, s0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v3
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.l, v1.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v4.h, s2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v2
; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v4.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v4.h, s0
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v2, v4, v3 :: v_dual_lshlrev_b32 v5, 16, v0
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v1, v1, v0 :: v_dual_lshlrev_b32 v4, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v3, v3
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v0.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v2.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v1.h, v2.h, s0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v2.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v2, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v3.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v2.l
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.l, v1.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v1.l, v4.h, s2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v2
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v2.h, v2.h, v4.h, vcc_lo
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v2
; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.h, v4.h, s0
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v2bf16_no_ieee:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v2, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v2, v4, v3 :: v_dual_lshlrev_b32 v5, 16, v0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s1, 0x8000, v0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v1, v1, v0 :: v_dual_lshlrev_b32 v4, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s2, 0, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s2, s1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %x, <2 x bfloat> %y)
ret <2 x bfloat> %result
}
; Codegen test for llvm.minimumnum on <3 x bfloat> operands in a function that
; carries attribute group #0. That group is defined outside this excerpt;
; presumably it selects the non-IEEE-mode configuration implied by the
; "_no_ieee" suffix -- TODO confirm against the attribute definitions.
;
; The expected output below shows two lowering shapes:
;  * the GFX7 run expects each element to be moved into the high half of a
;    32-bit register (shift left by 16 or mask with 0xffff0000) and then fed
;    to v_min_f32;
;  * every other run expects a per-element compare/select sequence built from
;    v_cmp_u_f32 (unordered self-compare), v_cmp_lt_f32, and a final select
;    gated on v_cmp_eq_f32 against 0 combined with a v_cmp_eq_u16 test for the
;    sign-bit-only pattern 0x8000.
;
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <3 x bfloat> @v_minimumnum_v3bf16_no_ieee(<3 x bfloat> %x, <3 x bfloat> %y) #0 {
; GFX7-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX7-NEXT: v_min_f32_e32 v3, v4, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_alignbit_b32 v0, v3, v0, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX8-NEXT: v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX900-NEXT: v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v4, v0, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX950-NEXT: v_cndmask_b32_sdwa v5, v0, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v5
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v5
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v5, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v4, v0, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX10-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_cmp_u_f32_e64 s4, v6, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT: v_cndmask_b32_e64 v6, v0, v2, s4
; GFX10-NEXT: v_cndmask_b32_sdwa v0, v0, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s5, 0x8000, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v1
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v7
; GFX10-NEXT: v_cmp_eq_u16_e64 s7, 0x8000, v1
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc_lo
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v9, v5
; GFX10-NEXT: v_cmp_eq_f32_e64 s6, 0, v8
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v1, s4
; GFX10-NEXT: v_cmp_eq_f32_e64 s4, 0, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v3
; GFX10-NEXT: s_and_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s6, s5
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v5
; GFX10-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s7
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.h, v2.h, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v4.h, s0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v4.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v3.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.h, v5.h, v4.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v1.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.h, s2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v3.l, v4.h, v0.h, s0
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v6, v5 :: v_dual_and_b32 v7, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_lshlrev_b32 v6, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v8
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v5, v5, v4 :: v_dual_lshlrev_b32 v8, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v7, 16, v2
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v7
; GFX11-FAKE16-NEXT: s_and_b32 s0, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v7, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v0.h, v2.h, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v4.h, s0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v4, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v4.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, 16, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.l, v3.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.h, v5.h, v4.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.l, v1.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v0
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v6, v6
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v1.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.h, v1.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v2.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v3.l, v4.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v3
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v3bf16_no_ieee:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v6, v5 :: v_dual_and_b32 v7, 0xffff0000, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v8, v8
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_lshlrev_b32 v6, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v8
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v5, v5, v4 :: v_dual_lshlrev_b32 v8, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v9, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v7, 16, v3
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v8, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v7, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v6
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v7
; GFX12-FAKE16-NEXT: s_and_b32 s0, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v0, v2, v0, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v1
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x bfloat> @llvm.minimumnum.v3bf16(<3 x bfloat> %x, <3 x bfloat> %y)
ret <3 x bfloat> %result
}
define <4 x bfloat> @v_minimumnum_v4bf16_no_ieee(<4 x bfloat> %x, <4 x bfloat> %y) #0 {
; GFX7-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v4, 0xffff0000, v3
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_min_f32_e32 v4, v5, v4
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v2
; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX7-NEXT: v_min_f32_e32 v3, v5, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
; GFX7-NEXT: v_alignbit_b32 v0, v3, v0, 16
; GFX7-NEXT: v_alignbit_b32 v1, v4, v1, 16
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX8-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX8-NEXT: v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX8-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX8-NEXT: v_and_b32_e32 v8, 0xffff0000, v2
; GFX8-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v8
; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX8-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX8-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX900-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX900-NEXT: v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX900-NEXT: s_movk_i32 s6, 0x8000
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX900-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX900-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX900-NEXT: v_and_b32_e32 v8, 0xffff0000, v2
; GFX900-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v7, v8
; GFX900-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v5
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX900-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v1
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX900-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
; GFX900-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX900-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX900-NEXT: v_cmp_eq_u16_e64 s[4:5], s6, v0
; GFX900-NEXT: s_and_b64 vcc, vcc, s[4:5]
; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v0, v5, v0, s4
; GFX900-NEXT: v_perm_b32 v1, v4, v1, s4
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
; GFX950-NEXT: v_lshrrev_b32_e32 v4, 16, v3
; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX950-NEXT: v_cndmask_b32_sdwa v5, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v5
; GFX950-NEXT: s_movk_i32 s2, 0x8000
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v7
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v4
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX950-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
; GFX950-NEXT: v_and_b32_e32 v8, 0xffff0000, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v5
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v7, v8
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v6, v6, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v6
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v7
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v1
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v7, v6
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v3
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v6
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX950-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v0
; GFX950-NEXT: v_cmp_eq_u16_e64 s[0:1], s2, v0
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_lt_f32_e32 vcc, v6, v3
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v2
; GFX950-NEXT: v_cmp_eq_f32_e32 vcc, 0, v3
; GFX950-NEXT: s_and_b64 vcc, vcc, s[0:1]
; GFX950-NEXT: s_mov_b32 s0, 0x5040100
; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX950-NEXT: v_perm_b32 v0, v5, v0, s0
; GFX950-NEXT: v_perm_b32 v1, v4, v1, s0
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v3
; GFX10-NEXT: v_and_b32_e32 v7, 0xffff0000, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v0
; GFX10-NEXT: v_and_b32_e32 v9, 0xffff0000, v2
; GFX10-NEXT: v_cndmask_b32_sdwa v11, v1, v5, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v14, 16, v2
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v11
; GFX10-NEXT: v_cndmask_b32_e32 v6, v8, v4, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v6
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v13, v13
; GFX10-NEXT: v_cmp_eq_u16_e64 s8, 0x8000, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v9, v9
; GFX10-NEXT: v_cmp_eq_u16_e64 s6, 0x8000, v0
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v12, v12
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v10, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v5
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc_lo
; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX10-NEXT: v_cndmask_b32_e64 v4, v4, v6, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v13, 16, v2
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, v7, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v4
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, v8, v13
; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v1, s5
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v0, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v2
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v6
; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc_lo
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v7
; GFX10-NEXT: v_cmp_eq_f32_e64 s5, 0, v8
; GFX10-NEXT: v_cmp_eq_f32_e64 s7, 0, v9
; GFX10-NEXT: s_and_b32 vcc_lo, vcc_lo, s4
; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s5, s6
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX10-NEXT: s_and_b32 vcc_lo, s7, s8
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX10-NEXT: v_perm_b32 v0, v4, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v5, v1, 0x5040100
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.l, 0
; GFX11-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX11-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v1.h, v3.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v5, v5
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v4
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v0.h, v2.h, s1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 16, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v8, v8
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v6.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v7.h, v1.l, v3.l, vcc_lo
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v5.h, v6.h, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v3.l, v7.h, s2
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v7.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v6
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v7.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v6
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX11-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, v6.h, v7.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v2.l, v0.h, s2
; GFX11-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v6
; GFX11-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v0.h, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v6
; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v6.h, v0.h, s0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v4
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc_lo
; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX11-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v6, v9, v8 :: v_dual_lshlrev_b32 v13, 16, v5
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v4
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v13
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_lshlrev_b32 v13, 16, v1
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v11, v11
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc_lo
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v13, v12
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v8, v9
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v6, s0
; GFX11-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v11, v10
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v0, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v5, 16, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v5
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX11-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX11-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-TRUE16-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-TRUE16-NEXT: s_wait_expcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v3
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.l, 0
; GFX12-TRUE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s0, v5, v5
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX12-TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v1.h, v3.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s1, v5, v5
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v3.h, v6.h, s0
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v4
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v6.h
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v4
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.h, v4.h, v6.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v0.h, v2.h, s1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v7, 16, v1
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v8, 16, v3
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v2.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6.h
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v8, v8
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v6, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v5.h, v5.h, v6.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v7.h, v1.l, v3.l, vcc_lo
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.h, v5.h, v6.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v3.l, v7.h, s2
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v2
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v7.h
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v7, v6
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e64 s2, v5, v5
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v7.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v3, v3
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v6
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.l, v2.l, vcc_lo
; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX12-TRUE16-NEXT: s_and_b32 s0, s1, s0
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v4.l, v6.h, v7.h, s0
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v2.l, v0.h, s2
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
; GFX12-TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v6
; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v6.h, v6.h, v0.h, vcc_lo
; GFX12-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.h
; GFX12-TRUE16-NEXT: v_cmp_eq_f32_e64 s0, 0, v6
; GFX12-TRUE16-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-TRUE16-NEXT: v_cndmask_b16 v1.l, v6.h, v0.h, s0
; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-TRUE16-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v4
; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-FAKE16-LABEL: v_minimumnum_v4bf16_no_ieee:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-FAKE16-NEXT: s_wait_expcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4
; GFX12-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v3
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v1
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc_lo
; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v6, v6
; GFX12-FAKE16-NEXT: v_and_b32_e32 v7, 0xffff0000, v2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v6, v9, v8 :: v_dual_lshlrev_b32 v13, 16, v5
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v4
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v14, 16, v0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v12, v13
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v5, v4, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v10, v10
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v14, v14
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_lshlrev_b32 v13, 16, v1
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v7, v7
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v11, v11
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s4, 0x8000, v1
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v11, 16, v0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v7, v8, v6, vcc_lo
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v6
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v15, 16, v2
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s2, 0x8000, v0
; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v15, v15
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v12, 16, v3
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v9
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v7
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s1, v13, v12
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v2
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v8, v9
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v3, v3, v1, s1
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v7, v7, v6, s0
; GFX12-FAKE16-NEXT: v_cmp_lt_f32_e64 s0, v11, v10
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v7
; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e64 v2, v2, v0, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v4
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v9, 16, v2
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: v_cmp_eq_u16_e64 s0, 0x8000, v6
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_dual_cndmask_b32 v4, v5, v4 :: v_dual_lshlrev_b32 v5, 16, v3
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v8
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s1, 0, v9
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-FAKE16-NEXT: v_cmp_eq_f32_e64 s3, 0, v5
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, vcc_lo, s0
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v5, v7, v6, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s1, s2
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
; GFX12-FAKE16-NEXT: s_and_b32 vcc_lo, s3, s4
; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-FAKE16-NEXT: v_perm_b32 v0, v5, v0, 0x5040100
; GFX12-FAKE16-NEXT: v_perm_b32 v1, v4, v1, 0x5040100
; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %x, <4 x bfloat> %y)
ret <4 x bfloat> %result
}
; Attribute group #0 sets the string attribute "amdgpu-ieee" to "false".
; Review note - the functions that reference #0 lie outside this part of the
; file. Per the AMDGPU backend documentation this attribute is understood to
; state whether IEEE mode is expected on function entry (TODO confirm).
attributes #0 = { "amdgpu-ieee"="false" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11: {{.*}}
; GFX12: {{.*}}
; GFX9: {{.*}}