[LLVM][SelectionDAG] Align poison/undef binop folds with IR. (#149334)

The "at construction" binop folds in SelectionDAG::getNode() have
different behaviour when compared to the equivalent LLVM IR folds. This
PR makes the behaviour consistent while also extending the coverage to
include signed/unsigned max/min operations.
This commit is contained in:
Paul Walker 2025-07-30 11:20:30 +01:00 committed by GitHub
parent 984ec02236
commit 13f38c97d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 1594 additions and 1331 deletions

View File

@ -7843,20 +7843,43 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
} }
} }
// Perform trivial constant folding. if (N1.getOpcode() == ISD::POISON || N2.getOpcode() == ISD::POISON) {
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags)) switch (Opcode) {
return SV; case ISD::XOR:
case ISD::ADD:
case ISD::PTRADD:
case ISD::SUB:
case ISD::SIGN_EXTEND_INREG:
case ISD::UDIV:
case ISD::SDIV:
case ISD::UREM:
case ISD::SREM:
case ISD::MUL:
case ISD::AND:
case ISD::SSUBSAT:
case ISD::USUBSAT:
case ISD::UMIN:
case ISD::OR:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::UMAX:
case ISD::SMAX:
case ISD::SMIN:
// fold op(arg1, poison) -> poison, fold op(poison, arg2) -> poison.
return N2.getOpcode() == ISD::POISON ? N2 : N1;
}
}
// Canonicalize an UNDEF to the RHS, even over a constant. // Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) { if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() != ISD::UNDEF) {
if (TLI->isCommutativeBinOp(Opcode)) { if (TLI->isCommutativeBinOp(Opcode)) {
std::swap(N1, N2); std::swap(N1, N2);
} else { } else {
switch (Opcode) { switch (Opcode) {
case ISD::PTRADD: case ISD::PTRADD:
case ISD::SUB: case ISD::SUB:
// fold op(undef, arg2) -> undef, fold op(poison, arg2) ->poison. // fold op(undef, non_undef_arg2) -> undef.
return N1.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); return N1;
case ISD::SIGN_EXTEND_INREG: case ISD::SIGN_EXTEND_INREG:
case ISD::UDIV: case ISD::UDIV:
case ISD::SDIV: case ISD::SDIV:
@ -7864,18 +7887,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SREM: case ISD::SREM:
case ISD::SSUBSAT: case ISD::SSUBSAT:
case ISD::USUBSAT: case ISD::USUBSAT:
// fold op(undef, arg2) -> 0, fold op(poison, arg2) -> poison. // fold op(undef, non_undef_arg2) -> 0.
return N1.getOpcode() == ISD::POISON ? getPOISON(VT) return getConstant(0, DL, VT);
: getConstant(0, DL, VT);
} }
} }
} }
// Fold a bunch of operators when the RHS is undef. // Fold a bunch of operators when the RHS is undef.
if (N2.isUndef()) { if (N2.getOpcode() == ISD::UNDEF) {
switch (Opcode) { switch (Opcode) {
case ISD::XOR: case ISD::XOR:
if (N1.isUndef()) if (N1.getOpcode() == ISD::UNDEF)
// Handle undef ^ undef -> 0 special case. This is a common // Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse). // idiom (misuse).
return getConstant(0, DL, VT); return getConstant(0, DL, VT);
@ -7883,29 +7905,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::ADD: case ISD::ADD:
case ISD::PTRADD: case ISD::PTRADD:
case ISD::SUB: case ISD::SUB:
// fold op(arg1, undef) -> undef.
return N2;
case ISD::UDIV: case ISD::UDIV:
case ISD::SDIV: case ISD::SDIV:
case ISD::UREM: case ISD::UREM:
case ISD::SREM: case ISD::SREM:
// fold op(arg1, undef) -> undef, fold op(arg1, poison) -> poison. // fold op(arg1, undef) -> poison.
return N2.getOpcode() == ISD::POISON ? getPOISON(VT) : getUNDEF(VT); return getPOISON(VT);
case ISD::MUL: case ISD::MUL:
case ISD::AND: case ISD::AND:
case ISD::SSUBSAT: case ISD::SSUBSAT:
case ISD::USUBSAT: case ISD::USUBSAT:
// fold op(arg1, undef) -> 0, fold op(arg1, poison) -> poison. case ISD::UMIN:
return N2.getOpcode() == ISD::POISON ? getPOISON(VT) // fold op(undef, undef) -> undef, fold op(arg1, undef) -> 0.
: getConstant(0, DL, VT); return N1.getOpcode() == ISD::UNDEF ? N2 : getConstant(0, DL, VT);
case ISD::OR: case ISD::OR:
case ISD::SADDSAT: case ISD::SADDSAT:
case ISD::UADDSAT: case ISD::UADDSAT:
// fold op(arg1, undef) -> an all-ones constant, fold op(arg1, poison) -> case ISD::UMAX:
// poison. // fold op(undef, undef) -> undef, fold op(arg1, undef) -> -1.
return N2.getOpcode() == ISD::POISON ? getPOISON(VT) return N1.getOpcode() == ISD::UNDEF ? N2 : getAllOnesConstant(DL, VT);
: getAllOnesConstant(DL, VT); case ISD::SMAX:
// fold op(undef, undef) -> undef, fold op(arg1, undef) -> MAX_INT.
return N1.getOpcode() == ISD::UNDEF
? N2
: getConstant(
APInt::getSignedMaxValue(VT.getScalarSizeInBits()), DL,
VT);
case ISD::SMIN:
// fold op(undef, undef) -> undef, fold op(arg1, undef) -> MIN_INT.
return N1.getOpcode() == ISD::UNDEF
? N2
: getConstant(
APInt::getSignedMinValue(VT.getScalarSizeInBits()), DL,
VT);
} }
} }
// Perform trivial constant folding.
if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2}, Flags))
return SV;
// Memoize this node if possible. // Memoize this node if possible.
SDNode *N; SDNode *N;
SDVTList VTs = getVTList(VT); SDVTList VTs = getVTList(VT);

View File

@ -4,7 +4,6 @@
define i32 @f(i32 %a0) { define i32 @f(i32 %a0) {
; CHECK-LABEL: f: ; CHECK-LABEL: f:
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret ; CHECK-NEXT: ret
%1 = lshr i32 %a0, 2147483647 %1 = lshr i32 %a0, 2147483647
%2 = add i32 %1, 2147483647 %2 = add i32 %1, 2147483647

View File

@ -235,7 +235,7 @@ define <3 x i16> @v_saddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4 ; GFX6-NEXT: v_med3_i32 v3, v2, s4, v4
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16 ; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
; GFX6-NEXT: s_setpc_b64 s[30:31] ; GFX6-NEXT: s_setpc_b64 s[30:31]
; ;

View File

@ -202,10 +202,9 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT: v_min_u32_e32 v0, 0xffff, v0 ; GFX6-NEXT: v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_min_u32_e32 v3, 0xffff, v2 ; GFX6-NEXT: v_min_u32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3 ; GFX6-NEXT: v_alignbit_b32 v1, v2, v1, 16
; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
; GFX6-NEXT: s_setpc_b64 s[30:31] ; GFX6-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX8-LABEL: v_uaddsat_v3i16: ; GFX8-LABEL: v_uaddsat_v3i16:

View File

@ -604,18 +604,18 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i8: ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -698,15 +698,15 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v8i8: ; GFX9-SDAG-LABEL: test_vector_reduce_smax_v8i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX9-SDAG-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX9-SDAG-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -741,20 +741,20 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_smax_v8i8: ; GFX10-SDAG-LABEL: test_vector_reduce_smax_v8i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v3, v3, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 ; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v5
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v6, 0, 8
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_max_i16 v2, v2, v3 ; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v3
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2 ; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v2, v4
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -796,62 +796,62 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v8i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v8i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v8i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v5
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v3
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v2 ; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v4
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
@ -906,39 +906,39 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v1.l, v5.l, v3.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v5.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v2.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
@ -949,23 +949,23 @@ define i8 @test_vector_reduce_smax_v8i8(<8 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v5
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v3
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v2 ; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v4
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
@ -1025,32 +1025,32 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i8: ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_max3_i32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_max3_i32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -1165,21 +1165,21 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_smax_v16i8: ; GFX9-SDAG-LABEL: test_vector_reduce_smax_v16i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX9-SDAG-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_i16 v3, v3, v11, v7 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v13
; GFX9-SDAG-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX9-SDAG-NEXT: v_max_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_i16 v2, v2, v10, v6 ; GFX9-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_max_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v12
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_max3_i16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -1222,34 +1222,34 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_smax_v16i8: ; GFX10-SDAG-LABEL: test_vector_reduce_smax_v16i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v7, v7, v15 ; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v5, v5, v13
; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v9 ; GFX10-SDAG-NEXT: v_max_i16 v1, v1, v9
; GFX10-SDAG-NEXT: v_max3_i16 v3, v3, v11, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v3
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v12, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v5, v6, v7 ; GFX10-SDAG-NEXT: v_bfe_i32 v9, v12, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v7, v7, v15
; GFX10-SDAG-NEXT: v_max_i16 v3, v3, v11
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v5, v13
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v8, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v10, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX10-SDAG-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v14, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v5
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_max_i16 v3, v4, v3 ; GFX10-SDAG-NEXT: v_max_i16 v3, v6, v3
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v8 ; GFX10-SDAG-NEXT: v_max_i16 v2, v2, v8
; GFX10-SDAG-NEXT: v_max3_i16 v2, v2, v10, v5 ; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v4, v9
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v3, v2 ; GFX10-SDAG-NEXT: v_max3_i16 v0, v0, v2, v3
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: v_max_i16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -1307,59 +1307,58 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smax_v16i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.h, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
@ -1368,37 +1367,37 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v16i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smax_v16i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v5, v5, v13
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v3, v3, v11, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v5, v6, v7 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v11
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v13
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v5
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v4, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v3, v6, v3
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v8 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v8
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v2, v2, v10, v5 ; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v9
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v3, v2 ; GFX11-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v3
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
@ -1468,59 +1467,58 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.h, v4.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.l, v6.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v1.h, v0.h, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.h, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v1.l, v1.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v5.l, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v2.l, v3.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.h, v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.l, v1.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_max3_i16 v0.l, v0.h, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max_i16 v0.l, v0.l, v1.l
@ -1533,37 +1531,37 @@ define i8 @test_vector_reduce_smax_v16i8(<16 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v5, v5, v13
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v1, v1, v9
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v3, v3, v11, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v3
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v5, v6, v7 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v7, v7, v15
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v3, v11
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v5, v13
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v5
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v4, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v3, v6, v3
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v8 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v2, v2, v8
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v2, v2, v10, v5 ; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v4, v9
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v3, v2 ; GFX12-SDAG-FAKE16-NEXT: v_max3_i16 v0, v0, v2, v3
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_max_i16 v0, v0, v1
@ -2055,18 +2053,18 @@ define i16 @test_vector_reduce_smax_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16: ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -2253,32 +2251,32 @@ define i16 @test_vector_reduce_smax_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16: ; GFX7-SDAG-LABEL: test_vector_reduce_smax_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_max_i32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_max3_i32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_max3_i32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_max_i32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_max_i32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_max_i32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_max_i32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_max3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;

View File

@ -604,18 +604,18 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i8: ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -698,15 +698,15 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_smin_v8i8: ; GFX9-SDAG-LABEL: test_vector_reduce_smin_v8i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX9-SDAG-NEXT: v_min_i16_sdwa v3, sext(v3), sext(v7) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v5) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX9-SDAG-NEXT: v_min_i16_sdwa v2, sext(v2), sext(v6) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -741,20 +741,20 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_smin_v8i8: ; GFX10-SDAG-LABEL: test_vector_reduce_smin_v8i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v3, v3, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 ; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v5
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v6, 0, 8
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_min_i16 v2, v2, v3 ; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v3
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2 ; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v2, v4
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -796,62 +796,62 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v8i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v8i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v8i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v5
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v3 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v3
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v2 ; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v4
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
@ -906,39 +906,39 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v3, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v8.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v1.l, v5.l, v3.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v5.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v6.l, 8, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v5.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v6 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 8, v5
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v2.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
@ -949,23 +949,23 @@ define i8 @test_vector_reduce_smin_v8i8(<8 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v5
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v6, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v4, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v6, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v3 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v3
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v2 ; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v4
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
@ -1025,32 +1025,32 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i8: ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 8
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_min3_i32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_min3_i32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -1165,21 +1165,21 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_smin_v16i8: ; GFX9-SDAG-LABEL: test_vector_reduce_smin_v16i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX9-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX9-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX9-SDAG-NEXT: v_min_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_i16_sdwa v5, sext(v5), sext(v13) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v1, sext(v1), sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_i16 v3, v3, v11, v7 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v7, sext(v7), sext(v15) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v3, sext(v3), sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8 ; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v13
; GFX9-SDAG-NEXT: v_min_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX9-SDAG-NEXT: v_min_i16_sdwa v4, sext(v4), sext(v12) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, sext(v0), sext(v8) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_i16 v2, v2, v10, v6 ; GFX9-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_min_i16_sdwa v6, sext(v6), sext(v14) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_i16_sdwa v2, sext(v2), sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v12
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_min3_i16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_i16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -1222,34 +1222,34 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_smin_v16i8: ; GFX10-SDAG-LABEL: test_vector_reduce_smin_v16i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v7, v7, v15 ; GFX10-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v5, v5, v13
; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v9 ; GFX10-SDAG-NEXT: v_min_i16 v1, v1, v9
; GFX10-SDAG-NEXT: v_min3_i16 v3, v3, v11, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v3
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v12, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v5, v6, v7 ; GFX10-SDAG-NEXT: v_bfe_i32 v9, v12, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v7, v7, v15
; GFX10-SDAG-NEXT: v_min_i16 v3, v3, v11
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v5, v13
; GFX10-SDAG-NEXT: v_bfe_i32 v5, v8, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v8, v10, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX10-SDAG-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_bfe_i32 v3, v14, 0, 8
; GFX10-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v5
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_min_i16 v3, v4, v3 ; GFX10-SDAG-NEXT: v_min_i16 v3, v6, v3
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v8 ; GFX10-SDAG-NEXT: v_min_i16 v2, v2, v8
; GFX10-SDAG-NEXT: v_min3_i16 v2, v2, v10, v5 ; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v4, v9
; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v3, v2 ; GFX10-SDAG-NEXT: v_min3_i16 v0, v0, v2, v3
; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1 ; GFX10-SDAG-NEXT: v_min_i16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -1307,59 +1307,58 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_smin_v16i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.h, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
@ -1368,37 +1367,37 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v16i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_smin_v16i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v5, v5, v13
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v3, v3, v11, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v5, v6, v7 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v11
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v13
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v5
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v4, v3 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v3, v6, v3
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v8 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v8
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v2, v2, v10, v5 ; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v9
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v3, v2 ; GFX11-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v3
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
@ -1468,59 +1467,58 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v16, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v15.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v17.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v9.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v17, v0, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v11, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v15, v2, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v4, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v3, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v3, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v14, v6, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v0, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v15.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v1, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v18.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v7.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v0, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v11, v3, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v13.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v15.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v11.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v6.l, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.h, v4.l, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v11.l, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.l, v6.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v8, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v12, v12, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v14.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v9.l, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v6, v10, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v17, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v1.h, v0.h, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.h, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v11, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v17.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v9.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v5, v18, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v4, v9, 0, 8
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v7.l, 8, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v10.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v16.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v12.l
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 8, v7
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v1.l, v1.l, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v5.l, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v2.l, v3.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.h, v0.h, v2.l, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8 ; GFX12-SDAG-TRUE16-NEXT: v_bfe_i32 v2, v6, 0, 8
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.l, v1.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_min3_i16 v0.l, v0.h, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v1.l
@ -1533,37 +1531,37 @@ define i8 @test_vector_reduce_smin_v16i8(<16 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v9, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v13, v13, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v5, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v11, v11, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v15, v15, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v7, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v10, v10, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v3, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v5, v5, v13
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v1, v1, v9
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v3, v3, v11, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v7, v14, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v8, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v3
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v12, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v5, v6, v7 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v9, v12, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v7, v7, v15
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v3, v11
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v5, v13
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v5, v8, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v4, v4, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v8, v10, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v6, v6, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v3, v14, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v2, v2, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v5
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v4, v3 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v3, v6, v3
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v8 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v2, v2, v8
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v2, v2, v10, v5 ; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v4, v9
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v3, v2 ; GFX12-SDAG-FAKE16-NEXT: v_min3_i16 v0, v0, v2, v3
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8 ; GFX12-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_min_i16 v0, v0, v1
@ -2055,18 +2053,18 @@ define i16 @test_vector_reduce_smin_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16: ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -2253,32 +2251,32 @@ define i16 @test_vector_reduce_smin_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i16: ; GFX7-SDAG-LABEL: test_vector_reduce_smin_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v9, v9, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v12, v12, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16 ; GFX7-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_bfe_i32 v11, v11, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_bfe_i32 v15, v15, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_bfe_i32 v10, v10, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v14, v14, 0, 16
; GFX7-SDAG-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_min_i32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_min3_i32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_min3_i32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_min_i32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_min_i32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_min_i32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_min_i32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_min3_i32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_i32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_i32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;

View File

@ -320,7 +320,7 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v1 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v4i8: ; GFX8-GISEL-LABEL: test_vector_reduce_umax_v4i8:
@ -351,8 +351,9 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -387,9 +388,9 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v3 ; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 8
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -429,8 +430,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@ -446,8 +447,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
@ -500,8 +501,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v0.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
@ -521,8 +522,8 @@ define i8 @test_vector_reduce_umax_v4i8(<4 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v3
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1 ; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v1
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
@ -572,18 +573,18 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i8: ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -628,7 +629,7 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2 ; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2
; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v1 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v8i8: ; GFX8-GISEL-LABEL: test_vector_reduce_umax_v8i8:
@ -660,17 +661,17 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_umax_v8i8: ; GFX9-SDAG-LABEL: test_vector_reduce_umax_v8i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX9-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX9-SDAG-NEXT: v_max_u16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX9-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX9-SDAG-NEXT: v_max_u16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_max_u16_e32 v0, v0, v1 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v8i8: ; GFX9-GISEL-LABEL: test_vector_reduce_umax_v8i8:
@ -702,21 +703,21 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_umax_v8i8: ; GFX10-SDAG-LABEL: test_vector_reduce_umax_v8i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_max_u16 v3, v3, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-SDAG-NEXT: v_max_u16 v2, v2, v6 ; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v5
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4 ; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4
; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8 ; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v6
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v3, v2 ; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v3
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v4
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1 ; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -756,50 +757,49 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v8i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v8i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v3.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v1.l, v1.l, v3.h, v1.h
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v3.l, v3.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v1.h, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v3
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v5
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v3
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v4
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
@ -852,27 +852,26 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v3.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v1.l, v1.l, v3.h, v1.h
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v3.l, v3.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v1.h, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v3.l, 8, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 8, v3
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8: ; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v8i8:
@ -882,24 +881,24 @@ define i8 @test_vector_reduce_umax_v8i8(<8 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v5
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v3
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v4
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
@ -957,32 +956,32 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i8: ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_max_u32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_max_u32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_max3_u32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_max3_u32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -1051,9 +1050,8 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2 ; GFX8-SDAG-NEXT: v_max_u16_e32 v0, v0, v2
; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-SDAG-NEXT: v_max_u16_sdwa v1, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 8 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX8-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX8-GISEL-LABEL: test_vector_reduce_umax_v16i8: ; GFX8-GISEL-LABEL: test_vector_reduce_umax_v16i8:
@ -1093,25 +1091,24 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_umax_v16i8: ; GFX9-SDAG-LABEL: test_vector_reduce_umax_v16i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 ; GFX9-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX9-SDAG-NEXT: v_max_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX9-SDAG-NEXT: v_max_u16_sdwa v5, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX9-SDAG-NEXT: v_max3_u16 v3, v3, v11, v7 ; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX9-SDAG-NEXT: v_max_u16_sdwa v4, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_u16_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_u16 v2, v2, v10, v6 ; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v13
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v4, v12
; GFX9-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_max3_u16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX9-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_max_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX9-GISEL-LABEL: test_vector_reduce_umax_v16i8: ; GFX9-GISEL-LABEL: test_vector_reduce_umax_v16i8:
@ -1151,38 +1148,38 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_umax_v16i8: ; GFX10-SDAG-LABEL: test_vector_reduce_umax_v16i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_max_u16 v7, v7, v15
; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v9
; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX10-SDAG-NEXT: v_max_u16 v5, v5, v13 ; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX10-SDAG-NEXT: v_max_u16 v1, v1, v9 ; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_max_u16 v6, v6, v14 ; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX10-SDAG-NEXT: v_max3_u16 v3, v3, v11, v7 ; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX10-SDAG-NEXT: v_max_u16 v4, v4, v12 ; GFX10-SDAG-NEXT: v_max_u16 v7, v7, v15
; GFX10-SDAG-NEXT: v_max_u16 v3, v3, v11
; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v8 ; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v8
; GFX10-SDAG-NEXT: v_max3_u16 v2, v2, v10, v6 ; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v13
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX10-SDAG-NEXT: v_max_u16 v5, v6, v14
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v4, v2 ; GFX10-SDAG-NEXT: v_max_u16 v2, v2, v10
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v4, v12
; GFX10-SDAG-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_max3_u16 v0, v0, v2, v5
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_max_u16 v0, v2, v0 ; GFX10-SDAG-NEXT: v_max_u16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX10-GISEL-LABEL: test_vector_reduce_umax_v16i8: ; GFX10-GISEL-LABEL: test_vector_reduce_umax_v16i8:
@ -1237,84 +1234,82 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v16i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umax_v16i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v10.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v4.h, v5.l, v4.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v6.l, v6.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v3.l, v3.h, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v3.l, v4.l, v5.h ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v3.h, v6.h, v3.h
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v2.h, v3.l, v2.h
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v5.l, v5.h
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v6.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v2.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v1.h, v2.l, v2.h, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v4.l, v4.h
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v4.h, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v2.h, v3.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v3.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v5, v5, v13 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v6, v6, v14 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v3, v3, v11, v7 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v4, v4, v12 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v11
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v13
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v5, v6, v14
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v10
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v2, v2, v10, v6 ; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v12
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2 ; GFX11-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v5
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
; ;
@ -1382,44 +1377,42 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v10.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.h, v0.h, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v4.h, v5.l, v4.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v1.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v6.l, v6.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v3.l, v3.h, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v3.l, v4.l, v5.h ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v3.h, v6.h, v3.h
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v2.h, v3.l, v2.h
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v5.l, v5.h
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.l, v6.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v1.h, v2.l, v1.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v1.h, v2.l, v2.h, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v4.l, v4.h
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v1.l, v4.h, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.h, v0.h, v2.h, v3.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v3.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_max3_u16 v0.l, v0.l, v1.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_max_u16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
; ;
; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8: ; GFX12-SDAG-FAKE16-LABEL: test_vector_reduce_umax_v16i8:
@ -1429,41 +1422,41 @@ define i8 @test_vector_reduce_umax_v16i8(<16 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v5, v5, v13 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v1, v1, v9 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v6, v6, v14 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v3, v3, v11, v7 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v4, v4, v12 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v7, v7, v15
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v3, v3, v11
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v8
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v13
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v5, v6, v14
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v2, v2, v10
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v2, v2, v10, v6 ; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v12
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v5, v3 ; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v4, v2 ; GFX12-SDAG-FAKE16-NEXT: v_max3_u16 v0, v0, v2, v5
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v0 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_max_u16 v0, v0, v1
; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] ; GFX12-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
; ;
@ -1940,18 +1933,18 @@ define i16 @test_vector_reduce_umax_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i16: ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -2136,32 +2129,32 @@ define i16 @test_vector_reduce_umax_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i16: ; GFX7-SDAG-LABEL: test_vector_reduce_umax_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_max_u32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_max_u32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_max_u32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_max3_u32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_max3_u32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_max_u32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_max_u32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_max_u32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_max_u32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_max3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_max3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_max_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;

View File

@ -485,18 +485,18 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i8: ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -549,15 +549,15 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_umin_v8i8: ; GFX9-SDAG-LABEL: test_vector_reduce_umin_v8i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX9-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX9-SDAG-NEXT: v_min_u16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX9-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX9-SDAG-NEXT: v_min_u16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_min_u16_e32 v0, v0, v1 ; GFX9-SDAG-NEXT: v_min_u16_e32 v0, v0, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -578,20 +578,20 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_umin_v8i8: ; GFX10-SDAG-LABEL: test_vector_reduce_umin_v8i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_min_u16 v3, v3, v7 ; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-SDAG-NEXT: v_min_u16 v2, v2, v6 ; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v5
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v4 ; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v4
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v6
; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 8
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v3, v2 ; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v2, v3
; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-SDAG-NEXT: v_lshrrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v1 ; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v1
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
@ -620,24 +620,24 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v8i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v8i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v3.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.h, v1.h
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.l, v3.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v6.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v1.h, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.l, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 ; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
@ -646,23 +646,23 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v8i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v8i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v5
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v3 ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v3
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v4
; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1
@ -699,24 +699,24 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v6.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v3.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.h, v1.h
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.l, v1.l, v3.l, v3.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v6.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v1.h, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX12-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v4.l
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.l, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2 ; GFX12-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 8, v2
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
@ -729,23 +729,23 @@ define i8 @test_vector_reduce_umin_v8i8(<8 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v5
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v6 ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v4
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v6
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v3 ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v3
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v4
; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v1
@ -787,32 +787,32 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i8: ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i8:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX7-SDAG-NEXT: v_min_u32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX7-SDAG-NEXT: v_min_u32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_min3_u32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_min3_u32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -899,20 +899,20 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_umin_v16i8: ; GFX9-SDAG-LABEL: test_vector_reduce_umin_v16i8:
; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11 ; GFX9-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX9-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX9-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX9-SDAG-NEXT: v_min_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX9-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX9-SDAG-NEXT: v_min_u16_sdwa v5, v5, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v1, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX9-SDAG-NEXT: v_min3_u16 v3, v3, v11, v7 ; GFX9-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX9-SDAG-NEXT: v_min_u16_sdwa v4, v4, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v7, v7, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_u16_sdwa v3, v3, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v0, v0, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_u16 v2, v2, v10, v6 ; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v13
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v6, v6, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX9-SDAG-NEXT: v_min_u16_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v4, v12
; GFX9-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX9-SDAG-NEXT: v_min3_u16 v0, v0, v2, v6
; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 8
@ -944,32 +944,32 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX10-SDAG-LABEL: test_vector_reduce_umin_v16i8: ; GFX10-SDAG-LABEL: test_vector_reduce_umin_v16i8:
; GFX10-SDAG: ; %bb.0: ; %entry ; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX10-SDAG-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX10-SDAG-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX10-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-SDAG-NEXT: v_min_u16 v7, v7, v15
; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX10-SDAG-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX10-SDAG-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX10-SDAG-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX10-SDAG-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX10-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v9
; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX10-SDAG-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX10-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX10-SDAG-NEXT: v_min_u16 v5, v5, v13 ; GFX10-SDAG-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX10-SDAG-NEXT: v_min_u16 v1, v1, v9 ; GFX10-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX10-SDAG-NEXT: v_min_u16 v6, v6, v14 ; GFX10-SDAG-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX10-SDAG-NEXT: v_min3_u16 v3, v3, v11, v7 ; GFX10-SDAG-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX10-SDAG-NEXT: v_min_u16 v4, v4, v12 ; GFX10-SDAG-NEXT: v_min_u16 v7, v7, v15
; GFX10-SDAG-NEXT: v_min_u16 v3, v3, v11
; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v8 ; GFX10-SDAG-NEXT: v_min_u16 v0, v0, v8
; GFX10-SDAG-NEXT: v_min3_u16 v2, v2, v10, v6 ; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v13
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX10-SDAG-NEXT: v_min_u16 v5, v6, v14
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX10-SDAG-NEXT: v_min_u16 v2, v2, v10
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v4, v12
; GFX10-SDAG-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX10-SDAG-NEXT: v_min3_u16 v0, v0, v2, v5
; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX10-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-SDAG-NEXT: v_mov_b32_e32 v1, 8
@ -1018,34 +1018,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v16i8: ; GFX11-SDAG-TRUE16-LABEL: test_vector_reduce_umin_v16i8:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v10.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v9.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v4.h, v5.l, v4.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v6.l, v6.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v3.l, v3.h, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v3.l, v4.l, v5.h ; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v3.h, v6.h, v3.h
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v7.l ; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v2.h, v3.l, v2.h
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v5.l, v5.h
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v6.l, v7.l
; GFX11-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v2.l, v1.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v1.h, v2.l, v2.h, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v4.l, v4.h
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v1.l, v4.h, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v2.h, v3.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v3.l, v1.h ; GFX11-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h ; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
@ -1061,34 +1061,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v16i8: ; GFX11-SDAG-FAKE16-LABEL: test_vector_reduce_umin_v16i8:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v5, v5, v13 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v6, v6, v14 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v3, v3, v11, v7 ; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v4, v4, v12 ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v11
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8 ; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v13
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v5, v6, v14
; GFX11-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v10
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v2, v2, v10, v6 ; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v12
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX11-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v5
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
@ -1147,34 +1147,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v10.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v9.l, 0xff, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v10.l, 0xff, v15.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.h, 0xff, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v13.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.l, 0xff, v5.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v1.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v10.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v8.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v5.h, 0xff, v12.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.h, 0xff, v11.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.h, 0xff, v15.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.h, 0xff, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v3.l, 0xff, v3.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.h, v0.h, v9.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.h, 0xff, v12.l
; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v4.l, 0xff, v4.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v4.h, v5.l, v4.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v1.h, 0xff, v10.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v1.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v2.l, 0xff, v2.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v6.l, v6.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v7.l, 0xff, v14.l
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v3.l, v3.h, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_and_b16 v6.l, 0xff, v6.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v3.l, v4.l, v5.h ; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v3.h, v6.h, v3.h
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v7.l ; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v2.h, v3.l, v2.h
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v5.l, v5.h
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.l, v6.l, v7.l
; GFX12-SDAG-TRUE16-NEXT: v_min_u16 v1.h, v2.l, v1.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v1.h, v2.l, v2.h, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v4.l, v4.h
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v1.l, v4.h, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.h, v0.h, v2.h, v3.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v3.l, v1.h ; GFX12-SDAG-TRUE16-NEXT: v_min3_u16 v0.l, v0.l, v1.h, v1.l
; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_lshlrev_b16 v0.h, 8, v0.h
; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h ; GFX12-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v0.h
@ -1194,34 +1194,34 @@ define i8 @test_vector_reduce_umin_v16i8(<16 x i8> %v) {
; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v9, 0xff, v9
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v13, 0xff, v13
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v8, 0xff, v8
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v11, 0xff, v11
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v15, 0xff, v15
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v3, 0xff, v3
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v12, 0xff, v12
; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v5, v5, v13 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v10, 0xff, v10
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v1, v1, v9 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v2, 0xff, v2
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v6, v6, v14 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v14, 0xff, v14
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v3, v3, v11, v7 ; GFX12-SDAG-FAKE16-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v4, v4, v12 ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v7, v7, v15
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v3, v3, v11
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8 ; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v0, v0, v8
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v13
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v5, v6, v14
; GFX12-SDAG-FAKE16-NEXT: v_min_u16 v2, v2, v10
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v2, v2, v10, v6 ; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v12
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v5, v3 ; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v1, v1, v3, v7
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v4, v2 ; GFX12-SDAG-FAKE16-NEXT: v_min3_u16 v0, v0, v2, v5
; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1 ; GFX12-SDAG-FAKE16-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX12-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v1
@ -1685,18 +1685,18 @@ define i16 @test_vector_reduce_umin_v8i16(<8 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i16: ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v8i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v7 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v4
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v5
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;
@ -1878,32 +1878,32 @@ define i16 @test_vector_reduce_umin_v16i16(<16 x i16> %v) {
; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i16: ; GFX7-SDAG-LABEL: test_vector_reduce_umin_v16i16:
; GFX7-SDAG: ; %bb.0: ; %entry ; GFX7-SDAG: ; %bb.0: ; %entry
; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9 ; GFX7-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v13, 0xffff, v13
; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5 ; GFX7-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX7-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX7-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v12, 0xffff, v12
; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX7-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15 ; GFX7-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v11
; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14 ; GFX7-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7-SDAG-NEXT: v_min_u32_e32 v4, v4, v12 ; GFX7-SDAG-NEXT: v_and_b32_e32 v15, 0xffff, v15
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8 ; GFX7-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
; GFX7-SDAG-NEXT: v_min_u32_e32 v5, v5, v13 ; GFX7-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v10
; GFX7-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-SDAG-NEXT: v_and_b32_e32 v14, 0xffff, v14
; GFX7-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9 ; GFX7-SDAG-NEXT: v_min_u32_e32 v1, v1, v9
; GFX7-SDAG-NEXT: v_min3_u32 v2, v2, v10, v6 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v8
; GFX7-SDAG-NEXT: v_min3_u32 v3, v3, v11, v7 ; GFX7-SDAG-NEXT: v_min_u32_e32 v6, v6, v14
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v3 ; GFX7-SDAG-NEXT: v_min_u32_e32 v2, v2, v10
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v2 ; GFX7-SDAG-NEXT: v_min_u32_e32 v7, v7, v15
; GFX7-SDAG-NEXT: v_min_u32_e32 v3, v3, v11
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v4, v12
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v5, v13
; GFX7-SDAG-NEXT: v_min3_u32 v1, v1, v3, v7
; GFX7-SDAG-NEXT: v_min3_u32 v0, v0, v2, v6
; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1
; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
; ;

View File

@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0] ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX-LABEL: combine_constfold_undef_v8i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65534,0,65280,32768,0] ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,u,65534,0,65280,32768,0]
; AVX-NEXT: retq ; AVX-NEXT: retq
%res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>) %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
ret <8 x i16> %res ret <8 x i16> %res

View File

@ -62,12 +62,13 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535] ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,u,65535,65535,65535,2,65535]
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX-LABEL: combine_constfold_undef_v8i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65535,65535,65535,65535,65535,2,65535] ; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [65535,65535,2,65535,65535,65535,2,65535]
; AVX-NEXT: # xmm0 = mem[0,0]
; AVX-NEXT: retq ; AVX-NEXT: retq
%res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>) %res = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -65535, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 1, i16 65535>)
ret <8 x i16> %res ret <8 x i16> %res

View File

@ -62,12 +62,12 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2] ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; AVX-LABEL: combine_constfold_undef_v8i16: ; AVX-LABEL: combine_constfold_undef_v8i16:
; AVX: # %bb.0: ; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,65282,32786,2] ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,65282,32786,2]
; AVX-NEXT: retq ; AVX-NEXT: retq
%res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>) %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> <i16 undef, i16 1, i16 undef, i16 65535, i16 -1, i16 -255, i16 -32760, i16 1>, <8 x i16> <i16 1, i16 undef, i16 undef, i16 65535, i16 1, i16 65535, i16 -10, i16 65535>)
ret <8 x i16> %res ret <8 x i16> %res

View File

@ -73,17 +73,17 @@ define <8 x i16> @combine_constfold_v8i16() {
define <8 x i16> @combine_constfold_undef_v8i16() { define <8 x i16> @combine_constfold_undef_v8i16() {
; SSE-LABEL: combine_constfold_undef_v8i16: ; SSE-LABEL: combine_constfold_undef_v8i16:
; SSE: # %bb.0: ; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
; SSE-NEXT: retq ; SSE-NEXT: retq
; ;
; AVX1-LABEL: combine_constfold_undef_v8i16: ; AVX1-LABEL: combine_constfold_undef_v8i16:
; AVX1: # %bb.0: ; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] ; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
; AVX1-NEXT: retq ; AVX1-NEXT: retq
; ;
; AVX2-LABEL: combine_constfold_undef_v8i16: ; AVX2-LABEL: combine_constfold_undef_v8i16:
; AVX2: # %bb.0: ; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,65534,0,0,0] ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,u,0,65534,0,0,0]
; AVX2-NEXT: retq ; AVX2-NEXT: retq
; ;
; AVX512-LABEL: combine_constfold_undef_v8i16: ; AVX512-LABEL: combine_constfold_undef_v8i16:

View File

@ -800,13 +800,13 @@ define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) {
; CHECK-LABEL: shift_i32_by_32: ; CHECK-LABEL: shift_i32_by_32:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax) ; CHECK-NEXT: movl $0, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax) ; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: retl ; CHECK-NEXT: retl
; ;
; CHECK64-LABEL: shift_i32_by_32: ; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # %bb.0: # %entry ; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx) ; CHECK64-NEXT: movq $0, (%rdx)
; CHECK64-NEXT: retq ; CHECK64-NEXT: retq
entry: entry:
%load1 = load i8, ptr %src1, align 1 %load1 = load i8, ptr %src1, align 1

View File

@ -7,12 +7,10 @@
define void @PR33960() { define void @PR33960() {
; X86-LABEL: PR33960: ; X86-LABEL: PR33960:
; X86: # %bb.0: # %entry ; X86: # %bb.0: # %entry
; X86-NEXT: movl $-1, b
; X86-NEXT: retl ; X86-NEXT: retl
; ;
; X64-LABEL: PR33960: ; X64-LABEL: PR33960:
; X64: # %bb.0: # %entry ; X64: # %bb.0: # %entry
; X64-NEXT: movl $-1, b(%rip)
; X64-NEXT: retq ; X64-NEXT: retq
entry: entry:
%tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3 %tmp = insertelement <4 x i32> <i32 undef, i32 -7, i32 -3, i32 undef>, i32 -2, i32 3

View File

@ -42,6 +42,7 @@ add_llvm_unittest(CodeGenTests
ScalableVectorMVTsTest.cpp ScalableVectorMVTsTest.cpp
SchedBoundary.cpp SchedBoundary.cpp
SelectionDAGAddressAnalysisTest.cpp SelectionDAGAddressAnalysisTest.cpp
SelectionDAGNodeConstructionTest.cpp
SelectionDAGPatternMatchTest.cpp SelectionDAGPatternMatchTest.cpp
TypeTraitsTest.cpp TypeTraitsTest.cpp
TargetOptionsTest.cpp TargetOptionsTest.cpp

View File

@ -7,103 +7,12 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "SelectionDAGTestBase.h"
#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "gtest/gtest.h"
namespace llvm { namespace llvm {
class SelectionDAGAddressAnalysisTest : public testing::Test { class SelectionDAGAddressAnalysisTest : public SelectionDAGTestBase {};
protected:
static void SetUpTestCase() {
InitializeAllTargets();
InitializeAllTargetMCs();
}
void SetUp() override {
StringRef Assembly = "@g = global i32 0\n"
"@g_alias = alias i32, i32* @g\n"
"define i32 @f() {\n"
" %1 = load i32, i32* @g\n"
" ret i32 %1\n"
"}";
Triple TargetTriple("aarch64--");
std::string Error;
const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
// FIXME: These tests do not depend on AArch64 specifically, but we have to
// initialize a target. A skeleton Target for unittests would allow us to
// always run these tests.
if (!T)
GTEST_SKIP();
TargetOptions Options;
TM = std::unique_ptr<TargetMachine>(
T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt,
std::nullopt, CodeGenOptLevel::Aggressive));
if (!TM)
GTEST_SKIP();
SMDiagnostic SMError;
M = parseAssemblyString(Assembly, SMError, Context);
if (!M)
report_fatal_error(SMError.getMessage());
M->setDataLayout(TM->createDataLayout());
F = M->getFunction("f");
if (!F)
report_fatal_error("F?");
G = M->getGlobalVariable("g");
if (!G)
report_fatal_error("G?");
AliasedG = M->getNamedAlias("g_alias");
if (!AliasedG)
report_fatal_error("AliasedG?");
MachineModuleInfo MMI(TM.get());
MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
MMI.getContext(), 0);
DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::None);
if (!DAG)
report_fatal_error("DAG?");
OptimizationRemarkEmitter ORE(F);
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
nullptr, TTI.hasBranchDivergence(F));
}
TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
return DAG->getTargetLoweringInfo().getTypeAction(Context, VT);
}
EVT getTypeToTransformTo(EVT VT) {
return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT);
}
LLVMContext Context;
std::unique_ptr<TargetMachine> TM;
std::unique_ptr<Module> M;
Function *F;
GlobalVariable *G;
GlobalAlias *AliasedG;
std::unique_ptr<MachineFunction> MF;
std::unique_ptr<SelectionDAG> DAG;
};
TEST_F(SelectionDAGAddressAnalysisTest, sameFrameObject) { TEST_F(SelectionDAGAddressAnalysisTest, sameFrameObject) {
SDLoc Loc; SDLoc Loc;

View File

@ -0,0 +1,317 @@
//===---- llvm/unittest/CodeGen/SelectionDAGNodeConstructionTest.cpp ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SelectionDAGTestBase.h"
using namespace llvm;
// Fixture for tests that check which value SelectionDAG::getNode() returns
// when a binary node is constructed with poison and/or undef operands. It adds
// nothing of its own; all setup comes from SelectionDAGTestBase.
class SelectionDAGNodeConstructionTest : public SelectionDAGTestBase {};
// ADD: a poison operand yields poison; otherwise an undef operand yields undef.
TEST_F(SelectionDAGNodeConstructionTest, ADD) {
  SDLoc DL;
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::ADD, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Undef);
  EXPECT_EQ(Fold(Undef, Arg), Undef);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// AND: a poison operand yields poison; one undef operand yields zero; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, AND) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::AND, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Zero);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// MUL: a poison operand yields poison; one undef operand yields zero; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, MUL) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::MUL, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Zero);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// OR: a poison operand yields poison; one undef operand yields all-ones; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, OR) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::OR, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), AllOnes);
  EXPECT_EQ(Fold(Undef, Arg), AllOnes);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// SADDSAT: a poison operand yields poison; one undef operand yields all-ones;
// two undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, SADDSAT) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SADDSAT, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), AllOnes);
  EXPECT_EQ(Fold(Undef, Arg), AllOnes);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// SDIV: a poison operand yields poison; an undef divisor yields poison; an
// undef dividend with a non-undef divisor yields zero.
TEST_F(SelectionDAGNodeConstructionTest, SDIV) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SDIV, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Poison);
}
// SMAX: a poison operand yields poison; one undef operand yields the signed
// maximum; two undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, SMAX) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue MaxInt = DAG->getConstant(APInt::getSignedMaxValue(32), DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SMAX, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), MaxInt);
  EXPECT_EQ(Fold(Undef, Arg), MaxInt);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// SMIN: a poison operand yields poison; one undef operand yields the signed
// minimum; two undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, SMIN) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue MinInt = DAG->getConstant(APInt::getSignedMinValue(32), DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SMIN, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), MinInt);
  EXPECT_EQ(Fold(Undef, Arg), MinInt);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// SREM: a poison operand yields poison; an undef divisor yields poison; an
// undef dividend with a non-undef divisor yields zero.
TEST_F(SelectionDAGNodeConstructionTest, SREM) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SREM, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Poison);
}
// SSUBSAT: a poison operand yields poison; one undef operand yields zero; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, SSUBSAT) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SSUBSAT, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Zero);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// SUB: a poison operand yields poison; otherwise an undef operand yields undef.
TEST_F(SelectionDAGNodeConstructionTest, SUB) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::SUB, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Undef);
  EXPECT_EQ(Fold(Undef, Arg), Undef);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// UADDSAT: a poison operand yields poison; one undef operand yields all-ones;
// two undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, UADDSAT) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UADDSAT, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), AllOnes);
  EXPECT_EQ(Fold(Undef, Arg), AllOnes);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// UDIV: a poison operand yields poison; an undef divisor yields poison; an
// undef dividend with a non-undef divisor yields zero.
TEST_F(SelectionDAGNodeConstructionTest, UDIV) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UDIV, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Poison);
}
// UMAX: a poison operand yields poison; one undef operand yields all-ones; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, UMAX) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue AllOnes = DAG->getAllOnesConstant(DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UMAX, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), AllOnes);
  EXPECT_EQ(Fold(Undef, Arg), AllOnes);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// UMIN: a poison operand yields poison; one undef operand yields zero; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, UMIN) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UMIN, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Zero);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// UREM: a poison operand yields poison; an undef divisor yields poison; an
// undef dividend with a non-undef divisor yields zero.
TEST_F(SelectionDAGNodeConstructionTest, UREM) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::UREM, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Poison);
}
// USUBSAT: a poison operand yields poison; one undef operand yields zero; two
// undef operands yield undef.
TEST_F(SelectionDAGNodeConstructionTest, USUBSAT) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::USUBSAT, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Zero);
  EXPECT_EQ(Fold(Undef, Arg), Zero);
  EXPECT_EQ(Fold(Undef, Undef), Undef);
}
// XOR: a poison operand yields poison; one undef operand yields undef; two
// undef operands yield zero.
TEST_F(SelectionDAGNodeConstructionTest, XOR) {
  SDLoc DL;
  SDValue Arg = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, MVT::i32);
  SDValue Poison = DAG->getPOISON(MVT::i32);
  SDValue Undef = DAG->getUNDEF(MVT::i32);
  SDValue Zero = DAG->getConstant(0, DL, MVT::i32);

  // Builds the node under test from the given operand pair.
  auto Fold = [&](SDValue LHS, SDValue RHS) {
    return DAG->getNode(ISD::XOR, DL, MVT::i32, LHS, RHS);
  };

  // Poison on either side wins, even over undef.
  EXPECT_EQ(Fold(Arg, Poison), Poison);
  EXPECT_EQ(Fold(Poison, Arg), Poison);
  EXPECT_EQ(Fold(Poison, Undef), Poison);
  EXPECT_EQ(Fold(Undef, Poison), Poison);

  // Undef operands without poison.
  EXPECT_EQ(Fold(Arg, Undef), Undef);
  EXPECT_EQ(Fold(Undef, Arg), Undef);
  EXPECT_EQ(Fold(Undef, Undef), Zero);
}

View File

@ -6,102 +6,12 @@
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "SelectionDAGTestBase.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "gtest/gtest.h"
using namespace llvm; using namespace llvm;
class SelectionDAGPatternMatchTest : public testing::Test { class SelectionDAGPatternMatchTest : public SelectionDAGTestBase {};
protected:
static void SetUpTestCase() {
InitializeAllTargets();
InitializeAllTargetMCs();
}
void SetUp() override {
StringRef Assembly = "@g = global i32 0\n"
"@g_alias = alias i32, i32* @g\n"
"define i32 @f() {\n"
" %1 = load i32, i32* @g\n"
" ret i32 %1\n"
"}";
Triple TargetTriple("riscv64--");
std::string Error;
const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
// FIXME: These tests do not depend on RISCV specifically, but we have to
// initialize a target. A skeleton Target for unittests would allow us to
// always run these tests.
if (!T)
GTEST_SKIP();
TargetOptions Options;
TM = std::unique_ptr<TargetMachine>(T->createTargetMachine(
TargetTriple, "", "+m,+f,+d,+v", Options, std::nullopt, std::nullopt,
CodeGenOptLevel::Aggressive));
if (!TM)
GTEST_SKIP();
SMDiagnostic SMError;
M = parseAssemblyString(Assembly, SMError, Context);
if (!M)
report_fatal_error(SMError.getMessage());
M->setDataLayout(TM->createDataLayout());
F = M->getFunction("f");
if (!F)
report_fatal_error("F?");
G = M->getGlobalVariable("g");
if (!G)
report_fatal_error("G?");
AliasedG = M->getNamedAlias("g_alias");
if (!AliasedG)
report_fatal_error("AliasedG?");
MachineModuleInfo MMI(TM.get());
MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
MMI.getContext(), 0);
DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::None);
if (!DAG)
report_fatal_error("DAG?");
OptimizationRemarkEmitter ORE(F);
FunctionAnalysisManager FAM;
FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });
TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr, MMI,
nullptr, TTI.hasBranchDivergence(F));
}
TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
return DAG->getTargetLoweringInfo().getTypeAction(Context, VT);
}
EVT getTypeToTransformTo(EVT VT) {
return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT);
}
LLVMContext Context;
std::unique_ptr<TargetMachine> TM;
std::unique_ptr<Module> M;
Function *F;
GlobalVariable *G;
GlobalAlias *AliasedG;
std::unique_ptr<MachineFunction> MF;
std::unique_ptr<SelectionDAG> DAG;
};
TEST_F(SelectionDAGPatternMatchTest, matchValueType) { TEST_F(SelectionDAGPatternMatchTest, matchValueType) {
SDLoc DL; SDLoc DL;

View File

@ -0,0 +1,99 @@
//===---- llvm/unittest/CodeGen/SelectionDAGTestBase.h --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "gtest/gtest.h"
using namespace llvm;
/// Shared gtest fixture for SelectionDAG unit tests.
///
/// SetUp() parses a small IR module (global @g, alias @g_alias and function
/// @f), creates an "aarch64--" TargetMachine with "+sve", and initialises a
/// MachineFunction and a SelectionDAG for @f. The test is skipped when the
/// target cannot be looked up or the TargetMachine cannot be created.
class SelectionDAGTestBase : public testing::Test {
protected:
  static void SetUpTestCase() {
    InitializeAllTargets();
    InitializeAllTargetMCs();
  }

  void SetUp() override {
    StringRef Assembly = "@g = global i32 0\n"
                         "@g_alias = alias i32, i32* @g\n"
                         "define i32 @f() {\n"
                         "  %1 = load i32, i32* @g\n"
                         "  ret i32 %1\n"
                         "}";

    Triple TargetTriple("aarch64--");
    std::string Error;
    const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error);
    // FIXME: These tests do not depend on AArch64 specifically, but we have to
    // initialize a target. A skeleton Target for unittests would allow us to
    // always run these tests.
    if (!T)
      GTEST_SKIP();

    TargetOptions Options;
    TM = std::unique_ptr<TargetMachine>(
        T->createTargetMachine(TargetTriple, "", "+sve", Options, std::nullopt,
                               std::nullopt, CodeGenOptLevel::Aggressive));
    if (!TM)
      GTEST_SKIP();

    SMDiagnostic SMError;
    M = parseAssemblyString(Assembly, SMError, Context);
    // Surface the parser diagnostic so a broken module is easy to debug.
    ASSERT_TRUE(M) << "Could not parse module: "
                   << SMError.getMessage().str();
    M->setDataLayout(TM->createDataLayout());

    F = M->getFunction("f");
    ASSERT_TRUE(F && "Could not get function f!");
    G = M->getGlobalVariable("g");
    ASSERT_TRUE(G && "Could not get global g!");
    AliasedG = M->getNamedAlias("g_alias");
    ASSERT_TRUE(AliasedG && "Could not get alias g_alias!");

    // MMI and ORE are members rather than locals: MF is built on MMI's
    // MCContext and both objects are handed to DAG->init() by reference, so
    // they must stay alive for as long as MF and DAG are in use by the test.
    MMI = std::make_unique<MachineModuleInfo>(TM.get());

    MF = std::make_unique<MachineFunction>(*F, *TM, *TM->getSubtargetImpl(*F),
                                           MMI->getContext(), 0);

    DAG = std::make_unique<SelectionDAG>(*TM, CodeGenOptLevel::None);

    ORE = std::make_unique<OptimizationRemarkEmitter>(F);

    FunctionAnalysisManager FAM;
    FAM.registerPass([&] { return TM->getTargetIRAnalysis(); });

    // TTI is only needed here to query branch divergence for @f.
    TargetTransformInfo TTI = TM->getTargetIRAnalysis().run(*F, FAM);
    DAG->init(*MF, *ORE, nullptr, nullptr, nullptr, nullptr, nullptr, *MMI,
              nullptr, TTI.hasBranchDivergence(F));
  }

  /// Returns the legalisation action the DAG's target lowering reports for VT.
  TargetLoweringBase::LegalizeTypeAction getTypeAction(EVT VT) {
    return DAG->getTargetLoweringInfo().getTypeAction(Context, VT);
  }

  /// Returns the type the DAG's target lowering transforms VT to.
  EVT getTypeToTransformTo(EVT VT) {
    return DAG->getTargetLoweringInfo().getTypeToTransformTo(Context, VT);
  }

  // Declaration order matters: members are destroyed in reverse order, so
  // DAG and MF are torn down before the ORE/MMI objects they were set up with,
  // which in turn go before the module, target machine and context.
  LLVMContext Context;
  std::unique_ptr<TargetMachine> TM;
  std::unique_ptr<Module> M;
  // Non-owning pointers into M; null until SetUp() has resolved them (and
  // still null if SetUp() skipped the test).
  Function *F = nullptr;
  GlobalVariable *G = nullptr;
  GlobalAlias *AliasedG = nullptr;
  std::unique_ptr<MachineModuleInfo> MMI;
  std::unique_ptr<OptimizationRemarkEmitter> ORE;
  std::unique_ptr<MachineFunction> MF;
  std::unique_ptr<SelectionDAG> DAG;
};